diff options
-rw-r--r-- | parsers/caves.py | 30 | ||||
-rw-r--r-- | parsers/survex.py | 7 | ||||
-rw-r--r-- | templates/dataissues.html | 2 | ||||
-rw-r--r-- | templates/personexpedition.html | 9 |
4 files changed, 31 insertions, 17 deletions
diff --git a/parsers/caves.py b/parsers/caves.py index 3b96d69..314d331 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -6,10 +6,10 @@ from pathlib import Path from django.conf import settings from django.db import transaction -from troggle.settings import SURVEX_DATA, EXPOWEB +from troggle.settings import SURVEX_DATA, EXPOWEB, CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS from troggle.core.models.troggle import DataIssue from troggle.core.models.caves import Area, Cave, Entrance, CaveSlug, EntranceSlug, CaveAndEntrance -'''Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave-data/1623-161.html ) +'''Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html ) and creating the various Cave, Entrance and necessary Area objects. BUT in Django 2.0 and later we cannot do any queries on data we have just entered @@ -23,7 +23,7 @@ todo='''- db Update does not work when a cave id is in the pending list but a pr and is being imported. It should work. But currently Django aborts and he file is not read in. - Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file. - So we will need a separate file-editing capability just for this configuration file. + So we will need a separate file-editing capability just for this configuration file ?! 
''' entrances_xslug = {} caves_xslug = {} @@ -140,7 +140,7 @@ def readcaves(): # For those caves which do not have cave_data/1623-xxx.html XML files even though they exist and have surveys # should put this in a simple list which can be edited using 'Edit this file' pending = set() - fpending = Path(settings.CAVEDESCRIPTIONS, "pendingcaves.txt") + fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt") if fpending.is_file(): with open(fpending, "r") as fo: cids = fo.readlines() @@ -153,6 +153,7 @@ def readcaves(): Entrance.objects.all().delete() # Clear the cave data issues and the caves as we are reloading DataIssue.objects.filter(parser='caves').delete() + DataIssue.objects.filter(parser='caves ok').delete() DataIssue.objects.filter(parser='entrances').delete() area_1623 = Area.objects.update_or_create(short_name = "1623", parent = None) @@ -176,9 +177,9 @@ def readcaves(): raise with transaction.atomic(): - print(" - settings.CAVEDESCRIPTIONS: ", settings.CAVEDESCRIPTIONS) + print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS) print(" - Reading Entrances from entrance descriptions xml files") - for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files + for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files # if filename.endswith('.html'): # if Path(filename).stem[5:] in pending: # print(f'Skipping pending entrance dummy file <{filename}>') @@ -187,7 +188,7 @@ def readcaves(): readentrance(filename) print(" - Reading Caves from cave descriptions xml files") - for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files + for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files if filename.endswith('.html'): readcave(filename) @@ -197,10 +198,10 @@ def readentrance(filename): global areas_xslug # Note: these are HTML files in the EXPOWEB repo, not from the 
loser repo. - with open(os.path.join(settings.ENTRANCEDESCRIPTIONS, filename)) as f: + with open(os.path.join(ENTRANCEDESCRIPTIONS, filename)) as f: contents = f.read() context = filename - #print("Reading file ENTRANCE {} / {}".format(settings.ENTRANCEDESCRIPTIONS, filename)) + #print("Reading file ENTRANCE {} / {}".format(ENTRANCEDESCRIPTIONS, filename)) entrancecontentslist = getXML(contents, "entrance", maxItems = 1, context = context) if len(entrancecontentslist) != 1: message = f'! BAD ENTRANCE at "{filename}"' @@ -293,7 +294,7 @@ def readcave(filename): global areas_xslug # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. - with open(os.path.join(settings.CAVEDESCRIPTIONS, filename)) as f: + with open(os.path.join(CAVEDESCRIPTIONS, filename)) as f: contents = f.read() context = filename cavecontentslist = getXML(contents, "cave", maxItems = 1, context = context) @@ -419,9 +420,14 @@ def readcave(filename): DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/') print(message) - if description_file[0]: + + if description_file[0]: # if not an empty string + message = f' - {slug:12} complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"' + DataIssue.objects.create(parser='caves ok', message=message, url=f'/cave/{slug}/edit/') + print(message) + if not (Path(EXPOWEB) / description_file[0]).is_file(): - message = f' ! {slug:12} description filename does not exist :{EXPOWEB}:"{description_file[0]}" in "{filename}"' + message = f' ! {slug:12} description filename "{EXPOWEB}/{description_file[0]}" does not refer to a real file' DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/') print(message) #c.description_file="" # done only once, to clear out cruft. 
diff --git a/parsers/survex.py b/parsers/survex.py index dd40638..70649da 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -1185,9 +1185,12 @@ class LoadingSurvex(): self.uniquename[fn] = [fullpath] else: self.uniquename[fn].append(fullpath) - message = f" ! NON-UNIQUE survex filename, overwriting in expowebcache/3d/ '{fn}' - '{self.uniquename[fn]}' #{len(self.uniquename[fn])}" + # This is not an error now that we are moving .3d files to the :loser: directory tree + # message = f" ! NON-UNIQUE survex filename, '{fn}' - '{self.uniquename[fn]}' #{len(self.uniquename[fn])}" + # print(message) + # DataIssue.objects.create(parser='survex', message=message) + message = f" NOTE: non-unique survex filename, '{fn}' - '{self.uniquename[fn]}' #{len(self.uniquename[fn])}" print(message) - DataIssue.objects.create(parser='survex', message=message) def RunSurvexIfNeeded(self,fullpath): diff --git a/templates/dataissues.html b/templates/dataissues.html index d42821d..5bd53c0 100644 --- a/templates/dataissues.html +++ b/templates/dataissues.html @@ -6,7 +6,7 @@ <h1>Loading data from files: Issues arising that need attention</h1> <p> -This is work in progress (April 2021).The URL links to the offending objects are enabled on only some types of fault as yet. +This is work in progress (March 2022).The URL links to the offending objects are enabled on only some types of fault as yet. 
<style> tr { text-align:center; font-family: Tahoma,'Trebuchet MS','Lucida Grande',Verdana, Arial, Helvetica, Sans-Serif; diff --git a/templates/personexpedition.html b/templates/personexpedition.html index b71b584..90d4f26 100644 --- a/templates/personexpedition.html +++ b/templates/personexpedition.html @@ -62,10 +62,12 @@ If anyone really cares, they can always look in the original survex file </table> </div> <div style="color: slategray; background-color:lightcyan"> -<p>Horrible bug here but only when there is more than one survex block per day, +<p>Horrible bug here but only when there is more than one survex block per day, or is there ?! + +<p>What we thought was the bug: e.g. see <a href="/personexpedition/Wookey/1999">Wookey 1999</a> where there are 3 eiscream survex blocks on 5th August. It duplicates the entry but gets it wrong. The length from the first block is displayed twice but there should be 3 rows: eiscream, eiscream2, eiscream3. -<p>Fortunately it is <b>just this display on this page which is wrong</b>: no bad calculations get into the database. + <p>The interaction of django database query idioms with <a href="https://docs.djangoproject.com/en/1.11/ref/templates/api/">django HTML templating language</a> is a bit impenetrable here. I blame Aaron Curtis who was too fond of being clever with the Django templating system instead of writing it in python anyone could understand.<br> @@ -76,5 +78,8 @@ instead of writing it in python anyone could understand.<br> <br> - the connection between the two is made in the URL resolver in <var>troggle/urls.py</var> <p>To be fixed! +<h3>What we now know</h3> +<p>The eiscream.svx file does indeed record 3 blocks: eiscream, eiscream2 & eiscream3. But (more) careful inspection shows that eiscream2 and eiscream3 are in +the year 2000, not in 1999. So they absolutely should not be shown here. So maybe everything is correct after all. (Well, apart from the duplication.) </div> {% endblock %} |