diff options
author | Philip Sargent <philip.sargent@gmail.com> | 2024-06-30 00:17:39 +0300 |
---|---|---|
committer | Philip Sargent <philip.sargent@gmail.com> | 2024-06-30 00:17:39 +0300 |
commit | 6789d4f627167344b88c3ec33788cbdace6258da (patch) | |
tree | a5b9b6d500b212693cfcf151142b1ebdf7ff93ef /parsers/caves.py | |
parent | 24a97b9dbde259a3ec5a70d8e62090d701549c3c (diff) | |
download | troggle-6789d4f627167344b88c3ec33788cbdace6258da.tar.gz troggle-6789d4f627167344b88c3ec33788cbdace6258da.tar.bz2 troggle-6789d4f627167344b88c3ec33788cbdace6258da.zip |
detecting mismatched entrance slugs and filenames
Diffstat (limited to 'parsers/caves.py')
-rw-r--r-- | parsers/caves.py | 45 |
1 files changed, 37 insertions, 8 deletions
diff --git a/parsers/caves.py b/parsers/caves.py index a351bd1..f9c9b61 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -52,6 +52,8 @@ entrances_xslug = {} caves_xslug = {} areas_xslug = {} +LETTERS = list(string.ascii_lowercase) + def dummy_entrance(k, slug, msg="DUMMY"): """Returns an empty entrance object for either a PENDING cave or a DUMMY entrance if @@ -450,6 +452,10 @@ def read_entrance(filename, ent=None): If not called as part of initial import, then the global lists will not be correct but this is OK, a search will find them in the db. + + EDIT href examples + /1623-1:1623-1a_entrance_edit + /1623/1/1623-1_cave_edit/ Args: filename: The name of the .html file. @@ -474,13 +480,31 @@ def read_entrance(filename, ent=None): ent_area = filename[:4] if ent_area not in AREACODES: message = f'! BAD AREA CODE in "{filename}". Not recognised.' - DataIssue.objects.create(parser="entrances", message=message, url=f"/entrance_data/{filename}_edit") + DataIssue.objects.create(parser="entrances", message=message, url=ent_edit_url) print(message) + + + + # Derive the letter, entrance slug and cave slug from the filename + entslug_fn = filename[:-5] + if entslug_fn[-1] in LETTERS: + caveslug_fn = entslug_fn[:-1] + letter_fn = entslug_fn[-1] + else: + caveslug_fn = entslug_fn + letter_fn = "" + + cave_name_fn = caveslug_fn[5:] # remove initial "1623-" + + ent_edit_url = f"/{caveslug_fn}:{entslug_fn}_entrance_edit" + cave_edit_url = f"/{ent_area}/{cave_name_fn}/{caveslug_fn}_cave_edit" + + entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context) if len(entrancecontentslist) != 1: message = f'! BAD ENTRANCE DATA in "{filename}". More than one entrance. Edit file manually, click.' 
- DataIssue.objects.create(parser="entrances", message=message, url=f"/entrance_data/{filename}_edit") + DataIssue.objects.create(parser="entrances", message=message, url=ent_edit_url) print(message) return None @@ -491,12 +515,17 @@ def read_entrance(filename, ent=None): if len(slugs) >1: # Only ever one of these per entrance in the expo dataset message = f" ! - More than one slug for an entrance: {entrance}, slugs: {slugs}. Ignoring all except first." - DataIssue.objects.create(parser="entrances", message=message, url=f"/cave/{slug}/edit/") + DataIssue.objects.create(parser="entrances", message=message, url=cave_edit_url) print(message) - lastvisit = getXML(entrancecontents, "lastvisit", maxItems=1, minItems=0, context=f"/cave/{slug}/edit/") - lat_wgs84 = getXML(entrancecontents, "lat_wgs84", maxItems=1, minItems=0, context=f"/cave/{slug}/edit/") - long_wgs84 = getXML(entrancecontents, "long_wgs84", maxItems=1, minItems=0, context=f"/cave/{slug}/edit/") + if slug != entslug_fn: + message = f" ! - Warning, mismatch between entrance slug and filename: {slug=} {filename=}. " + DataIssue.objects.create(parser="xEntrances", message=message, url=cave_edit_url) + print(message) + + lastvisit = getXML(entrancecontents, "lastvisit", maxItems=1, minItems=0, context=cave_edit_url) + lat_wgs84 = getXML(entrancecontents, "lat_wgs84", maxItems=1, minItems=0, context=cave_edit_url) + long_wgs84 = getXML(entrancecontents, "long_wgs84", maxItems=1, minItems=0, context=cave_edit_url) alt = getXMLmax1("alt") approach = getXMLmax1("approach") @@ -551,7 +580,7 @@ def read_entrance(filename, ent=None): except: message = f" ! BAD ENTRANCE TAG '{st}' in '{filename}'. Must format like '1623.p204'. Edit file manually, click." 
#http://localhost:8000/1623/2023-EBH-01/1623-2023-EBH-01:1623-2023-EBH-01_entrance_edit - DataIssue.objects.create(parser="entrances", message=message, url=f"/1623/{slug}/{slug}:{slug}_entrance_edit") + DataIssue.objects.create(parser="entrances", message=message, url=ent_edit_url) # url=f"/1623/{slug}/{slug}:{slug}_entrance_edit") print(message) ent.save() return ent @@ -923,7 +952,7 @@ def readcaves(): with transaction.atomic(): print(" - Reading Entrances from entrance descriptions xml files") - for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files + for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files, use pathlib ! read_entrance(filename) # Why is this needed ? Without it, we lose these DataIssues! |