summary refs log tree commit diff stats
path: root/parsers/caves.py
diff options
context:
space:
mode:
author Philip Sargent <philip.sargent@gmail.com> 2024-06-30 00:17:39 +0300
committer Philip Sargent <philip.sargent@gmail.com> 2024-06-30 00:17:39 +0300
commit 6789d4f627167344b88c3ec33788cbdace6258da (patch)
tree a5b9b6d500b212693cfcf151142b1ebdf7ff93ef /parsers/caves.py
parent 24a97b9dbde259a3ec5a70d8e62090d701549c3c (diff)
download troggle-6789d4f627167344b88c3ec33788cbdace6258da.tar.gz
troggle-6789d4f627167344b88c3ec33788cbdace6258da.tar.bz2
troggle-6789d4f627167344b88c3ec33788cbdace6258da.zip
detecting mismatched entrance slugs and filenames
Diffstat (limited to 'parsers/caves.py')
-rw-r--r-- parsers/caves.py 45
1 files changed, 37 insertions, 8 deletions
diff --git a/parsers/caves.py b/parsers/caves.py
index a351bd1..f9c9b61 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -52,6 +52,8 @@ entrances_xslug = {}
caves_xslug = {}
areas_xslug = {}
+LETTERS = list(string.ascii_lowercase)
+
def dummy_entrance(k, slug, msg="DUMMY"):
"""Returns an empty entrance object for either a PENDING cave or a DUMMY entrance if
@@ -450,6 +452,10 @@ def read_entrance(filename, ent=None):
If not called as part of initial import, then the global lists will not be correct
but this is OK, a search will find them in the db.
+
+ EDIT href examples
+ /1623-1:1623-1a_entrance_edit
+ /1623/1/1623-1_cave_edit/
Args:
filename: The name of the .html file.
@@ -474,13 +480,31 @@ def read_entrance(filename, ent=None):
ent_area = filename[:4]
if ent_area not in AREACODES:
message = f'! BAD AREA CODE in "{filename}". Not recognised.'
- DataIssue.objects.create(parser="entrances", message=message, url=f"/entrance_data/{filename}_edit")
+ DataIssue.objects.create(parser="entrances", message=message, url=ent_edit_url)
print(message)
+
+
+
+ # Derive the letter, entrance slug and cave slug from the filename
+ entslug_fn = filename[:-5]
+ if entslug_fn[-1] in LETTERS:
+ caveslug_fn = entslug_fn[:-1]
+ letter_fn = entslug_fn[-1]
+ else:
+ caveslug_fn = entslug_fn
+ letter_fn = ""
+
+ cave_name_fn = caveslug_fn[5:] # remove initial "1623-"
+
+ ent_edit_url = f"/{caveslug_fn}:{entslug_fn}_entrance_edit"
+ cave_edit_url = f"/{ent_area}/{cave_name_fn}/{caveslug_fn}_cave_edit"
+
+
entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context)
if len(entrancecontentslist) != 1:
message = f'! BAD ENTRANCE DATA in "{filename}". More than one entrance. Edit file manually, click.'
- DataIssue.objects.create(parser="entrances", message=message, url=f"/entrance_data/{filename}_edit")
+ DataIssue.objects.create(parser="entrances", message=message, url=ent_edit_url)
print(message)
return None
@@ -491,12 +515,17 @@ def read_entrance(filename, ent=None):
if len(slugs) >1:
# Only ever one of these per entrance in the expo dataset
message = f" ! - More than one slug for an entrance: {entrance}, slugs: {slugs}. Ignoring all except first."
- DataIssue.objects.create(parser="entrances", message=message, url=f"/cave/{slug}/edit/")
+ DataIssue.objects.create(parser="entrances", message=message, url=cave_edit_url)
print(message)
- lastvisit = getXML(entrancecontents, "lastvisit", maxItems=1, minItems=0, context=f"/cave/{slug}/edit/")
- lat_wgs84 = getXML(entrancecontents, "lat_wgs84", maxItems=1, minItems=0, context=f"/cave/{slug}/edit/")
- long_wgs84 = getXML(entrancecontents, "long_wgs84", maxItems=1, minItems=0, context=f"/cave/{slug}/edit/")
+ if slug != entslug_fn:
+ message = f" ! - Warning, mismatch between entrance slug and filename: {slug=} {filename=}. "
+ DataIssue.objects.create(parser="xEntrances", message=message, url=cave_edit_url)
+ print(message)
+
+ lastvisit = getXML(entrancecontents, "lastvisit", maxItems=1, minItems=0, context=cave_edit_url)
+ lat_wgs84 = getXML(entrancecontents, "lat_wgs84", maxItems=1, minItems=0, context=cave_edit_url)
+ long_wgs84 = getXML(entrancecontents, "long_wgs84", maxItems=1, minItems=0, context=cave_edit_url)
alt = getXMLmax1("alt")
approach = getXMLmax1("approach")
@@ -551,7 +580,7 @@ def read_entrance(filename, ent=None):
except:
message = f" ! BAD ENTRANCE TAG '{st}' in '{filename}'. Must format like '1623.p204'. Edit file manually, click."
#http://localhost:8000/1623/2023-EBH-01/1623-2023-EBH-01:1623-2023-EBH-01_entrance_edit
- DataIssue.objects.create(parser="entrances", message=message, url=f"/1623/{slug}/{slug}:{slug}_entrance_edit")
+ DataIssue.objects.create(parser="entrances", message=message, url=ent_edit_url) # url=f"/1623/{slug}/{slug}:{slug}_entrance_edit")
print(message)
ent.save()
return ent
@@ -923,7 +952,7 @@ def readcaves():
with transaction.atomic():
print(" - Reading Entrances from entrance descriptions xml files")
- for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files
+ for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files, use pathlib !
read_entrance(filename)
# Why is this needed ? Without it, we lose these DataIssues!