diff options
author | Philip Sargent <philip.sargent@gmail.com> | 2024-07-02 12:23:33 +0300 |
---|---|---|
committer | Philip Sargent <philip.sargent@gmail.com> | 2024-07-02 12:23:33 +0300 |
commit | e4d3e33dec40fe9b494c46a486172055a2e80e00 (patch) | |
tree | 5579c0a87a2a314bed31f95930d3e95008bd04a2 /parsers/caves.py | |
parent | 208c0ffa1f1d3af3959e2bec8f837d6c03935799 (diff) | |
download | troggle-e4d3e33dec40fe9b494c46a486172055a2e80e00.tar.gz troggle-e4d3e33dec40fe9b494c46a486172055a2e80e00.tar.bz2 troggle-e4d3e33dec40fe9b494c46a486172055a2e80e00.zip |
bug fix
Diffstat (limited to 'parsers/caves.py')
-rw-r--r-- | parsers/caves.py | 63 |
1 files changed, 24 insertions, 39 deletions
diff --git a/parsers/caves.py b/parsers/caves.py index 0bf3520..f2176e0 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -479,12 +479,6 @@ def read_entrance(filename, ent=None): contents = f.read() context = filename - # validate filename, check areacode - ent_area = filename[:4] - if ent_area not in AREACODES: - message = f'! BAD AREA CODE in "{filename}". Not recognised.' - DataIssue.objects.create(parser="entrances", message=message, url=ent_edit_url) - print(message) # Derive the letter, entrance slug and cave slug fromthe filename @@ -497,10 +491,16 @@ def read_entrance(filename, ent=None): letter_fn = "" cave_name_fn = caveslug_fn[5:] # remove initial "1623-" + ent_area = filename[:4] ent_edit_url = f"/{caveslug_fn}:{entslug_fn}_entrance_edit" cave_edit_url = f"/{ent_area}/{cave_name_fn}/{caveslug_fn}_cave_edit" + # validate filename, check areacode + if ent_area not in AREACODES: + message = f'! BAD AREA CODE in "{filename}". Not recognised.' + DataIssue.objects.create(parser="entrances", message=message, url=ent_edit_url) + print(message) entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context) @@ -726,8 +726,7 @@ def read_cave(filename, mvf=None, cave=None): def check_slug(areacode, kataster_number, unofficial_number, url): """There is a <caveslug> field in the .html file, but we now ignore it as we use the filename itself to set the slug. - However we do check it for sanity, pending its removal eventually.""" - # context = f"/{cave.areacode}/{slug}_cave_edit/" + However we do check it for sanity, if it is there, pending its removal eventually.""" if kataster_number: if slug == f"{areacode}-{kataster_number}": @@ -764,14 +763,20 @@ def read_cave(filename, mvf=None, cave=None): # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. fn = settings.CAVEDESCRIPTIONS / filename - context = f"/cave_data/{filename}_edit" # the expo html editor, not the specialist cave data editor. oops.FIX THIS - # print(f" - Reading Cave from cave descriptions file {fn}") + #print(f" - Reading Cave from cave descriptions file {fn}") if not fn.exists(): message = f" ! Cave_data file reading problem filename:'cave_data/{filename}'" - DataIssue.objects.create(parser="caves", message=message, url=context) + DataIssue.objects.create(parser="caves", message=message, url="") print(message) return None + + # ignore <caveslug> read from the filename + slug = filename[:-5] # strip off the ".html" at the end of the filename + slugs = [slug] + areacode = slug[:4] + + context = f"/{areacode}/{slug}_cave_edit" with open(fn) as f: contents = f.read() @@ -779,32 +784,17 @@ def read_cave(filename, mvf=None, cave=None): if len(cavecontentslist) != 1: message = f'! BAD CAVE DATA in "{filename}". More than one cave. Edit file manually, click.' - DataIssue.objects.create(parser="caves", message=message, url=context) + DataIssue.objects.create(parser="caves", message=message, url="") print(message) return None cavecontents = cavecontentslist[0] - - # This is ignored, we are using the filename not this <caveslug> field now - # New 2024 June 28th. - # slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context) - # if len(slugs) > 1: - # message = f" ! - More than one slug for a cave: {cave}, slugs: {slugs}." - # DataIssue.objects.create(parser="caves", message=message, url=context) - # print(message) - # slug = slugs[0] - - # but ignore <caveslug> read from the file - slug = filename[:-5] # strip off the ".html" at the end of the filename - slugs = [slug] + #print(f"{filename=} {slug=}") - #print(slug[-1].lower(), slug) if slug[-1].lower() in LETTERS: message = f" ! Cave name ends in a letter not a number. Fix this! in file {filename} " DataIssue.objects.create(parser="caves", message=message, url=context) # url here is for where the file actually is, for editing print(message) - - non_public = getXMLmax1("non_public") official_name = getXMLmax1("official_name") @@ -822,14 +812,11 @@ def read_cave(filename, mvf=None, cave=None): survex_file = getXMLmax1("survex_file") description_file = getXMLmax1("description_file") + # Optional, but probably deprecated as we should just derive this from the survex data length = getXMLmin0("length") depth = getXMLmin0("depth") extent = getXMLmin0("extent") - - areacode = slug[:4] - - contextguess = f"/{slug[0:4]}/{slug}_cave_edit/" - + manual_edit = True if not cave: # we are parsing using databaseReset.py not an online edit @@ -843,7 +830,7 @@ def read_cave(filename, mvf=None, cave=None): kaves = Cave.objects.all().filter(filename=filename) # replace with slug when CaveSlug tidied up for k in kaves: message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug()) - DataIssue.objects.create(parser="caves", message=message, url=contextguess) + DataIssue.objects.create(parser="caves", message=message, url=context) print(message) for k in kaves: if k.slug() is not None: @@ -855,15 +842,13 @@ def read_cave(filename, mvf=None, cave=None): do_caveslugstuff() # needs cave!=None # We no longer need the <area> tag to define 1623 etc as we get that from the filename. - areas = getXML(cavecontents, "area", context=contextguess, minItems=0) # can be multiple <area> tags + areas = getXML(cavecontents, "area", context=context, minItems=0) # can be multiple <area> tags for area_slug in areas: if area_slug not in AREACODES: # only detect subareas cave.subarea = area_slug if not cave.areacode: if areacode in AREACODES: cave.areacode = areacode - context = f"/{cave.areacode}/{slug}_cave_edit/" - cave.non_public=boolify(non_public) cave.official_name=official_name[0] @@ -886,13 +871,13 @@ def read_cave(filename, mvf=None, cave=None): cave.extent=extent[0] cave.survex_file=survex_file[0] cave.description_file=description_file[0] - # cave.url=url[0] # set algorithically: + # cave.url=url[0] # set algorithically now: cave.url = f"{cave.areacode}/{cave.number()}/{cave.number()}.html" check_directory(cave.areacode, cave.number(), cave.url, cave) slug = check_slug(cave.areacode, cave.kataster_number, cave.unofficial_number, cave.url) #NB cave.slug is not a field on Cave - # Thsi whole way of doing entrances can be replaced by simply knowing formthe entrance_data filename what the cave is. + # Thsi whole way of doing entrances can be replaced by simply knowing the entrance_data filename what the cave is. to do. entrances = getXML(cavecontents, "entrance", context=context) do_entrances() # print(f"- {entrances_xslug=}") |