diff options
-rw-r--r-- | parsers/caves.py | 43 |
1 files changed, 32 insertions, 11 deletions
diff --git a/parsers/caves.py b/parsers/caves.py index 72b84cd..168e66d 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -1,9 +1,11 @@ import os import re +from pathlib import Path from django.conf import settings from django.db import transaction +from troggle.settings import SURVEX_DATA, EXPOWEB from troggle.core.models.troggle import DataIssue from troggle.core.models.caves import Area, Cave, Entrance, CaveSlug, EntranceSlug, CaveAndEntrance '''Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave-data/1623-161.html ) @@ -33,7 +35,7 @@ def readcaves(): area_1623 = Area.objects.update_or_create(short_name = "1623", parent = None) - # This seems to retrun a tuple, not a single object! i.e. (<Area: 1623>, True) + # This seems to return a tuple, not a single object! i.e. (<Area: 1623>, True) #print(f' ! - READ CAVES: {area_1623}') area_1626 = Area.objects.update_or_create(short_name = "1626", parent = None) @@ -106,7 +108,11 @@ def readentrance(filename): context = "in file %s" % filename #print("Reading file ENTRANCE {} / {}".format(settings.ENTRANCEDESCRIPTIONS, filename)) entrancecontentslist = getXML(contents, "entrance", maxItems = 1, context = context) - if len(entrancecontentslist) == 1: + if len(entrancecontentslist) != 1: + message = f'! BAD ENTRANCE at "{filename}"' + DataIssue.objects.create(parser='caves', message=message) + print(message) + else: entrancecontents = entrancecontentslist[0] non_public = getXML(entrancecontents, "non_public", maxItems = 1, context = context) name = getXML(entrancecontents, "name", maxItems = 1, context = context) @@ -192,7 +198,11 @@ def readcave(filename): contents = f.read() context = " in file %s" % filename cavecontentslist = getXML(contents, "cave", maxItems = 1, context = context) - if len(cavecontentslist) == 1: + if len(cavecontentslist) != 1: + message = f'! BAD CAVE at "{filename}"' + DataIssue.objects.create(parser='caves', message=message) + print(message) + else: cavecontents = cavecontentslist[0] non_public = getXML(cavecontents, "non_public", maxItems = 1, context = context) slugs = getXML(cavecontents, "caveslug", maxItems = 1, context = context) @@ -281,20 +291,31 @@ def readcave(filename): primary = False for entrance in entrances: - slug = getXML(entrance, "entranceslug", maxItems = 1, context = context)[0] + eslug = getXML(entrance, "entranceslug", maxItems = 1, context = context)[0] letter = getXML(entrance, "letter", maxItems = 1, context = context)[0] try: - if slug in entrances_xslug: - entrance = entrances_xslug[slug] + if eslug in entrances_xslug: + entrance = entrances_xslug[eslug] else: - entrance = Entrance.objects.get(entranceslug__slug = slug) - entrances_xslug[slug] = entrance + entrance = Entrance.objects.get(entranceslug__slug = eslug) + entrances_xslug[eslug] = entrance ce = CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance) except: - message = " ! Entrance setting failure, slug: %s letter: %s" % (slug, letter) + message = f' ! Entrance setting failure, slug:"{slug}" letter:"{letter}" cave:"{c}" filename:"{filename}"' DataIssue.objects.create(parser='caves', message=message) print(message) - + + if survex_file[0]: + if not (Path(SURVEX_DATA) / survex_file[0]).is_file(): + message = f' ! survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"' + DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/') + print(message) + + if description_file[0]: + if not (Path(EXPOWEB) / description_file[0]).is_file(): + message = f' ! description filename does not exist :{EXPOWEB}:"{description_file[0]}" in "{filename}"' + DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/') + print(message) def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""): items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S) @@ -306,7 +327,7 @@ def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, print(message) if maxItems is not None and len(items) > maxItems and printwarnings: - message = " ! %(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items), + message = " ! %(count)i %(itemname)s found, no more than %(max)i expected in this XML unit " % {"count": len(items), "itemname": itemname, "max": maxItems} + context DataIssue.objects.create(parser='caves', message=message) |