diff options
author | Philip Sargent <philip.sargent@gmail.com> | 2023-03-31 12:19:22 +0100 |
---|---|---|
committer | Philip Sargent <philip.sargent@gmail.com> | 2023-03-31 12:19:22 +0100 |
commit | 4d8cb82ef944e75ea8dfcd65b72733d48cd241fd (patch) | |
tree | fb94c2639acc2c69052dce5eab692b78364849e4 /parsers/caves.py | |
parent | 9ffe3f690b7b691a2ba28c3b0feae728a5aba3e9 (diff) | |
download | troggle-4d8cb82ef944e75ea8dfcd65b72733d48cd241fd.tar.gz troggle-4d8cb82ef944e75ea8dfcd65b72733d48cd241fd.tar.bz2 troggle-4d8cb82ef944e75ea8dfcd65b72733d48cd241fd.zip |
initial refactoring
Diffstat (limited to 'parsers/caves.py')
-rw-r--r-- | parsers/caves.py | 108 |
1 files changed, 59 insertions, 49 deletions
diff --git a/parsers/caves.py b/parsers/caves.py index de7c8e8..2489d6e 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -274,8 +274,11 @@ def do_pending_cave(k, url, area): def readentrance(filename): """Reads an entrance description from the .html file - Convoluted. Sorry.This is as I inherited it and I haven't fiddled with it. Needs rewriting """ + def getXMLmax1(field): + return getXML(entrancecontents, field, maxItems=1, context=context) + # return getXML(entrancecontents, field, maxItems=1, context=context)[0] + global entrances_xslug global caves_xslug global areas_xslug @@ -287,36 +290,44 @@ def readentrance(filename): # print("Reading file ENTRANCE {} / {}".format(ENTRANCEDESCRIPTIONS, filename)) entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context) if len(entrancecontentslist) != 1: - message = f'! BAD ENTRANCE at "{filename}"' + message = f'! BAD ENTRANCE at "{filename}". Loading aborted. ' DataIssue.objects.create(parser="entrances", message=message) print(message) else: entrancecontents = entrancecontentslist[0] - non_public = getXML(entrancecontents, "non_public", maxItems=1, context=context) - name = getXML(entrancecontents, "name", maxItems=1, context=context) - slugs = getXML(entrancecontents, "slug", context=context) - entrance_description = getXML(entrancecontents, "entrance_description", maxItems=1, context=context) - explorers = getXML(entrancecontents, "explorers", maxItems=1, context=context) - map_description = getXML(entrancecontents, "map_description", maxItems=1, context=context) - location_description = getXML(entrancecontents, "location_description", maxItems=1, context=context) lastvisit = getXML(entrancecontents, "last visit date", maxItems=1, minItems=0, context=context) - approach = getXML(entrancecontents, "approach", maxItems=1, context=context) - underground_description = getXML(entrancecontents, "underground_description", maxItems=1, context=context) - photo = getXML(entrancecontents, "photo", maxItems=1, context=context) - marking = getXML(entrancecontents, "marking", maxItems=1, context=context) - marking_comment = getXML(entrancecontents, "marking_comment", maxItems=1, context=context) - findability = getXML(entrancecontents, "findability", maxItems=1, context=context) - findability_description = getXML(entrancecontents, "findability_description", maxItems=1, context=context) - alt = getXML(entrancecontents, "alt", maxItems=1, context=context) - northing = getXML(entrancecontents, "northing", maxItems=1, context=context) - easting = getXML(entrancecontents, "easting", maxItems=1, context=context) - tag_station = getXML(entrancecontents, "tag_station", maxItems=1, context=context) - exact_station = getXML(entrancecontents, "exact_station", maxItems=1, context=context) - other_station = getXML(entrancecontents, "other_station", maxItems=1, context=context) - other_description = getXML(entrancecontents, "other_description", maxItems=1, context=context) - bearings = getXML(entrancecontents, "bearings", maxItems=1, context=context) - url = getXML(entrancecontents, "url", maxItems=1, context=context) + slugs = getXML(entrancecontents, "slug", context=context) + alt = getXMLmax1("alt") + approach = getXMLmax1("approach") + bearings = getXMLmax1("bearings") + easting = getXMLmax1("easting") + entrance_description = getXMLmax1("entrance_description") + exact_station = getXMLmax1("exact_station") + explorers = getXMLmax1("explorers") + findability = getXMLmax1("findability") + findability_description = getXMLmax1("findability_description") + location_description = getXMLmax1("location_description") + map_description = getXMLmax1("map_description") + marking = getXMLmax1("marking") + marking_comment = getXMLmax1("marking_comment") + name = getXMLmax1("name") + non_public = getXMLmax1("non_public") + northing = getXMLmax1("northing") + other_description = getXMLmax1("other_description") + other_station = getXMLmax1("other_station") + photo = getXMLmax1("photo") + tag_station = getXMLmax1("tag_station") + underground_description = getXMLmax1("underground_description") + url = getXMLmax1("url") + + if len(slugs) >1: + # Only ever one of these per entrance in the expo dataset + message = f" ! - More than one slug for an entrance: {entrance}, slugs: {slugs}. Aborting." + DataIssue.objects.create(parser="entrances", message=message, url=f"/cave/{slug}/edit/") + print(message) + return + e, state = Entrance.objects.update_or_create( name=name[0], non_public={ @@ -325,35 +336,31 @@ def readentrance(filename): "true": True, "false": False, }[non_public[0]], + alt=alt[0], + approach=approach[0], + bearings=bearings[0], + easting=easting[0], entrance_description=entrance_description[0], + exact_station=exact_station[0], explorers=explorers[0], - map_description=map_description[0], - location_description=location_description[0], + filename=filename, + findability=findability[0], + findability_description=findability_description[0], lastvisit=lastvisit[0], - approach=approach[0], - underground_description=underground_description[0], - photo=photo[0], + location_description=location_description[0], + map_description=map_description[0], marking=marking[0], marking_comment=marking_comment[0], - findability=findability[0], - findability_description=findability_description[0], - alt=alt[0], northing=northing[0], - easting=easting[0], - tag_station=tag_station[0], - exact_station=exact_station[0], - other_station=other_station[0], other_description=other_description[0], - bearings=bearings[0], - url=url[0], - filename=filename, + other_station=other_station[0], + photo=photo[0], slug=slugs[0], + tag_station=tag_station[0], + underground_description=underground_description[0], + url=url[0], ) - if len(slugs) >1: - # Only ever one of these in the expo dataset - message = f" ! - More than one slug for an entrance: {entrance}, slugs: {slugs}. Aborting." - DataIssue.objects.create(parser="entrances", message=message, url=f"/cave/{slug}/edit/") - print(message) + def readcave(filename): """Reads an enrance description from the .html file @@ -538,10 +545,13 @@ def readcave(filename): print(message) -def getXML(text, itemname, minItems=1, maxItems=None, printwarnings=True, context=""): - """Reads a single XML tag""" +def getXML(text, itemname, minItems=1, maxItems=None, context=""): + """Reads a single XML tag + Should throw exception rather than producing error message here, + then handle exception in calling routine where it has the context. + """ items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S) - if len(items) < minItems and printwarnings: + if len(items) < minItems: message = ( " ! %(count)i x %(itemname)s found, at least %(min)i expected. Load ABORT. " % {"count": len(items), "itemname": itemname, "min": minItems} @@ -551,7 +561,7 @@ def getXML(text, itemname, minItems=1, maxItems=None, printwarnings=True, contex DataIssue.objects.create(parser="caves", message=message, url="" + context) print(message) - if maxItems is not None and len(items) > maxItems and printwarnings: + if maxItems is not None and len(items) > maxItems: message = ( " ! %(count)i x %(itemname)s found, no more than %(max)i expected in this XML unit. Load ABORT. " % {"count": len(items), "itemname": itemname, "max": maxItems} |