summary | refs | log | tree | commit | diff | stats
path: root/parsers/caves.py
diff options
context:
space:
mode:
author Philip Sargent <philip.sargent@gmail.com> 2023-03-31 12:19:22 +0100
committer Philip Sargent <philip.sargent@gmail.com> 2023-03-31 12:19:22 +0100
commit 4d8cb82ef944e75ea8dfcd65b72733d48cd241fd (patch)
tree fb94c2639acc2c69052dce5eab692b78364849e4 /parsers/caves.py
parent 9ffe3f690b7b691a2ba28c3b0feae728a5aba3e9 (diff)
download troggle-4d8cb82ef944e75ea8dfcd65b72733d48cd241fd.tar.gz
troggle-4d8cb82ef944e75ea8dfcd65b72733d48cd241fd.tar.bz2
troggle-4d8cb82ef944e75ea8dfcd65b72733d48cd241fd.zip
initial refactoring
Diffstat (limited to 'parsers/caves.py')
-rw-r--r-- parsers/caves.py 108
1 files changed, 59 insertions, 49 deletions
diff --git a/parsers/caves.py b/parsers/caves.py
index de7c8e8..2489d6e 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -274,8 +274,11 @@ def do_pending_cave(k, url, area):
def readentrance(filename):
"""Reads an entrance description from the .html file
- Convoluted. Sorry.This is as I inherited it and I haven't fiddled with it. Needs rewriting
"""
+ def getXMLmax1(field):
+ return getXML(entrancecontents, field, maxItems=1, context=context)
+ # return getXML(entrancecontents, field, maxItems=1, context=context)[0]
+
global entrances_xslug
global caves_xslug
global areas_xslug
@@ -287,36 +290,44 @@ def readentrance(filename):
# print("Reading file ENTRANCE {} / {}".format(ENTRANCEDESCRIPTIONS, filename))
entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context)
if len(entrancecontentslist) != 1:
- message = f'! BAD ENTRANCE at "{filename}"'
+ message = f'! BAD ENTRANCE at "{filename}". Loading aborted. '
DataIssue.objects.create(parser="entrances", message=message)
print(message)
else:
entrancecontents = entrancecontentslist[0]
- non_public = getXML(entrancecontents, "non_public", maxItems=1, context=context)
- name = getXML(entrancecontents, "name", maxItems=1, context=context)
- slugs = getXML(entrancecontents, "slug", context=context)
- entrance_description = getXML(entrancecontents, "entrance_description", maxItems=1, context=context)
- explorers = getXML(entrancecontents, "explorers", maxItems=1, context=context)
- map_description = getXML(entrancecontents, "map_description", maxItems=1, context=context)
- location_description = getXML(entrancecontents, "location_description", maxItems=1, context=context)
lastvisit = getXML(entrancecontents, "last visit date", maxItems=1, minItems=0, context=context)
- approach = getXML(entrancecontents, "approach", maxItems=1, context=context)
- underground_description = getXML(entrancecontents, "underground_description", maxItems=1, context=context)
- photo = getXML(entrancecontents, "photo", maxItems=1, context=context)
- marking = getXML(entrancecontents, "marking", maxItems=1, context=context)
- marking_comment = getXML(entrancecontents, "marking_comment", maxItems=1, context=context)
- findability = getXML(entrancecontents, "findability", maxItems=1, context=context)
- findability_description = getXML(entrancecontents, "findability_description", maxItems=1, context=context)
- alt = getXML(entrancecontents, "alt", maxItems=1, context=context)
- northing = getXML(entrancecontents, "northing", maxItems=1, context=context)
- easting = getXML(entrancecontents, "easting", maxItems=1, context=context)
- tag_station = getXML(entrancecontents, "tag_station", maxItems=1, context=context)
- exact_station = getXML(entrancecontents, "exact_station", maxItems=1, context=context)
- other_station = getXML(entrancecontents, "other_station", maxItems=1, context=context)
- other_description = getXML(entrancecontents, "other_description", maxItems=1, context=context)
- bearings = getXML(entrancecontents, "bearings", maxItems=1, context=context)
- url = getXML(entrancecontents, "url", maxItems=1, context=context)
+ slugs = getXML(entrancecontents, "slug", context=context)
+ alt = getXMLmax1("alt")
+ approach = getXMLmax1("approach")
+ bearings = getXMLmax1("bearings")
+ easting = getXMLmax1("easting")
+ entrance_description = getXMLmax1("entrance_description")
+ exact_station = getXMLmax1("exact_station")
+ explorers = getXMLmax1("explorers")
+ findability = getXMLmax1("findability")
+ findability_description = getXMLmax1("findability_description")
+ location_description = getXMLmax1("location_description")
+ map_description = getXMLmax1("map_description")
+ marking = getXMLmax1("marking")
+ marking_comment = getXMLmax1("marking_comment")
+ name = getXMLmax1("name")
+ non_public = getXMLmax1("non_public")
+ northing = getXMLmax1("northing")
+ other_description = getXMLmax1("other_description")
+ other_station = getXMLmax1("other_station")
+ photo = getXMLmax1("photo")
+ tag_station = getXMLmax1("tag_station")
+ underground_description = getXMLmax1("underground_description")
+ url = getXMLmax1("url")
+
+ if len(slugs) >1:
+ # Only ever one of these per entrance in the expo dataset
+ message = f" ! - More than one slug for an entrance: {entrance}, slugs: {slugs}. Aborting."
+ DataIssue.objects.create(parser="entrances", message=message, url=f"/cave/{slug}/edit/")
+ print(message)
+ return
+
e, state = Entrance.objects.update_or_create(
name=name[0],
non_public={
@@ -325,35 +336,31 @@ def readentrance(filename):
"true": True,
"false": False,
}[non_public[0]],
+ alt=alt[0],
+ approach=approach[0],
+ bearings=bearings[0],
+ easting=easting[0],
entrance_description=entrance_description[0],
+ exact_station=exact_station[0],
explorers=explorers[0],
- map_description=map_description[0],
- location_description=location_description[0],
+ filename=filename,
+ findability=findability[0],
+ findability_description=findability_description[0],
lastvisit=lastvisit[0],
- approach=approach[0],
- underground_description=underground_description[0],
- photo=photo[0],
+ location_description=location_description[0],
+ map_description=map_description[0],
marking=marking[0],
marking_comment=marking_comment[0],
- findability=findability[0],
- findability_description=findability_description[0],
- alt=alt[0],
northing=northing[0],
- easting=easting[0],
- tag_station=tag_station[0],
- exact_station=exact_station[0],
- other_station=other_station[0],
other_description=other_description[0],
- bearings=bearings[0],
- url=url[0],
- filename=filename,
+ other_station=other_station[0],
+ photo=photo[0],
slug=slugs[0],
+ tag_station=tag_station[0],
+ underground_description=underground_description[0],
+ url=url[0],
)
- if len(slugs) >1:
- # Only ever one of these in the expo dataset
- message = f" ! - More than one slug for an entrance: {entrance}, slugs: {slugs}. Aborting."
- DataIssue.objects.create(parser="entrances", message=message, url=f"/cave/{slug}/edit/")
- print(message)
+
def readcave(filename):
"""Reads an enrance description from the .html file
@@ -538,10 +545,13 @@ def readcave(filename):
print(message)
-def getXML(text, itemname, minItems=1, maxItems=None, printwarnings=True, context=""):
- """Reads a single XML tag"""
+def getXML(text, itemname, minItems=1, maxItems=None, context=""):
+ """Reads a single XML tag
+ Should throw exception rather than producing error message here,
+ then handle exception in calling routine where it has the context.
+ """
items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
- if len(items) < minItems and printwarnings:
+ if len(items) < minItems:
message = (
" ! %(count)i x %(itemname)s found, at least %(min)i expected. Load ABORT. "
% {"count": len(items), "itemname": itemname, "min": minItems}
@@ -551,7 +561,7 @@ def getXML(text, itemname, minItems=1, maxItems=None, printwarnings=True, contex
DataIssue.objects.create(parser="caves", message=message, url="" + context)
print(message)
- if maxItems is not None and len(items) > maxItems and printwarnings:
+ if maxItems is not None and len(items) > maxItems:
message = (
" ! %(count)i x %(itemname)s found, no more than %(max)i expected in this XML unit. Load ABORT. "
% {"count": len(items), "itemname": itemname, "max": maxItems}