diff options
Diffstat (limited to 'parsers/caves.py')
-rw-r--r-- | parsers/caves.py | 127 |
1 files changed, 38 insertions, 89 deletions
diff --git a/parsers/caves.py b/parsers/caves.py index 6850a1c..f70cdcd 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -8,7 +8,7 @@ from pathlib import Path from django.conf import settings from django.db import transaction -from troggle.core.models.caves import Area, Cave, CaveAndEntrance, Entrance, GetCaveLookup +from troggle.core.models.caves import Cave, CaveAndEntrance, Entrance, GetCaveLookup from troggle.core.models.logbooks import CaveSlug from troggle.core.models.troggle import DataIssue from troggle.settings import CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS, EXPOWEB, SURVEX_DATA @@ -99,36 +99,6 @@ def set_dummy_entrance(id, slug, cave, msg="DUMMY"): # DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.url}") # print(message) -def make_areas(): - print(" - Creating Areas 1623, 1624, 1627 and 1626") - # This crashes on the server with MariaDB even though a null parent is explicitly allowed. - area_1623 = Area.objects.create(short_name="1623", super=None) - area_1623.save() - area_1624 = Area.objects.create(short_name="1624", super=None) - area_1624.save() - area_1626 = Area.objects.create(short_name="1626", super=None) - area_1626.save() - area_1627 = Area.objects.create(short_name="1627", super=None) - area_1627.save() - -def get_area(areanum): - """Given the number as a string, return the area object - """ - a = Area.objects.all() - if len(a) == 0: - make_areas() - - area = Area.objects.get(short_name="1623") # default - - if areanum == "1623": - area = Area.objects.get(short_name="1623") - if areanum == "1624": - area = Area.objects.get(short_name="1624") - if areanum == "1626": - area = Area.objects.get(short_name="1626") - if areanum == "1627": - area = Area.objects.get(short_name="1627") - return area def create_new_cave(svxpath, msg=None): """This is called only when a new survex file is edited online which has a path on the @@ -145,29 +115,29 @@ def create_new_cave(svxpath, msg=None): print(f"parts {parts}, {a}, {caveid}") # double check if a[0:3] == "162": - areanum = a[0:4] - url = f"{areanum}/{caveid}.html" # Note we are appending the .html as we are believe in backwards compatability. - #url = f"{areanum}/{a[5:]}.html" # This is original code, but a above is only defined as being 4 characters long, so it did not make sense and produced non unique urls + areacode = a[0:4] + url = f"{areacode}/{caveid}.html" # Note we are appending the .html as we are believe in backwards compatability. + #url = f"{areacode}/{a[5:]}.html" # This is original code, but a above is only defined as being 4 characters long, so it did not make sense and produced non unique urls else: print(f"WARNING: parsers/caves/create_new_cave called with svxpath '{svxpath}'. Surely it should start 'caves-162*'? {msg}") - areanum = "1623" + areacode = "1623" url = f"1623/{caveid}.html" #url = f"1623/{k}.html" # This is original code, but a above is only defined as being 4 characters long, so it did not make sense and produced non unique urls - k = f"{areanum}-{caveid}" - area = get_area(areanum) + k = f"{areacode}-{caveid}" - caves = Cave.objects.filter(unofficial_number=caveid, area =areanum) + + caves = Cave.objects.filter(unofficial_number=caveid, areacode =areacode) if caves: - message = f" ! Already exists, caveid:{k} in area {areanum} {caves} - {msg}" + message = f" ! Already exists, caveid:{k} in areacode {areacode} {caves} - {msg}" DataIssue.objects.create(parser="caves", message=message) print(message) return caves[0] try: - cave = do_pending_cave(k, caveid, url, area, msg) + cave = do_pending_cave(k, caveid, url, areacode, msg) except: - message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum} - {msg}" + message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areacode} - {msg}" DataIssue.objects.create(parser="caves", message=message) print(message) raise @@ -175,14 +145,15 @@ def create_new_cave(svxpath, msg=None): # we know what the survex file is, we don't need to use the guess. # But this sets the survex file on he cave from the first one we find, not necessarily the best survex file for this cave cave.survex_file=survex_file + cave.areacode=areacode cave.save() return cave -def do_ARGE_cave(slug, caveid, url, area, svxid): +def do_ARGE_cave(slug, caveid, url, areacode, svxid): """Only called by survex parser. Creates a new Cave object, but with abbreviated data as the survex file (from ARGE) is all we have. We already know the survex file. - We already know that it doesn't exist. + We already know that it doesn't exist... though there are bugs.. """ default_note = "This is an ARGE cave where we only have the survex file and no other information" @@ -203,35 +174,31 @@ def do_ARGE_cave(slug, caveid, url, area, svxid): cave = Cave( unofficial_number=caveid.upper(), + kataster_number=caveid.upper(), # should only set this if all digits underground_description="ARGE cave.", survex_file= f"{svxid}.svx", url=url, notes=default_note, + areacode=areacode, ) if cave: cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key. - # cave.area.add(area) - # cave.save() # crashes entire transaction with foreign key error. - # The 'caves' list page uses the area__short_name to select for the area, so these ARGE caves do not appear. - - # message = f" ! {slug:18} ARGE cave url: {url} " - # DataIssue.objects.create(parser="caves", message=message, url=url) - # print(message) try: # Now create a cave slug ID CaveSlug.objects.update_or_create(cave=cave, slug=slug, primary=False) except: - message = f" ! {slug:11s} ARGE CaveSLUG create failure {caveid=} {url=} {area=} {svxid=}" + message = f" ! {slug:11s} ARGE CaveSLUG create failure {caveid=} {url=} {areacode=} {svxid=}" DataIssue.objects.create(parser="caves", message=message) print(message) else: - message = f" ! {slug:11s} ARGE cave create failure {caveid=} {url=} {area=} {svxid=}" + message = f" ! {slug:11s} ARGE cave create failure {caveid=} {url=} {areacode=} {svxid=}" DataIssue.objects.create(parser="caves", message=message) print(message) + return None return cave -def do_pending_cave(slug, caveid, url, area, msg=None): +def do_pending_cave(slug, caveid, url, areacode, msg=None): """ default for a PENDING cave, should be overwritten in the db later if a real cave of the same name exists in expoweb/cave_data/1623-"k".html @@ -250,16 +217,16 @@ def do_pending_cave(slug, caveid, url, area, msg=None): else: id = Path(k) - survex_file = f"caves-{area.short_name}/{id}/{id}.svx" + survex_file = f"caves-{areacode}/{id}/{id}.svx" if Path(settings.SURVEX_DATA, survex_file).is_file(): return survex_file else: - survex_file = f"caves-{area.short_name}/{id}.svx" + survex_file = f"caves-{areacode}/{id}.svx" if Path(settings.SURVEX_DATA, survex_file).is_file(): return survex_file survex_file = "" - d = Path(settings.SURVEX_DATA, f"caves-{area.short_name}/{id}") + d = Path(settings.SURVEX_DATA, f"caves-{areacode}/{id}") if d.is_dir(): prime_suspect = "" dir = d.iterdir() @@ -334,11 +301,10 @@ def do_pending_cave(slug, caveid, url, area, msg=None): survex_file=survex_file, url=url, notes=default_note, + areacode=areacode, ) if cave: cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key. - cave.area.add(area) - cave.save() message = f" ! {slug:18} Pending cave write-up url: {url} - {msg}" DataIssue.objects.create(parser="caves", message=message, url=url) print(message) @@ -555,7 +521,7 @@ def read_cave(filename, cave=None): # print(f"! Entrance {eslug}") if eslug.endswith('a b'): message = f' - Entrance has weird name slug:"{eslug}" cave:"{cave}" caveslug:"{slug}" filename:"cave_data/{filename}"' - DataIssue.objects.create(parser="xEntrances", message=message, url=f"{cave.area}/{cave.area}-{cave.url}_cave_edit/") + DataIssue.objects.create(parser="xEntrances", message=message, url=f"{cave.areacode}/{cave.areacode}-{cave.url}_cave_edit/") # print(message) letter = getXML(e, "letter", maxItems=1, context=context)[0] @@ -566,7 +532,7 @@ def read_cave(filename, cave=None): if letter.lower() not in list(string.ascii_lowercase): letter = "x" message = f"- Warning - Empty 'letter' field for '{eslug}' in multiple-entrance cave '{cave}', setting to {letter}." - DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.area}/{cave.area}-{cave.url}_cave_edit/") + DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.areacode}/{cave.areacode}-{cave.url}_cave_edit/") print(message) if len(entrances) == 1 and not eslug: # may be empty: <entranceslug></entranceslug> @@ -584,13 +550,13 @@ def read_cave(filename, cave=None): entrances_xslug[eslug] = entrance except: message = f"! Fail entrance loading {eslug} /entrance_data/{eslug} file does not exist or loading it failed." - DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.area}/{cave.area}-{cave.url}_cave_edit/") + DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.areacode}/{cave.areacode}-{cave.url}_cave_edit/") print(message) return if eslug != f"{entrance}": message = f"eslug {eslug} using different entrance {entrance} to set CaveAndEntrance" - DataIssue.objects.create(parser="xEntrances", message=message, url=f"{cave.area}/{cave.area}-{cave.url}_cave_edit/") + DataIssue.objects.create(parser="xEntrances", message=message, url=f"{cave.areacode}/{cave.areacode}-{cave.url}_cave_edit/") print(message) try: # this fails if there is not an unambiguous letter set. @@ -728,27 +694,13 @@ def read_cave(filename, cave=None): cave.description_file=description_file[0] cave.url=url[0] - areas = getXML(cavecontents, "area", context=context) - cave.area.clear() # Deletes all links to areas in db + areas = getXML(cavecontents, "area", context=context) # can be multiple <area> tags for area_slug in areas: - if area_slug in areas_xslug: - newArea = areas_xslug[area_slug] + if area_slug in ["1623", "1624", "1626", "1627"]: # ignore sub areas which are in another <area> tag + cave.areacode = area_slug else: - areas_new = Area.objects.filter(short_name=area_slug) - if areas_new: - newArea = areas_new[0] # just the first one we find, but we are going to clean up Areas anyway - else: - # Area not seen before. SHould not happen with manual edit - if manual_edit: - message = f" ! Cave edit failure due to unrecognised Area: {area_slug[0]}, skipping this field edit. " - DataIssue.objects.create(parser="caves", message=message) - print(message) - # super value is highly dodgy - newArea = Area(short_name=area_slug, super=Area.objects.get(short_name="1623")) - newArea.save() - areas_xslug[area_slug] = newArea - cave.area.add(newArea) - + cave.subarea = area_slug + entrances = getXML(cavecontents, "entrance", context=context) do_entrances() # print(f"- {entrances_xslug=}") @@ -834,8 +786,6 @@ def readcaves(): #DataIssue.objects.filter(parser="xEntrances").delete() with transaction.atomic(): - area = get_area("1623") - print(" - Reading Entrances from entrance descriptions xml files") for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files read_entrance(filename) @@ -860,19 +810,18 @@ def readcaves(): for k in pending: if k[0:3] == "162": - areanum = k[0:4] + areacode = k[0:4] number = k[5:] - url = f"{areanum}/{k[5:]}.html" # Note we are appending the .htm to allow for offline websites + url = f"{areacode}/{k[5:]}.html" # Note we are appending the .htm to allow for offline websites else: - areanum = "1623" + areacode = "1623" number = k url = f"1623/{k}" - area = get_area(areanum) try: - do_pending_cave(k, number, url, area) + do_pending_cave(k, number, url, areacode) except: - message = f" ! Error. Cannot create pending cave, pending-id:{k} in area {areanum}" + message = f" ! Error. Cannot create pending cave, pending-id:{k} in area {areacode}" DataIssue.objects.create(parser="caves", message=message) print(message) raise |