diff options
author | Philip Sargent <philip.sargent@klebos.com> | 2022-07-23 19:26:47 +0300 |
---|---|---|
committer | Philip Sargent <philip.sargent@klebos.com> | 2022-07-23 19:26:47 +0300 |
commit | a1e538e93a03a9b46dc664e0d1ca8504f41ffd0d (patch) | |
tree | 14a527766073e0dc3233b4a49c5ca6b64648b5d1 /parsers/caves.py | |
parent | 7288bd9da31349612f4336026512a440fd499847 (diff) | |
download | troggle-a1e538e93a03a9b46dc664e0d1ca8504f41ffd0d.tar.gz troggle-a1e538e93a03a9b46dc664e0d1ca8504f41ffd0d.tar.bz2 troggle-a1e538e93a03a9b46dc664e0d1ca8504f41ffd0d.zip |
Making it work with 1626 (no Schonberg)
Diffstat (limited to 'parsers/caves.py')
-rw-r--r-- | parsers/caves.py | 163 |
1 files changed, 89 insertions, 74 deletions
diff --git a/parsers/caves.py b/parsers/caves.py index e9be47b..4b8178a 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -87,8 +87,10 @@ def do_pending_cave(k, url, area_1623): ''' default for a PENDING cave, should be overwritten in the db later if a real cave of the same name exists in expoweb/cave_data/1623-"k".html + + oops. Now need to do for 1626 area too ''' - slug = "1623-" + k + slug = k default_note = f"_Survex file found in loser repo but no description in expoweb <br><br><br>\n" default_note += f"INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then " @@ -151,78 +153,6 @@ def do_pending_cave(k, url, area_1623): print(message) -def readcaves(): - '''Reads the xml-format HTML files in the EXPOWEB repo, not from the loser repo. - ''' - # For those caves which do not have cave_data/1623-xxx.html XML files even though they exist and have surveys - # should put this in a simple list which can be edited using 'Edit this file' - pending = set() - fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt") - if fpending.is_file(): - with open(fpending, "r") as fo: - cids = fo.readlines() - for cid in cids: - pending.add(cid.rstrip('\n')) - - with transaction.atomic(): - print(" - Deleting Caves and Entrances") - # attempting to avoid MariaDB crash when doing this - try: - Area.objects.all().delete() - except: - pass - try: - Cave.objects.all().delete() - except: - pass - try: - Entrance.objects.all().delete() - except: - pass - # Clear the cave data issues and the caves as we are reloading - DataIssue.objects.filter(parser='areas').delete() - DataIssue.objects.filter(parser='caves').delete() - DataIssue.objects.filter(parser='caves ok').delete() - DataIssue.objects.filter(parser='entrances').delete() - - print(" - Creating Areas 1623 and 1626") - # This crashes on the server with MariaDB even though a null parent is explicitly allowed. 
- area_1623= Area.objects.create(short_name = "1623", super=None) - print(" - Saving Area 1623") - area_1623.save() - area_1626= Area.objects.create(short_name = "1626", super=None) - print(" - Saving Area 1626") - area_1626.save() - - print (" - Setting pending caves") - # Do this first, so that these empty entries are overwritten as they get properly created. - - - for k in pending: - url = "1623/" + k # Note we are not appending the .htm as we are modern folks now. - try: - do_pending_cave(k, url, area_1623) - except: - message = " ! Error. Cannot create pending cave and entrance, pending-id:{}".format(k) - DataIssue.objects.create(parser='caves', message=message) - print(message) - raise - - with transaction.atomic(): - print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS) - print(" - Reading Entrances from entrance descriptions xml files") - for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files - # if filename.endswith('.html'): - # if Path(filename).stem[5:] in pending: - # print(f'Skipping pending entrance dummy file <{filename}>') - # else: - # readentrance(filename) - readentrance(filename) - - print(" - Reading Caves from cave descriptions xml files") - for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files - if filename.endswith('.html'): - readcave(filename) def readentrance(filename): global entrances_xslug @@ -484,4 +414,89 @@ def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, "max": maxItems} + " in file " + context DataIssue.objects.create(parser='caves', message=message) print(message) - return items
\ No newline at end of file + return items + +def readcaves(): + '''Reads the xml-format HTML files in the EXPOWEB repo, not from the loser repo. + ''' + # For those caves which do not have cave_data/1623-xxx.html XML files even though they exist and have surveys + # should put this in a simple list which can be edited using 'Edit this file' + pending = set() + fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt") + if fpending.is_file(): + with open(fpending, "r") as fo: + cids = fo.readlines() + for cid in cids: + pending.add(cid.rstrip('\n')) + + with transaction.atomic(): + print(" - Deleting Caves and Entrances") + # attempting to avoid MariaDB crash when doing this + try: + Area.objects.all().delete() + except: + pass + try: + Cave.objects.all().delete() + except: + pass + try: + Entrance.objects.all().delete() + except: + pass + # Clear the cave data issues and the caves as we are reloading + DataIssue.objects.filter(parser='areas').delete() + DataIssue.objects.filter(parser='caves').delete() + DataIssue.objects.filter(parser='caves ok').delete() + DataIssue.objects.filter(parser='entrances').delete() + + print(" - Creating Areas 1623 and 1626") + # This crashes on the server with MariaDB even though a null parent is explicitly allowed. + area_1623= Area.objects.create(short_name = "1623", super=None) + print(" - Saving Area 1623") + area_1623.save() + area_1624= Area.objects.create(short_name = "1624", super=None) + print(" - Saving Area 1624") + area_1624.save() + + area_1626= Area.objects.create(short_name = "1626", super=None) + print(" - Saving Area 1626") + area_1626.save() + + print (" - Setting pending caves") + # Do this first, so that these empty entries are overwritten as they get properly created. + + for k in pending: + + url = k.replace("-","/") # Note we are not appending the .htm as we are modern folks now. 
+ area = area_1623 + areanum = k[0:3] + if areanum == "1623": + area = area_1623 + if areanum == "1624": + area = area_1624 + if areanum == "1626": + area = area_1626 + try: + do_pending_cave(k, url, area) + except: + message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}" + DataIssue.objects.create(parser='caves', message=message) + print(message) + raise + + with transaction.atomic(): + print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS) + print(" - Reading Entrances from entrance descriptions xml files") + for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files + # if filename.endswith('.html'): + # if Path(filename).stem[5:] in pending: + # print(f'Skipping pending entrance dummy file <{filename}>') + # else: + # readentrance(filename) + readentrance(filename) + + print(" - Reading Caves from cave descriptions xml files") + for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files + if filename.endswith('.html'): + readcave(filename) |