diff options
Diffstat (limited to 'parsers/caves.py')
-rw-r--r-- | parsers/caves.py | 53 |
1 file changed, 30 insertions, 23 deletions
diff --git a/parsers/caves.py b/parsers/caves.py index 3c5d98e..ff87bcd 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -1,18 +1,27 @@ -# -*- coding: utf-8 -*- import os import re from django.conf import settings -import troggle.core.models as models +from troggle.core.models import DataIssue, get_process_memory import troggle.core.models_caves as models_caves def readcaves(): - # Clear the cave data issues as we are reloading - models.DataIssue.objects.filter(parser='caves').delete() + print(" - Deleting Caves and Entrances") + models_caves.Cave.objects.all().delete() + models_caves.Entrance.objects.all().delete() + # Clear the cave data issues and the caves as we are reloading + DataIssue.objects.filter(parser='caves').delete() + DataIssue.objects.filter(parser='entrances').delete() + # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. + area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None) + area_1626 = models_caves.Area.objects.update_or_create(short_name = "1626", parent = None) + + print (" - Setting pending caves") # Do this first, so that these empty entries are overwritten as they get properly created. # For those caves which do not have XML files even though they exist and have surveys + # also needs to be done *before* entrances so that the entrance-cave links work properly. forgotten = ["2007-04", "2007-05", "2007-06", "2007-07", "2007-12", "2009-01", "2009-02", "2010-06", "2010-07", "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06", "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888", @@ -21,32 +30,30 @@ def readcaves(): try: cave = models_caves.Cave( unofficial_number = k, - official_name = "Mislaid cave - created as empty object. No XML available at this time.", + official_name = "Pending cave write-up - creating as empty object. 
No XML file available yet.", notes="_Survex file found in loser repo but no description in expoweb") if cave: - print("{} {}".format(cave.unofficial_number, cave.official_name)) + cave.save() # must save to have id before foreign keys work + cave.area = area_1623 cave.save() + message = " ! {} {}".format(cave.unofficial_number, cave.official_name) + DataIssue.objects.create(parser='caves', message=message) + print(message) else: print("Failed to create cave {} ".format(k)) except: - message = " ! Forgotten cave error, slug: %s forgotten-id: %s" % (slug, k) - models.DataIssue.objects.create(parser='caves', message=message) + message = " ! Error. Cannot create pending cave, pending-id:{}".format(k) + DataIssue.objects.create(parser='caves', message=message) print(message) - - # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. - area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None) - area_1626 = models_caves.Area.objects.update_or_create(short_name = "1626", parent = None) - print(" - Reading Entrances") + raise + print(" - Reading Entrances from entrance descriptions xml files") for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files if filename.endswith('.html'): readentrance(filename) - print (" - Reading Caves") + print(" - Reading Caves from cave descriptions xml files") for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files if filename.endswith('.html'): readcave(filename) - - - def readentrance(filename): # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. @@ -120,7 +127,7 @@ def readentrance(filename): primary = primary) for k in kents: message = " ! - DUPLICATE in db. 
entrance:"+ str(k.entrance) + ", slug:" + str(k.slug()) - models.DataIssue.objects.create(parser='caves', message=message) + DataIssue.objects.create(parser='caves', message=message) print(message) for k in kents: if k.slug() != None: @@ -189,7 +196,7 @@ def readcave(filename): kaves = models_caves.Cave.objects.all().filter(kataster_number=kataster_number[0]) for k in kaves: message = " ! - DUPLICATES in db. kataster:"+ str(k.kataster_number) + ", slug:" + str(k.slug()) - models.DataIssue.objects.create(parser='caves', message=message) + DataIssue.objects.create(parser='caves', message=message) print(message) for k in kaves: if k.slug() != None: @@ -213,7 +220,7 @@ def readcave(filename): primary = primary) except: message = " ! Cave update/create failure: %s, skipping file %s" % (slug, context) - models.DataIssue.objects.create(parser='caves', message=message) + DataIssue.objects.create(parser='caves', message=message) print(message) primary = False @@ -225,7 +232,7 @@ def readcave(filename): ce = models_caves.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance) except: message = " ! Entrance setting failure, slug: %s letter: %s" % (slug, letter) - models.DataIssue.objects.create(parser='caves', message=message) + DataIssue.objects.create(parser='caves', message=message) print(message) @@ -235,13 +242,13 @@ def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, message = " ! %(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items), "itemname": itemname, "min": minItems} + context - models.DataIssue.objects.create(parser='caves', message=message) + DataIssue.objects.create(parser='caves', message=message) print(message) if maxItems is not None and len(items) > maxItems and printwarnings: message = " ! 
%(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items), "itemname": itemname, "max": maxItems} + context - models.DataIssue.objects.create(parser='caves', message=message) + DataIssue.objects.create(parser='caves', message=message) print(message) return items
\ No newline at end of file |