diff options
Diffstat (limited to 'parsers/caves.py')
-rw-r--r-- | parsers/caves.py | 53 |
1 files changed, 38 insertions, 15 deletions
diff --git a/parsers/caves.py b/parsers/caves.py index 7f7364c..3c5d98e 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -8,21 +8,44 @@ import troggle.core.models as models import troggle.core.models_caves as models_caves def readcaves(): - # Clear the cave data issues as we are reloading - # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. - models.DataIssue.objects.filter(parser='caves').delete() + # Clear the cave data issues as we are reloading + models.DataIssue.objects.filter(parser='caves').delete() + + # Do this first, so that these empty entries are overwritten as they get properly created. + # For those caves which do not have XML files even though they exist and have surveys + forgotten = ["2007-04", "2007-05", "2007-06", "2007-07", "2007-12", "2009-01", "2009-02", + "2010-06", "2010-07", "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06", + "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888", + "2018-pf-01", "2018-pf-02", "haldenloch", "gruenstein"] + for k in forgotten: + try: + cave = models_caves.Cave( + unofficial_number = k, + official_name = "Mislaid cave - created as empty object. No XML available at this time.", + notes="_Survex file found in loser repo but no description in expoweb") + if cave: + print("{} {}".format(cave.unofficial_number, cave.official_name)) + cave.save() + else: + print("Failed to create cave {} ".format(k)) + except: + message = " ! Forgotten cave error, slug: %s forgotten-id: %s" % (slug, k) + models.DataIssue.objects.create(parser='caves', message=message) + print(message) + + # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. + area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None) + area_1626 = models_caves.Area.objects.update_or_create(short_name = "1626", parent = None) + print(" - Reading Entrances") + for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files + if filename.endswith('.html'): + readentrance(filename) + print (" - Reading Caves") + for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files + if filename.endswith('.html'): + readcave(filename) + - area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None) - area_1626 = models_caves.Area.objects.update_or_create(short_name = "1626", parent = None) - print(" - Reading Entrances") - #print "list of <Slug> <Filename>" - for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files - if filename.endswith('.html'): - readentrance(filename) - print (" - Reading Caves") - for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files - if filename.endswith('.html'): - readcave(filename) def readentrance(filename): @@ -204,7 +227,7 @@ def readcave(filename): message = " ! Entrance setting failure, slug: %s letter: %s" % (slug, letter) models.DataIssue.objects.create(parser='caves', message=message) print(message) - + def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""): items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S) |