summaryrefslogtreecommitdiffstats
path: root/parsers/caves.py
diff options
context:
space:
mode:
authorSam Wenham <sam@wenhams.co.uk>2019-04-19 22:52:54 +0100
committerSam Wenham <sam@wenhams.co.uk>2019-04-19 22:52:54 +0100
commitb42249890e83fafdfd2f2918cd80f4621252f7ab (patch)
treef069b8ebc452437ebb09fca8f343be6e4e0e6cee /parsers/caves.py
parentcc313246bb50fa8851fe65b1511101060206cd79 (diff)
downloadtroggle-b42249890e83fafdfd2f2918cd80f4621252f7ab.tar.gz
troggle-b42249890e83fafdfd2f2918cd80f4621252f7ab.tar.bz2
troggle-b42249890e83fafdfd2f2918cd80f4621252f7ab.zip
Updating caves and entrances is no longer nuclear!
Big overhaul of people processing: fullname has been added to the model, and lastname is now the last element of names (index -1) unless you only have one name (yes, you, Wookey). This allows Jon Arne Toft and Wookey to live in the same DB. Names can now have HTML chars in them; this should be real unicode, but that can only happen when we go to Python 3!
Diffstat (limited to 'parsers/caves.py')
-rw-r--r--parsers/caves.py37
1 files changed, 19 insertions, 18 deletions
diff --git a/parsers/caves.py b/parsers/caves.py
index 2c28365..606007f 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -6,16 +6,18 @@ import re
def readcaves():
- area_1623 = models.Area(short_name = "1623", parent = None)
- area_1623.save()
- area_1626 = models.Area(short_name = "1626", parent = None)
- area_1626.save()
- print("Reading Entrances")
+
+ # Clear the cave data issues as we are reloading
+ models.DataIssue.objects.filter(parser='caves').delete()
+
+ area_1623 = models.Area.objects.update_or_create(short_name = "1623", parent = None)
+ area_1626 = models.Area.objects.update_or_create(short_name = "1626", parent = None)
+ print(" - Reading Entrances")
#print "list of <Slug> <Filename>"
for filename in os.walk(settings.ENTRANCEDESCRIPTIONS).next()[2]: #Should be a better way of getting a list of files
if filename.endswith('.html'):
readentrance(filename)
- print ("Reading Caves")
+ print (" - Reading Caves")
for filename in os.walk(settings.CAVEDESCRIPTIONS).next()[2]: #Should be a better way of getting a list of files
if filename.endswith('.html'):
readcave(filename)
@@ -51,7 +53,7 @@ def readentrance(filename):
bearings = getXML(entrancecontents, "bearings", maxItems = 1, context = context)
url = getXML(entrancecontents, "url", maxItems = 1, context = context)
if len(non_public) == 1 and len(slugs) >= 1 and len(name) >= 1 and len(entrance_description) == 1 and len(explorers) == 1 and len(map_description) == 1 and len(location_description) == 1 and len(approach) == 1 and len(underground_description) == 1 and len(marking) == 1 and len(marking_comment) == 1 and len(findability) == 1 and len(findability_description) == 1 and len(alt) == 1 and len(northing) == 1 and len(easting) == 1 and len(tag_station) == 1 and len(exact_station) == 1 and len(other_station) == 1 and len(other_description) == 1 and len(bearings) == 1 and len(url) == 1:
- e = models.Entrance(name = name[0],
+ e, state = models.Entrance.objects.update_or_create(name = name[0],
non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
entrance_description = entrance_description[0],
explorers = explorers[0],
@@ -75,14 +77,12 @@ def readentrance(filename):
url = url[0],
filename = filename,
cached_primary_slug = slugs[0])
- e.save()
primary = True
for slug in slugs:
#print slug, filename
- cs = models.EntranceSlug(entrance = e,
+ cs = models.EntranceSlug.objects.update_or_create(entrance = e,
slug = slug,
primary = primary)
- cs.save()
primary = False
def readcave(filename):
@@ -117,7 +117,7 @@ def readcave(filename):
url = getXML(cavecontents, "url", maxItems = 1, context = context)
entrances = getXML(cavecontents, "entrance", context = context)
if len(non_public) == 1 and len(slugs) >= 1 and len(official_name) == 1 and len(areas) >= 1 and len(kataster_code) == 1 and len(kataster_number) == 1 and len(unofficial_number) == 1 and len(explorers) == 1 and len(underground_description) == 1 and len(equipment) == 1 and len(references) == 1 and len(survey) == 1 and len(kataster_status) == 1 and len(underground_centre_line) == 1 and len(notes) == 1 and len(length) == 1 and len(depth) == 1 and len(extent) == 1 and len(survex_file) == 1 and len(description_file ) == 1 and len(url) == 1 and len(entrances) >= 1:
- c = models.Cave(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
+ c, state = models.Cave.objects.update_or_create(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
official_name = official_name[0],
kataster_code = kataster_code[0],
kataster_number = kataster_number[0],
@@ -137,7 +137,6 @@ def readcave(filename):
description_file = description_file[0],
url = url[0],
filename = filename)
- c.save()
for area_slug in areas:
area = models.Area.objects.filter(short_name = area_slug)
if area:
@@ -149,12 +148,13 @@ def readcave(filename):
primary = True
for slug in slugs:
try:
- cs = models.CaveSlug(cave = c,
+ cs = models.CaveSlug.objects.update_or_create(cave = c,
slug = slug,
primary = primary)
- cs.save()
except:
- print("Can't find text (slug): %s, skipping %s" % (slug, context))
+ message = "Can't find text (slug): %s, skipping %s" % (slug, context)
+ models.DataIssue.objects.create(parser='caves', message=message)
+ print(message)
primary = False
for entrance in entrances:
@@ -162,10 +162,11 @@ def readcave(filename):
letter = getXML(entrance, "letter", maxItems = 1, context = context)[0]
try:
entrance = models.Entrance.objects.get(entranceslug__slug = slug)
- ce = models.CaveAndEntrance(cave = c, entrance_letter = letter, entrance = entrance)
- ce.save()
+ ce = models.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance)
except:
- print ("Entrance text (slug) %s missing %s" % (slug, context))
+ message = "Entrance text (slug) %s missing %s" % (slug, context)
+ models.DataIssue.objects.create(parser='caves', message=message)
+ print(message)
def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""):