diff options
Diffstat (limited to 'parsers')
-rw-r--r-- | parsers/QMs.py | 2 | ||||
-rw-r--r-- | parsers/caves.py | 24 | ||||
-rw-r--r-- | parsers/logbooks.py | 43 | ||||
-rw-r--r-- | parsers/survex.py | 52 | ||||
-rw-r--r-- | parsers/surveys.py | 2 |
5 files changed, 75 insertions, 48 deletions
diff --git a/parsers/QMs.py b/parsers/QMs.py index 578e422..24ad7c9 100644 --- a/parsers/QMs.py +++ b/parsers/QMs.py @@ -7,7 +7,7 @@ from datetime import * from django.conf import settings -from troggle.core.models import QM, Cave, LogbookEntry +from troggle.core.models_caves import QM, Cave, LogbookEntry from utils import save_carefully diff --git a/parsers/caves.py b/parsers/caves.py index 745b119..514aa78 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -5,14 +5,15 @@ import re from django.conf import settings import troggle.core.models as models +import troggle.core.models_caves as models_caves def readcaves(): # Clear the cave data issues as we are reloading models.DataIssue.objects.filter(parser='caves').delete() - area_1623 = models.Area.objects.update_or_create(short_name = "1623", parent = None) - area_1626 = models.Area.objects.update_or_create(short_name = "1626", parent = None) + area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None) + area_1626 = models_caves.Area.objects.update_or_create(short_name = "1626", parent = None) print(" - Reading Entrances") #print "list of <Slug> <Filename>" for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files @@ -23,6 +24,7 @@ def readcaves(): if filename.endswith('.html'): readcave(filename) + def readentrance(filename): with open(os.path.join(settings.ENTRANCEDESCRIPTIONS, filename)) as f: contents = f.read() @@ -54,7 +56,7 @@ def readentrance(filename): bearings = getXML(entrancecontents, "bearings", maxItems = 1, context = context) url = getXML(entrancecontents, "url", maxItems = 1, context = context) if len(non_public) == 1 and len(slugs) >= 1 and len(name) >= 1 and len(entrance_description) == 1 and len(explorers) == 1 and len(map_description) == 1 and len(location_description) == 1 and len(approach) == 1 and len(underground_description) == 1 and len(marking) == 1 and len(marking_comment) == 1 and len(findability) == 1 and len(findability_description) == 1 and len(alt) == 1 and len(northing) == 1 and len(easting) == 1 and len(tag_station) == 1 and len(exact_station) == 1 and len(other_station) == 1 and len(other_description) == 1 and len(bearings) == 1 and len(url) == 1: - e, state = models.Entrance.objects.update_or_create(name = name[0], + e, state = models_caves.Entrance.objects.update_or_create(name = name[0], non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]], entrance_description = entrance_description[0], explorers = explorers[0], @@ -81,7 +83,7 @@ def readentrance(filename): primary = True for slug in slugs: #print slug, filename - cs = models.EntranceSlug.objects.update_or_create(entrance = e, + cs = models_caves.EntranceSlug.objects.update_or_create(entrance = e, slug = slug, primary = primary) primary = False @@ -118,7 +120,7 @@ def readcave(filename): url = getXML(cavecontents, "url", maxItems = 1, context = context) entrances = getXML(cavecontents, "entrance", context = context) if len(non_public) == 1 and len(slugs) >= 1 and len(official_name) == 1 and len(areas) >= 1 and len(kataster_code) == 1 and len(kataster_number) == 1 and len(unofficial_number) == 1 and len(explorers) == 1 and len(underground_description) == 1 and len(equipment) == 1 and len(references) == 1 and len(survey) == 1 and len(kataster_status) == 1 and len(underground_centre_line) == 1 and len(notes) == 1 and len(length) == 1 and len(depth) == 1 and len(extent) == 1 and len(survex_file) == 1 and len(description_file ) == 1 and len(url) == 1 and len(entrances) >= 1: - c, state = models.Cave.objects.update_or_create(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]], + c, state = models_caves.Cave.objects.update_or_create(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]], official_name = official_name[0], kataster_code = kataster_code[0], kataster_number = kataster_number[0], @@ -139,17 +141,17 @@ def readcave(filename): url = url[0], filename = filename) for area_slug in areas: - area = models.Area.objects.filter(short_name = area_slug) + area = models_caves.Area.objects.filter(short_name = area_slug) if area: newArea = area[0] else: - newArea = models.Area(short_name = area_slug, parent = models.Area.objects.get(short_name = "1623")) + newArea = models_caves.Area(short_name = area_slug, parent = models_caves.Area.objects.get(short_name = "1623")) newArea.save() c.area.add(newArea) primary = True for slug in slugs: try: - cs = models.CaveSlug.objects.update_or_create(cave = c, + cs = models_caves.CaveSlug.objects.update_or_create(cave = c, slug = slug, primary = primary) except: @@ -162,8 +164,8 @@ def readcave(filename): slug = getXML(entrance, "entranceslug", maxItems = 1, context = context)[0] letter = getXML(entrance, "letter", maxItems = 1, context = context)[0] try: - entrance = models.Entrance.objects.get(entranceslug__slug = slug) - ce = models.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance) + entrance = models_caves.Entrance.objects.get(entranceslug__slug = slug) + ce = models_caves.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance) except: message = " ! Entrance text (slug) %s missing %s" % (slug, context) models.DataIssue.objects.create(parser='caves', message=message) @@ -185,4 +187,4 @@ def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, "max": maxItems} + context models.DataIssue.objects.create(parser='caves', message=message) print(message) - return items + return items
\ No newline at end of file diff --git a/parsers/logbooks.py b/parsers/logbooks.py index e2f0ba0..f0ae2fa 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -12,6 +12,7 @@ from django.template.defaultfilters import slugify from django.utils.timezone import get_current_timezone, make_aware import troggle.core.models as models +import troggle.core.models_caves as models_caves from parsers.people import GetPersonExpeditionNameLookup from utils import save_carefully @@ -49,23 +50,24 @@ def GetTripPersons(trippeople, expedition, logtime_underground): author = res[-1][0] return res, author -def GetTripCave(place): #need to be fuzzier about matching here. Already a very slow function... -# print "Getting cave for " , place +def GetTripCave(place): + #need to be fuzzier about matching here. Already a very slow function... + # print "Getting cave for " , place try: katastNumRes=[] - katastNumRes=list(models.Cave.objects.filter(kataster_number=int(place))) + katastNumRes=list(models_caves.Cave.objects.filter(kataster_number=int(place))) except ValueError: pass - officialNameRes=list(models.Cave.objects.filter(official_name=place)) + officialNameRes=list(models_caves.Cave.objects.filter(official_name=place)) tripCaveRes=officialNameRes+katastNumRes if len(tripCaveRes)==1: -# print "Place " , place , "entered as" , tripCaveRes[0] + # print "Place " , place , "entered as" , tripCaveRes[0] return tripCaveRes[0] - elif models.OtherCaveName.objects.filter(name=place): - tripCaveRes=models.OtherCaveName.objects.filter(name__icontains=place)[0].cave -# print "Place " , place , "entered as" , tripCaveRes + elif models_caves.OtherCaveName.objects.filter(name=place): + tripCaveRes=models_caves.OtherCaveName.objects.filter(name__icontains=place)[0].cave + # print "Place " , place , "entered as" , tripCaveRes return tripCaveRes elif len(tripCaveRes)>1: @@ -76,6 +78,25 @@ def GetTripCave(place): #need to be fuzzier about matching here. Already a very print(("No cave found for place " , place)) return +# lookup function modelled on GetPersonExpeditionNameLookup +Gcavelookup = None +def GetCaveLookup(): + global Gcavelookup + if Gcavelookup: + return Gcavelookup + Gcavelookup = {"NONEPLACEHOLDER":None} + for cave in models_caves.Cave.objects.all(): + Gcavelookup[cave.official_name.lower()] = cave + if cave.kataster_number: + Gcavelookup[cave.kataster_number] = cave + if cave.unofficial_number: + Gcavelookup[cave.unofficial_number] = cave + + Gcavelookup["tunnocks"] = Gcavelookup["258"] + Gcavelookup["hauchhole"] = Gcavelookup["234"] + return Gcavelookup + + logentries = [] # the entire logbook is a single object: a list of entries noncaveplaces = [ "Journey", "Loser Plateau" ] @@ -195,7 +216,6 @@ def Parseloghtmltxt(year, expedition, txt): if logbook_entry_count == 0: print(" - No trip entries found in logbook, check the syntax matches htmltxt format") - # main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it def Parseloghtml01(year, expedition, txt): tripparas = re.findall(r"<hr[\s/]*>([\s\S]*?)(?=<hr)", txt) @@ -381,6 +401,7 @@ def LoadLogbookForExpedition(expedition): #return "TOLOAD: " + year + " " + str(expedition.personexpedition_set.all()[1].logbookentry_set.count()) + " " + str(models.PersonTrip.objects.filter(personexpedition__expedition=expedition).count()) + def LoadLogbooks(): """ This is the master function for parsing all logbooks into the Troggle database. """ @@ -440,7 +461,7 @@ def parseAutoLogBookEntry(filename): if caveMatch: caveRef, = caveMatch.groups() try: - cave = models.getCaveByReference(caveRef) + cave = models_caves.getCaveByReference(caveRef) except AssertionError: cave = None errors.append("Cave not found in database") @@ -496,4 +517,4 @@ def parseAutoLogBookEntry(filename): time_underground = TU, logbook_entry = logbookEntry, is_logbook_entry_author = author).save() - print(logbookEntry) + print(logbookEntry)
\ No newline at end of file diff --git a/parsers/survex.py b/parsers/survex.py index 35e09b8..b7158aa 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -10,6 +10,8 @@ from django.utils.timezone import make_aware import troggle.settings as settings import troggle.core.models as models +import troggle.core.models_caves as models_caves +import troggle.core.models_survex as models_survex from troggle.parsers.people import GetPersonExpeditionNameLookup from troggle.core.views_caves import MapLocations @@ -26,7 +28,7 @@ def LoadSurvexLineLeg(survexblock, stardata, sline, comment, cave): ssfrom = survexblock.MakeSurvexStation(ls[stardata["from"]]) ssto = survexblock.MakeSurvexStation(ls[stardata["to"]]) - survexleg = models.SurvexLeg(block=survexblock, stationfrom=ssfrom, stationto=ssto) + survexleg = models_survex.SurvexLeg(block=survexblock, stationfrom=ssfrom, stationto=ssto) # this next fails for two surface survey svx files which use / for decimal point # e.g. '29/09' in the tape measurement, or use decimals but in brackets, e.g. (06.05) if stardata["type"] == "normal": @@ -153,7 +155,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) # print(insp+'Match') # print(insp+os_cave) - cave = models.getCaveByReference(pos_cave) + cave = models_caves.getCaveByReference(pos_cave) if cave: survexfile.cave = cave svxlines = '' @@ -188,7 +190,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): wallet = "0" + wallet refscan = "%s#%s%s" % (yr, letterx, wallet ) #print(insp+' - Wallet ;ref - %s - looking for survexscansfolder' % refscan) - survexscansfolders = models.SurvexScansFolder.objects.filter(walletname=refscan) + survexscansfolders = models_survex.SurvexScansFolder.objects.filter(walletname=refscan) if survexscansfolders: survexblock.survexscansfolder = survexscansfolders[0] #survexblock.refscandir = "%s/%s%%23%s" % (mref.group(1), mref.group(1), mref.group(2)) @@ -228,15 +230,15 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): # If the QM isn't resolved (has a resolving station) then load it if not qm_resolve_section or qm_resolve_section != '-' or qm_resolve_section != 'None': - from_section = models.SurvexBlock.objects.filter(name=qm_from_section) + from_section = models_survex.SurvexBlock.objects.filter(name=qm_from_section) # If we can find a section (survex note chunck, named) if len(from_section) > 0: # print(insp+from_section[0]) - from_station = models.SurvexStation.objects.filter(block=from_section[0], name=qm_from_station) + from_station = models_survex.SurvexStation.objects.filter(block=from_section[0], name=qm_from_station) # If we can find a from station then we have the nearest station and can import it if len(from_station) > 0: # print(insp+from_station[0]) - qm = models.QM.objects.create(number=qm_no, + qm = models_caves.QM.objects.create(number=qm_no, nearest_station=from_station[0], grade=qm_grade.upper(), location_description=qm_notes) @@ -261,7 +263,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): assert (int(yr)>1960 and int(yr)<2039), "Wallet year out of bounds: %s" % yr assert (int(wallet)<100), "Wallet number more than 100: %s" % wallet refscan = "%s#%s%s" % (yr, letterx, wallet) - survexscansfolders = models.SurvexScansFolder.objects.filter(walletname=refscan) + survexscansfolders = models_survex.SurvexScansFolder.objects.filter(walletname=refscan) if survexscansfolders: survexblock.survexscansfolder = survexscansfolders[0] survexblock.save() @@ -299,12 +301,12 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): if path_match: pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) # print(insp+pos_cave) - cave = models.getCaveByReference(pos_cave) + cave = models_caves.getCaveByReference(pos_cave) if cave: survexfile.cave = cave else: print((insp+' - No match in DB (i) for %s, so loading..' % includepath)) - includesurvexfile = models.SurvexFile(path=includepath) + includesurvexfile = models_survex.SurvexFile(path=includepath) includesurvexfile.save() includesurvexfile.SetDirectory() if includesurvexfile.exists(): @@ -322,7 +324,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): if path_match: pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) # print(insp+pos_cave) - cave = models.getCaveByReference(pos_cave) + cave = models_caves.getCaveByReference(pos_cave) if cave: survexfile.cave = cave else: @@ -331,7 +333,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): name = line.lower() print((insp+' - Begin found for: ' + name)) # print(insp+'Block cave: ' + str(survexfile.cave)) - survexblockdown = models.SurvexBlock(name=name, begin_char=fin.tell(), parent=survexblock, survexpath=survexblock.survexpath+"."+name, cave=survexfile.cave, survexfile=survexfile, totalleglength=0.0) + survexblockdown = models_survex.SurvexBlock(name=name, begin_char=fin.tell(), parent=survexblock, survexpath=survexblock.survexpath+"."+name, cave=survexfile.cave, survexfile=survexfile, totalleglength=0.0) survexblockdown.save() survexblock.save() survexblock = survexblockdown @@ -376,7 +378,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): personexpedition = survexblock.expedition and GetPersonExpeditionNameLookup(survexblock.expedition).get(tm.lower()) if (personexpedition, tm) not in teammembers: teammembers.append((personexpedition, tm)) - personrole = models.SurvexPersonRole(survexblock=survexblock, nrole=mteammember.group(1).lower(), personexpedition=personexpedition, personname=tm) + personrole = models_survex.SurvexPersonRole(survexblock=survexblock, nrole=mteammember.group(1).lower(), personexpedition=personexpedition, personname=tm) personrole.expeditionday = survexblock.expeditionday if personexpedition: personrole.person=personexpedition.person @@ -384,7 +386,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): elif cmd == "title": #print(insp+' - Title found: ') - survextitle = models.SurvexTitle(survexblock=survexblock, title=line.strip('"'), cave=survexfile.cave) + survextitle = models_survex.SurvexTitle(survexblock=survexblock, title=line.strip('"'), cave=survexfile.cave) survextitle.save() pass @@ -431,14 +433,14 @@ def LoadAllSurvexBlocks(): print(' - Flushing All Survex Blocks...') - models.SurvexBlock.objects.all().delete() - models.SurvexFile.objects.all().delete() - models.SurvexDirectory.objects.all().delete() - models.SurvexEquate.objects.all().delete() - models.SurvexLeg.objects.all().delete() - models.SurvexTitle.objects.all().delete() - models.SurvexPersonRole.objects.all().delete() - models.SurvexStation.objects.all().delete() + models_survex.SurvexBlock.objects.all().delete() + models_survex.SurvexFile.objects.all().delete() + models_survex.SurvexDirectory.objects.all().delete() + models_survex.SurvexEquate.objects.all().delete() + models_survex.SurvexLeg.objects.all().delete() + models_survex.SurvexTitle.objects.all().delete() + models_survex.SurvexPersonRole.objects.all().delete() + models_survex.SurvexStation.objects.all().delete() print(" - Data flushed") # Clear the data issues as we are reloading @@ -450,12 +452,12 @@ def LoadAllSurvexBlocks(): # Redirect sys.stdout to the file sys.stdout = open('loadsurvexblks.log', 'w') - survexfile = models.SurvexFile(path=settings.SURVEX_TOPNAME, cave=None) + survexfile = models_survex.SurvexFile(path=settings.SURVEX_TOPNAME, cave=None) survexfile.save() survexfile.SetDirectory() #Load all - survexblockroot = models.SurvexBlock(name="root", survexpath="", begin_char=0, cave=None, survexfile=survexfile, totalleglength=0.0) + survexblockroot = models_survex.SurvexBlock(name="root", survexpath="", begin_char=0, cave=None, survexfile=survexfile, totalleglength=0.0) survexblockroot.save() fin = survexfile.OpenFile() textlines = [ ] @@ -545,14 +547,14 @@ def LoadPos(): for line in posfile.readlines(): r = poslineregex.match(line) if r: - x, y, z, id = r.groups() # easting, northing, altitude, survexstation + x, y, z, id = r.groups() if id in notfoundbefore: skip[id] = 1 else: for sid in mappoints: if id.endswith(sid): try: - ss = models.SurvexStation.objects.lookup(id) + ss = models_survex.SurvexStation.objects.lookup(id) ss.x = float(x) ss.y = float(y) ss.z = float(z) diff --git a/parsers/surveys.py b/parsers/surveys.py index 2f0ff90..80be33f 100644 --- a/parsers/surveys.py +++ b/parsers/surveys.py @@ -16,6 +16,8 @@ from functools import reduce import settings from troggle.core.models import * +from troggle.core.models_caves import * +from troggle.core.models_survex import * def get_or_create_placeholder(year): """ All surveys must be related to a logbookentry. We don't have a way to |