author    | Sam Wenham <sam@wenhams.co.uk> | 2019-07-16 00:07:37 +0100
committer | Sam Wenham <sam@wenhams.co.uk> | 2019-07-16 00:07:37 +0100
commit    | 08a41941f9e2b35b16548624d5070216dd933f79 (patch)
tree      | cfd0bd7dfc68c77cf66d92d72b6d1d74e6155880 /parsers
parent    | a26109cb3006dd64e1bec38100397ad636813e98 (diff)
download  | troggle-08a41941f9e2b35b16548624d5070216dd933f79.tar.gz
          | troggle-08a41941f9e2b35b16548624d5070216dd933f79.tar.bz2
          | troggle-08a41941f9e2b35b16548624d5070216dd933f79.zip
Part one of getting troggle to work with django 1.10
Major rework of how survex is processed
Diffstat (limited to 'parsers')
-rw-r--r-- | parsers/logbooks.py |   2
-rw-r--r-- | parsers/people.py   |  22
-rw-r--r-- | parsers/survex.py   | 108
-rw-r--r-- | parsers/surveys.py  |  26
4 files changed, 98 insertions, 60 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index e5817a6..224b2cd 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -121,7 +121,7 @@ def ParseDate(tripdate, year):
             day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd
         else:
             assert False, tripdate
-    return datetime.date(year, month, day)
+    return make_aware(datetime.datetime(year, month, day), get_current_timezone())
 
 # 2006, 2008 - 2010
 def Parselogwikitxt(year, expedition, txt):
diff --git a/parsers/people.py b/parsers/people.py
index 34a5ff3..28a036a 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -87,7 +87,7 @@ def LoadPersonsExpos():
             lastname = ""
 
         lookupAttribs={'first_name':firstname, 'last_name':(lastname or "")}
-        nonLookupAttribs={'is_vfho':personline[header["VfHO member"]], 'fullname':fullname}
+        nonLookupAttribs={'is_vfho':bool(personline[header["VfHO member"]]), 'fullname':fullname}
         person, created = save_carefully(models.Person, lookupAttribs, nonLookupAttribs)
 
         parseMugShotAndBlurb(personline=personline, header=header, person=person)
@@ -100,26 +100,6 @@ def LoadPersonsExpos():
             nonLookupAttribs = {'nickname':nickname, 'is_guest':(personline[header["Guest"]] == "1")}
             save_carefully(models.PersonExpedition, lookupAttribs, nonLookupAttribs)
 
-
-    # this fills in those people for whom 2008 was their first expo
-    #print "Loading personexpeditions 2008"
-    #expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
-    #expomissing = set(expoers2008)
-    #for name in expomissing:
-    #    firstname, lastname = name.split()
-    #    is_guest = name in ["Eeva Makiranta", "Keith Curtis"]
-    #    print "2008:", name
-    #    persons = list(models.Person.objects.filter(first_name=firstname, last_name=lastname))
-    #    if not persons:
-    #        person = models.Person(first_name=firstname, last_name = lastname, is_vfho = False, mug_shot = "")
-    #        #person.Sethref()
-    #        person.save()
-    #    else:
-    #        person = persons[0]
-    #    expedition = models.Expedition.objects.get(year="2008")
-    #    personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname="", is_guest=is_guest)
-    #    personexpedition.save()
-
 # used in other referencing parser functions
 # expedition name lookup cached for speed (it's a very big list)
 Gpersonexpeditionnamelookup = { }
diff --git a/parsers/survex.py b/parsers/survex.py
index 294de73..f80f9f2 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -5,9 +5,14 @@ import troggle.settings as settings
 from subprocess import call, Popen, PIPE
 from troggle.parsers.people import GetPersonExpeditionNameLookup
+from django.utils.timezone import get_current_timezone
+from django.utils.timezone import make_aware
+
 
 import re
 import os
+from datetime import datetime
+line_leg_regex = re.compile(r"[\d\-+.]+$")
 
 
 def LoadSurvexLineLeg(survexblock, stardata, sline, comment):
     ls = sline.lower().split()
@@ -53,8 +58,8 @@ def LoadSurvexLineLeg(survexblock, stardata, sline, comment):
             survexleg.compass = 1000
             survexleg.clino = -90.0
         else:
-            assert re.match(r"[\d\-+.]+$", lcompass), ls
-            assert re.match(r"[\d\-+.]+$", lclino) and lclino != "-", ls
+            assert line_leg_regex.match(lcompass), ls
+            assert line_leg_regex.match(lclino) and lclino != "-", ls
             survexleg.compass = float(lcompass)
             survexleg.clino = float(lclino)
 
@@ -80,11 +85,16 @@ def LoadSurvexEquate(survexblock, sline):
 
 def LoadSurvexLinePassage(survexblock, stardata, sline, comment):
     pass
-
 
 stardatadefault = {"type":"normal", "t":"leg", "from":0, "to":1, "tape":2, "compass":3, "clino":4}
 stardataparamconvert = {"length":"tape", "bearing":"compass", "gradient":"clino"}
 
+regex_comment = re.compile(r"([^;]*?)\s*(?:;\s*(.*))?\n?$")
+regex_ref = re.compile(r'.*?ref.*?(\d+)\s*#\s*(\d+)')
+regex_star = re.compile(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$')
+regex_team = re.compile(r"(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$(?i)")
+regex_team_member = re.compile(r" and | / |, | & | \+ |^both$|^none$(?i)")
+
 def RecursiveLoad(survexblock, survexfile, fin, textlines):
     iblankbegins = 0
     text = [ ]
@@ -92,37 +102,47 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
     teammembers = [ ]
 
     # uncomment to print out all files during parsing
-    print("Reading file: " + survexblock.survexfile.path)
+    print(" - Reading file: " + survexblock.survexfile.path)
+    stamp = datetime.now()
+    lineno = 0
     while True:
         svxline = fin.readline().decode("latin1")
         if not svxline:
+            print(' - Not survex')
             return
         textlines.append(svxline)
 
+        lineno += 1
+
+        # print(' - Line: %d' % lineno)
+
         # break the line at the comment
-        sline, comment = re.match(r"([^;]*?)\s*(?:;\s*(.*))?\n?$", svxline.strip()).groups()
+        sline, comment = regex_comment.match(svxline.strip()).groups()
 
         # detect ref line pointing to the scans directory
-        mref = comment and re.match(r'.*?ref.*?(\d+)\s*#\s*(\d+)', comment)
+        mref = comment and regex_ref.match(comment)
         if mref:
             refscan = "%s#%s" % (mref.group(1), mref.group(2))
             survexscansfolders = models.SurvexScansFolder.objects.filter(walletname=refscan)
             if survexscansfolders:
                 survexblock.survexscansfolder = survexscansfolders[0]
                 #survexblock.refscandir = "%s/%s%%23%s" % (mref.group(1), mref.group(1), mref.group(2))
-                survexblock.save()
+            survexblock.save()
             continue
 
         if not sline:
             continue
 
         # detect the star command
-        mstar = re.match(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$', sline)
+        mstar = regex_star.match(sline)
         if not mstar:
             if "from" in stardata:
                 LoadSurvexLineLeg(survexblock, stardata, sline, comment)
+                # print(' - From: ')
+                #print(stardata)
             elif stardata["type"] == "passage":
                 LoadSurvexLinePassage(survexblock, stardata, sline, comment)
+                # print(' - Passage: ')
             #Missing "station" in stardata.
             continue
 
@@ -131,7 +151,19 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
         cmd = cmd.lower()
         if re.match("include$(?i)", cmd):
             includepath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line))
-            includesurvexfile = models.SurvexFile(path=includepath, cave=survexfile.cave)
+            print(' - Include file found including - ' + includepath)
+            # Try to find the cave in the DB if not use the string as before
+            path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
+            if path_match:
+                pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
+                print(pos_cave)
+                cave = models.getCaveByReference(pos_cave)
+                if not cave:
+                    cave = survexfile.cave
+            else:
+                print('No match for %s' % includepath)
+                cave = survexfile.cave
+            includesurvexfile = models.SurvexFile(path=includepath, cave=cave)
             includesurvexfile.save()
             includesurvexfile.SetDirectory()
             if includesurvexfile.exists():
@@ -141,6 +173,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
         elif re.match("begin$(?i)", cmd):
             if line:
                 name = line.lower()
+                #print(' - Begin found for: ' + name)
                 survexblockdown = models.SurvexBlock(name=name, begin_char=fin.tell(), parent=survexblock, survexpath=survexblock.survexpath+"."+name, cave=survexblock.cave, survexfile=survexfile, totalleglength=0.0)
                 survexblockdown.save()
                 textlinesdown = [ ]
@@ -154,11 +187,16 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
             else:
                 survexblock.text = "".join(textlines)
                 survexblock.save()
+                # print(' - End found: ')
+                endstamp = datetime.now()
+                timetaken = endstamp - stamp
+                # print(' - Time to process: ' + str(timetaken))
                 return
 
         elif re.match("date$(?i)", cmd):
             if len(line) == 10:
-                survexblock.date = re.sub(r"\.", "-", line)
+                #print(' - Date found: ' + line)
+                survexblock.date = make_aware(datetime.strptime(re.sub(r"\.", "-", line), '%Y-%m-%d'), get_current_timezone())
                 expeditions = models.Expedition.objects.filter(year=line[:4])
                 if expeditions:
                     assert len(expeditions) == 1
@@ -167,9 +205,11 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
                     survexblock.save()
 
         elif re.match("team$(?i)", cmd):
-            mteammember = re.match(r"(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$(?i)", line)
+            pass
+            # print(' - Team found: ')
+            mteammember = regex_team.match(line)
             if mteammember:
-                for tm in re.split(r" and | / |, | & | \+ |^both$|^none$(?i)", mteammember.group(2)):
+                for tm in regex_team_member.split(mteammember.group(2)):
                     if tm:
                         personexpedition = survexblock.expedition and GetPersonExpeditionNameLookup(survexblock.expedition).get(tm.lower())
                         if (personexpedition, tm) not in teammembers:
@@ -181,6 +221,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
                             personrole.save()
 
         elif cmd == "title":
+            #print(' - Title found: ')
             survextitle = models.SurvexTitle(survexblock=survexblock, title=line.strip('"'), cave=survexblock.cave)
             survextitle.save()
 
@@ -189,8 +230,11 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
             pass
 
         elif cmd == "data":
+            #print(' - Data found: ')
             ls = line.lower().split()
             stardata = { "type":ls[0] }
+            #print(' - Star data: ', stardata)
+            #print(ls)
             for i in range(0, len(ls)):
                 stardata[stardataparamconvert.get(ls[i], ls[i])] = i - 1
             if ls[0] in ["normal", "cartesian", "nosurvey"]:
@@ -201,33 +245,47 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
                 assert ls[0] == "passage", line
 
         elif cmd == "equate":
+            #print(' - Equate found: ')
             LoadSurvexEquate(survexblock, line)
 
         elif cmd == "fix":
+            #print(' - Fix found: ')
             survexblock.MakeSurvexStation(line.split()[0])
 
         else:
+            #print(' - Stuff')
            if cmd not in ["sd", "include", "units", "entrance", "data", "flags", "title", "export", "instrument", "calibrate", "set", "infer", "alias", "ref", "cs", "declination", "case"]:
                 print("Unrecognised command in line:", cmd, line, survexblock, survexblock.survexfile.path)
 
+    endstamp = datetime.now()
+    timetaken = endstamp - stamp
+    # print(' - Time to process: ' + str(timetaken))
 
 def ReloadSurvexCave(survex_cave, area):
-    print(survex_cave, area)
-    cave = models.Cave.objects.get(kataster_number=survex_cave, area__short_name=area)
-    print(cave)
-    #cave = models.Cave.objects.get(kataster_number=survex_cave)
+    print(' - Area: ' + str(area) + ' Cave: ' + str(survex_cave))
+    filt_cave = models.Cave.objects.filter(kataster_number=survex_cave, area__short_name=area)[:1]
+
+    if len(filt_cave) < 1:
+        filt_cave = models.Cave.objects.filter(unofficial_number=survex_cave, area__short_name=area)[:1]
+
+    cave = filt_cave[0]
+    print(' - ' + str(cave))
     cave.survexblock_set.all().delete()
     cave.survexfile_set.all().delete()
     cave.survexdirectory_set.all().delete()
-    
+
+    file_stamp_start = datetime.now()
     survexfile = models.SurvexFile(path="caves-" + cave.kat_area() + "/" + survex_cave + "/" + survex_cave, cave=cave)
     survexfile.save()
     survexfile.SetDirectory()
-    
+
     survexblockroot = models.SurvexBlock(name="root", survexpath="caves-" + cave.kat_area(), begin_char=0, cave=cave, survexfile=survexfile, totalleglength=0.0)
     survexblockroot.save()
     fin = survexfile.OpenFile()
+    file_stamp_end = datetime.now()
+    file_time = file_stamp_end - file_stamp_start
+    print(' - Files time to process: ' + str(file_time))
     textlines = [ ]
     RecursiveLoad(survexblockroot, survexfile, fin, textlines)
     survexblockroot.text = "".join(textlines)
@@ -268,10 +326,18 @@ def LoadAllSurvexBlocks():
     print(" - Reloading all caves")
     caves = models.Cave.objects.all()
     for cave in caves:
+        rec_stamp_start = datetime.now()
         if cave.kataster_number and os.path.isdir(os.path.join(settings.SURVEX_DATA, "caves-" + cave.kat_area(), cave.kataster_number)):
             if cave.kataster_number not in ['40']:
-                print("loading", cave, cave.kat_area())
-                ReloadSurvexCave(cave.kataster_number, cave.kat_area())
+                print(" - Loading " + str(cave) + " " + cave.kat_area())
+                #ReloadSurvexCave(cave.kataster_number, cave.kat_area())
+                rec_stamp_end = datetime.now()
+        if cave.unofficial_number and os.path.isdir(os.path.join(settings.SURVEX_DATA, "caves-" + cave.kat_area(), cave.unofficial_number)):
+            print(" - Loading " + str(cave) + " " + cave.kat_area())
+            #ReloadSurvexCave(cave.unofficial_number, cave.kat_area())
+        timetaken = rec_stamp_end - rec_stamp_start
+        # print(' - Time to process: ' + str(timetaken))
+        # print('--------')
 
 
 poslineregex = re.compile(r"^\(\s*([+-]?\d*\.\d*),\s*([+-]?\d*\.\d*),\s*([+-]?\d*\.\d*)\s*\)\s*([^\s]+)$")
@@ -286,7 +352,7 @@ def LoadPos():
     posfile = open("%s%s.pos" % (settings.SURVEX_DATA, settings.SURVEX_TOPNAME))
     posfile.readline() #Drop header
     for line in posfile.readlines():
-        r = poslineregex.match(line) 
+        r = poslineregex.match(line)
         if r:
             x, y, z, name = r.groups()
             try:
diff --git a/parsers/surveys.py b/parsers/surveys.py
index efab536..0eed6f0 100644
--- a/parsers/surveys.py
+++ b/parsers/surveys.py
@@ -1,12 +1,7 @@
 import sys, os, types, logging, stat
-#sys.path.append('C:\\Expo\\expoweb')
-#from troggle import *
-#os.environ['DJANGO_SETTINGS_MODULE']='troggle.settings'
 import settings
 from troggle.core.models import *
 from PIL import Image
-#import settings
-#import core.models as models
 import csv
 import re
 import datetime
@@ -45,21 +40,18 @@ def readSurveysFromCSV():
 
     logging.info("Deleting all scanned images")
     ScannedImage.objects.all().delete()
-    
-    
+
     logging.info("Deleting all survey objects")
     Survey.objects.all().delete()
-    
-    
+
     logging.info("Beginning to import surveys from "+str(os.path.join(settings.SURVEYS, "Surveys.csv"))+"\n"+"-"*60+"\n")
-    
+
     for survey in surveyreader:
-        #I hate this, but some surveys have a letter eg 2000#34a. The next line deals with that.
+        # I hate this, but some surveys have a letter eg 2000#34a. The next line deals with that.
         walletNumberLetter = re.match(r'(?P<number>\d*)(?P<letter>[a-zA-Z]*)',survey[header['Survey Number']])
-        # print(walletNumberLetter.groups()) 
+        # print(walletNumberLetter.groups())
         year=survey[header['Year']]
-        
         surveyobj = Survey(
                            expedition = Expedition.objects.filter(year=year)[0],
                            wallet_number = walletNumberLetter.group('number'),
@@ -73,7 +65,6 @@ def readSurveysFromCSV():
             pass
 
         surveyobj.save()
-        
         logging.info("added survey " + survey[header['Year']] + "#" + surveyobj.wallet_number + "\r")
 
 # dead
@@ -141,7 +132,7 @@ def parseSurveyScans(expedition, logfile=None):
         yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
         print("No folder found for " + expedition.year + " at:- " + yearPath)
 
-# dead
+
 def parseSurveys(logfile=None):
     try:
         readSurveysFromCSV()
@@ -271,8 +262,9 @@ def SetTunnelfileInfo(tunnelfile):
     fin.close()
 
     mtype = re.search("<(fontcolours|sketch)", ttext)
-    assert mtype, ff
-    tunnelfile.bfontcolours = (mtype.group(1)=="fontcolours")
+    #assert mtype, ff
+    if mtype:
+        tunnelfile.bfontcolours = (mtype.group(1)=="fontcolours")
     tunnelfile.npaths = len(re.findall("<skpath", ttext))
     tunnelfile.save()
 
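
The recurring pattern in this commit is twofold: logbook and survex dates are stored as timezone-aware datetimes (make_aware plus get_current_timezone) for Django 1.10, and the per-line regexes in parsers/survex.py are compiled once at module level instead of inside the parsing loop. Below is a minimal standalone sketch of that combination, not troggle code: the helper name, the date regex, and the Europe/Vienna TIME_ZONE are illustrative assumptions.

    # Sketch only: mirrors the make_aware()/compiled-regex pattern added in
    # parsers/survex.py; the settings and names below are assumptions.
    import datetime
    import re

    from django.conf import settings
    from django.utils.timezone import get_current_timezone, make_aware

    if not settings.configured:
        # Minimal configuration so the timezone helpers work outside a project.
        settings.configure(USE_TZ=True, TIME_ZONE="Europe/Vienna")

    # Compiled once at import time, like regex_comment, regex_ref, regex_star above.
    date_line_regex = re.compile(r"(\d{4})\.(\d{2})\.(\d{2})$")

    def parse_survex_date(value):
        """Turn a '*date' value such as '2019.07.16' into an aware datetime."""
        m = date_line_regex.match(value)
        if not m:
            return None
        naive = datetime.datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)))
        return make_aware(naive, get_current_timezone())

    print(parse_survex_date("2019.07.16"))  # e.g. 2019-07-16 00:00:00+02:00

Storing aware datetimes this way avoids the naive-datetime warnings Django raises when USE_TZ is enabled, which is why the diff wraps both the logbook and survex dates in make_aware() rather than keeping plain dates.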