author    Sam Wenham <sam@wenhams.co.uk>  2019-07-16 00:07:37 +0100
committer Sam Wenham <sam@wenhams.co.uk>  2019-07-16 00:07:37 +0100
commit    08a41941f9e2b35b16548624d5070216dd933f79 (patch)
tree      cfd0bd7dfc68c77cf66d92d72b6d1d74e6155880 /parsers
parent    a26109cb3006dd64e1bec38100397ad636813e98 (diff)
Part one of getting troggle to work with django 1.10
Major rework of how survex is processed
Diffstat (limited to 'parsers')
-rw-r--r--  parsers/logbooks.py |   2
-rw-r--r--  parsers/people.py   |  22
-rw-r--r--  parsers/survex.py   | 108
-rw-r--r--  parsers/surveys.py  |  26
4 files changed, 98 insertions(+), 60 deletions(-)
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index e5817a6..224b2cd 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -121,7 +121,7 @@ def ParseDate(tripdate, year):
day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd
else:
assert False, tripdate
- return datetime.date(year, month, day)
+ return make_aware(datetime.datetime(year, month, day), get_current_timezone())
# 2006, 2008 - 2010
def Parselogwikitxt(year, expedition, txt):
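
The logbooks.py hunk above swaps a naive datetime.date for a timezone-aware
datetime, which Django expects for datetime fields when USE_TZ is enabled.
A minimal sketch of the old and new behaviour side by side (parse_trip_date
is a hypothetical helper, not part of the commit):

    import datetime
    from django.utils.timezone import make_aware, get_current_timezone

    def parse_trip_date(year, month, day):
        # Old behaviour: a naive date with no timezone attached.
        naive = datetime.date(year, month, day)
        # New behaviour: a timezone-aware datetime built from the same values,
        # using the timezone configured in Django settings.
        aware = make_aware(datetime.datetime(year, month, day), get_current_timezone())
        return naive, aware
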
diff --git a/parsers/people.py b/parsers/people.py
index 34a5ff3..28a036a 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -87,7 +87,7 @@ def LoadPersonsExpos():
lastname = ""
lookupAttribs={'first_name':firstname, 'last_name':(lastname or "")}
- nonLookupAttribs={'is_vfho':personline[header["VfHO member"]], 'fullname':fullname}
+ nonLookupAttribs={'is_vfho':bool(personline[header["VfHO member"]]), 'fullname':fullname}
person, created = save_carefully(models.Person, lookupAttribs, nonLookupAttribs)
parseMugShotAndBlurb(personline=personline, header=header, person=person)
@@ -100,26 +100,6 @@ def LoadPersonsExpos():
nonLookupAttribs = {'nickname':nickname, 'is_guest':(personline[header["Guest"]] == "1")}
save_carefully(models.PersonExpedition, lookupAttribs, nonLookupAttribs)
-
- # this fills in those people for whom 2008 was their first expo
- #print "Loading personexpeditions 2008"
- #expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
- #expomissing = set(expoers2008)
- #for name in expomissing:
- # firstname, lastname = name.split()
- # is_guest = name in ["Eeva Makiranta", "Keith Curtis"]
- # print "2008:", name
- # persons = list(models.Person.objects.filter(first_name=firstname, last_name=lastname))
- # if not persons:
- # person = models.Person(first_name=firstname, last_name = lastname, is_vfho = False, mug_shot = "")
- # #person.Sethref()
- # person.save()
- # else:
- # person = persons[0]
- # expedition = models.Expedition.objects.get(year="2008")
- # personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname="", is_guest=is_guest)
- # personexpedition.save()
-
# used in other referencing parser functions
# expedition name lookup cached for speed (it's a very big list)
Gpersonexpeditionnamelookup = { }
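
The people.py change coerces the raw "VfHO member" spreadsheet cell to a
boolean before it is stored, so is_vfho holds a clean True/False rather than
whatever string the CSV contains. A small illustrative sketch (vfho_flag is a
made-up name):

    def vfho_flag(cell):
        # An empty cell ("") becomes False, any non-empty marker becomes True,
        # mirroring bool(personline[header["VfHO member"]]) above.
        return bool(cell)

    assert vfho_flag("") is False
    assert vfho_flag("1") is True
    assert vfho_flag("VfHO") is True
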
diff --git a/parsers/survex.py b/parsers/survex.py
index 294de73..f80f9f2 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -5,9 +5,14 @@ import troggle.settings as settings
from subprocess import call, Popen, PIPE
from troggle.parsers.people import GetPersonExpeditionNameLookup
+from django.utils.timezone import get_current_timezone
+from django.utils.timezone import make_aware
+
import re
import os
+from datetime import datetime
+line_leg_regex = re.compile(r"[\d\-+.]+$")
def LoadSurvexLineLeg(survexblock, stardata, sline, comment):
ls = sline.lower().split()
@@ -53,8 +58,8 @@ def LoadSurvexLineLeg(survexblock, stardata, sline, comment):
survexleg.compass = 1000
survexleg.clino = -90.0
else:
- assert re.match(r"[\d\-+.]+$", lcompass), ls
- assert re.match(r"[\d\-+.]+$", lclino) and lclino != "-", ls
+ assert line_leg_regex.match(lcompass), ls
+ assert line_leg_regex.match(lclino) and lclino != "-", ls
survexleg.compass = float(lcompass)
survexleg.clino = float(lclino)
@@ -80,11 +85,16 @@ def LoadSurvexEquate(survexblock, sline):
def LoadSurvexLinePassage(survexblock, stardata, sline, comment):
pass
-
stardatadefault = {"type":"normal", "t":"leg", "from":0, "to":1, "tape":2, "compass":3, "clino":4}
stardataparamconvert = {"length":"tape", "bearing":"compass", "gradient":"clino"}
+regex_comment = re.compile(r"([^;]*?)\s*(?:;\s*(.*))?\n?$")
+regex_ref = re.compile(r'.*?ref.*?(\d+)\s*#\s*(\d+)')
+regex_star = re.compile(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$')
+regex_team = re.compile(r"(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$(?i)")
+regex_team_member = re.compile(r" and | / |, | & | \+ |^both$|^none$(?i)")
+
def RecursiveLoad(survexblock, survexfile, fin, textlines):
iblankbegins = 0
text = [ ]
@@ -92,37 +102,47 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
teammembers = [ ]
# uncomment to print out all files during parsing
- print("Reading file: " + survexblock.survexfile.path)
+ print(" - Reading file: " + survexblock.survexfile.path)
+ stamp = datetime.now()
+ lineno = 0
while True:
svxline = fin.readline().decode("latin1")
if not svxline:
+ print(' - Not survex')
return
textlines.append(svxline)
+ lineno += 1
+
+ # print(' - Line: %d' % lineno)
+
# break the line at the comment
- sline, comment = re.match(r"([^;]*?)\s*(?:;\s*(.*))?\n?$", svxline.strip()).groups()
+ sline, comment = regex_comment.match(svxline.strip()).groups()
# detect ref line pointing to the scans directory
- mref = comment and re.match(r'.*?ref.*?(\d+)\s*#\s*(\d+)', comment)
+ mref = comment and regex_ref.match(comment)
if mref:
refscan = "%s#%s" % (mref.group(1), mref.group(2))
survexscansfolders = models.SurvexScansFolder.objects.filter(walletname=refscan)
if survexscansfolders:
survexblock.survexscansfolder = survexscansfolders[0]
#survexblock.refscandir = "%s/%s%%23%s" % (mref.group(1), mref.group(1), mref.group(2))
- survexblock.save()
+ survexblock.save()
continue
if not sline:
continue
# detect the star command
- mstar = re.match(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$', sline)
+ mstar = regex_star.match(sline)
if not mstar:
if "from" in stardata:
LoadSurvexLineLeg(survexblock, stardata, sline, comment)
+ # print(' - From: ')
+ #print(stardata)
elif stardata["type"] == "passage":
LoadSurvexLinePassage(survexblock, stardata, sline, comment)
+ # print(' - Passage: ')
#Missing "station" in stardata.
continue
@@ -131,7 +151,19 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
cmd = cmd.lower()
if re.match("include$(?i)", cmd):
includepath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line))
- includesurvexfile = models.SurvexFile(path=includepath, cave=survexfile.cave)
+ print(' - Include file found including - ' + includepath)
+ # Try to find the cave in the DB if not use the string as before
+ path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
+ if path_match:
+ pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
+ print(pos_cave)
+ cave = models.getCaveByReference(pos_cave)
+ if not cave:
+ cave = survexfile.cave
+ else:
+ print('No match for %s' % includepath)
+ cave = survexfile.cave
+ includesurvexfile = models.SurvexFile(path=includepath, cave=cave)
includesurvexfile.save()
includesurvexfile.SetDirectory()
if includesurvexfile.exists():
@@ -141,6 +173,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
elif re.match("begin$(?i)", cmd):
if line:
name = line.lower()
+ #print(' - Begin found for: ' + name)
survexblockdown = models.SurvexBlock(name=name, begin_char=fin.tell(), parent=survexblock, survexpath=survexblock.survexpath+"."+name, cave=survexblock.cave, survexfile=survexfile, totalleglength=0.0)
survexblockdown.save()
textlinesdown = [ ]
@@ -154,11 +187,16 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
else:
survexblock.text = "".join(textlines)
survexblock.save()
+ # print(' - End found: ')
+ endstamp = datetime.now()
+ timetaken = endstamp - stamp
+ # print(' - Time to process: ' + str(timetaken))
return
elif re.match("date$(?i)", cmd):
if len(line) == 10:
- survexblock.date = re.sub(r"\.", "-", line)
+ #print(' - Date found: ' + line)
+ survexblock.date = make_aware(datetime.strptime(re.sub(r"\.", "-", line), '%Y-%m-%d'), get_current_timezone())
expeditions = models.Expedition.objects.filter(year=line[:4])
if expeditions:
assert len(expeditions) == 1
@@ -167,9 +205,11 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
survexblock.save()
elif re.match("team$(?i)", cmd):
- mteammember = re.match(r"(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$(?i)", line)
+ pass
+ # print(' - Team found: ')
+ mteammember = regex_team.match(line)
if mteammember:
- for tm in re.split(r" and | / |, | & | \+ |^both$|^none$(?i)", mteammember.group(2)):
+ for tm in regex_team_member.split(mteammember.group(2)):
if tm:
personexpedition = survexblock.expedition and GetPersonExpeditionNameLookup(survexblock.expedition).get(tm.lower())
if (personexpedition, tm) not in teammembers:
@@ -181,6 +221,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
personrole.save()
elif cmd == "title":
+ #print(' - Title found: ')
survextitle = models.SurvexTitle(survexblock=survexblock, title=line.strip('"'), cave=survexblock.cave)
survextitle.save()
@@ -189,8 +230,11 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
pass
elif cmd == "data":
+ #print(' - Data found: ')
ls = line.lower().split()
stardata = { "type":ls[0] }
+ #print(' - Star data: ', stardata)
+ #print(ls)
for i in range(0, len(ls)):
stardata[stardataparamconvert.get(ls[i], ls[i])] = i - 1
if ls[0] in ["normal", "cartesian", "nosurvey"]:
@@ -201,33 +245,47 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
assert ls[0] == "passage", line
elif cmd == "equate":
+ #print(' - Equate found: ')
LoadSurvexEquate(survexblock, line)
elif cmd == "fix":
+ #print(' - Fix found: ')
survexblock.MakeSurvexStation(line.split()[0])
else:
+ #print(' - Stuff')
if cmd not in ["sd", "include", "units", "entrance", "data", "flags", "title", "export", "instrument",
"calibrate", "set", "infer", "alias", "ref", "cs", "declination", "case"]:
print("Unrecognised command in line:", cmd, line, survexblock, survexblock.survexfile.path)
+ endstamp = datetime.now()
+ timetaken = endstamp - stamp
+ # print(' - Time to process: ' + str(timetaken))
def ReloadSurvexCave(survex_cave, area):
- print(survex_cave, area)
- cave = models.Cave.objects.get(kataster_number=survex_cave, area__short_name=area)
- print(cave)
- #cave = models.Cave.objects.get(kataster_number=survex_cave)
+ print(' - Area: ' + str(area) + ' Cave: ' + str(survex_cave))
+ filt_cave = models.Cave.objects.filter(kataster_number=survex_cave, area__short_name=area)[:1]
+
+ if len(filt_cave) < 1:
+ filt_cave = models.Cave.objects.filter(unofficial_number=survex_cave, area__short_name=area)[:1]
+
+ cave = filt_cave[0]
+ print(' - ' + str(cave))
cave.survexblock_set.all().delete()
cave.survexfile_set.all().delete()
cave.survexdirectory_set.all().delete()
-
+
+ file_stamp_start = datetime.now()
survexfile = models.SurvexFile(path="caves-" + cave.kat_area() + "/" + survex_cave + "/" + survex_cave, cave=cave)
survexfile.save()
survexfile.SetDirectory()
-
+
survexblockroot = models.SurvexBlock(name="root", survexpath="caves-" + cave.kat_area(), begin_char=0, cave=cave, survexfile=survexfile, totalleglength=0.0)
survexblockroot.save()
fin = survexfile.OpenFile()
+ file_stamp_end = datetime.now()
+ file_time = file_stamp_end - file_stamp_start
+ print(' - Files time to process: ' + str(file_time))
textlines = [ ]
RecursiveLoad(survexblockroot, survexfile, fin, textlines)
survexblockroot.text = "".join(textlines)
@@ -268,10 +326,18 @@ def LoadAllSurvexBlocks():
print(" - Reloading all caves")
caves = models.Cave.objects.all()
for cave in caves:
+ rec_stamp_start = datetime.now()
if cave.kataster_number and os.path.isdir(os.path.join(settings.SURVEX_DATA, "caves-" + cave.kat_area(), cave.kataster_number)):
if cave.kataster_number not in ['40']:
- print("loading", cave, cave.kat_area())
- ReloadSurvexCave(cave.kataster_number, cave.kat_area())
+ print(" - Loading " + str(cave) + " " + cave.kat_area())
+ #ReloadSurvexCave(cave.kataster_number, cave.kat_area())
+ rec_stamp_end = datetime.now()
+ if cave.unofficial_number and os.path.isdir(os.path.join(settings.SURVEX_DATA, "caves-" + cave.kat_area(), cave.unofficial_number)):
+ print(" - Loading " + str(cave) + " " + cave.kat_area())
+ #ReloadSurvexCave(cave.unofficial_number, cave.kat_area())
+ timetaken = rec_stamp_end - rec_stamp_start
+ # print(' - Time to process: ' + str(timetaken))
+ # print('--------')
poslineregex = re.compile(r"^\(\s*([+-]?\d*\.\d*),\s*([+-]?\d*\.\d*),\s*([+-]?\d*\.\d*)\s*\)\s*([^\s]+)$")
@@ -286,7 +352,7 @@ def LoadPos():
posfile = open("%s%s.pos" % (settings.SURVEX_DATA, settings.SURVEX_TOPNAME))
posfile.readline() #Drop header
for line in posfile.readlines():
- r = poslineregex.match(line)
+ r = poslineregex.match(line)
if r:
x, y, z, name = r.groups()
try:
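
The survex.py rework above precompiles the hot-path regexes, adds per-file
timing, and tries to resolve the cave for an *include line from the include
path instead of always inheriting survexfile.cave. A standalone sketch of that
path-based lookup, with invented example paths (only the pattern itself comes
from the diff):

    import re

    cave_path_regex = re.compile(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/")

    for path in ("caves-1623/204/204.svx", "caves-1626/2018-dm-07/entrance.svx", "fixedpts/gps.svx"):
        m = cave_path_regex.search(path)
        if m:
            # e.g. "1623-204": the reference handed to models.getCaveByReference()
            print("%s -> cave reference %s-%s" % (path, m.group(1), m.group(2)))
        else:
            print("%s -> no match, fall back to the parent file's cave" % path)
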
diff --git a/parsers/surveys.py b/parsers/surveys.py
index efab536..0eed6f0 100644
--- a/parsers/surveys.py
+++ b/parsers/surveys.py
@@ -1,12 +1,7 @@
import sys, os, types, logging, stat
-#sys.path.append('C:\\Expo\\expoweb')
-#from troggle import *
-#os.environ['DJANGO_SETTINGS_MODULE']='troggle.settings'
import settings
from troggle.core.models import *
from PIL import Image
-#import settings
-#import core.models as models
import csv
import re
import datetime
@@ -45,21 +40,18 @@ def readSurveysFromCSV():
logging.info("Deleting all scanned images")
ScannedImage.objects.all().delete()
-
-
+
logging.info("Deleting all survey objects")
Survey.objects.all().delete()
-
-
+
logging.info("Beginning to import surveys from "+str(os.path.join(settings.SURVEYS, "Surveys.csv"))+"\n"+"-"*60+"\n")
-
+
for survey in surveyreader:
- #I hate this, but some surveys have a letter eg 2000#34a. The next line deals with that.
+ # I hate this, but some surveys have a letter eg 2000#34a. The next line deals with that.
walletNumberLetter = re.match(r'(?P<number>\d*)(?P<letter>[a-zA-Z]*)',survey[header['Survey Number']])
- # print(walletNumberLetter.groups())
+ # print(walletNumberLetter.groups())
year=survey[header['Year']]
-
surveyobj = Survey(
expedition = Expedition.objects.filter(year=year)[0],
wallet_number = walletNumberLetter.group('number'),
@@ -73,7 +65,6 @@ def readSurveysFromCSV():
pass
surveyobj.save()
-
logging.info("added survey " + survey[header['Year']] + "#" + surveyobj.wallet_number + "\r")
# dead
@@ -141,7 +132,7 @@ def parseSurveyScans(expedition, logfile=None):
yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
print("No folder found for " + expedition.year + " at:- " + yearPath)
-# dead
+
def parseSurveys(logfile=None):
try:
readSurveysFromCSV()
@@ -271,8 +262,9 @@ def SetTunnelfileInfo(tunnelfile):
fin.close()
mtype = re.search("<(fontcolours|sketch)", ttext)
- assert mtype, ff
- tunnelfile.bfontcolours = (mtype.group(1)=="fontcolours")
+ #assert mtype, ff
+ if mtype:
+ tunnelfile.bfontcolours = (mtype.group(1)=="fontcolours")
tunnelfile.npaths = len(re.findall("<skpath", ttext))
tunnelfile.save()
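
The final surveys.py hunk softens a hard assert in SetTunnelfileInfo into a
guarded assignment, so a Tunnel file containing neither a <fontcolours nor a
<sketch tag no longer aborts the whole import. A minimal sketch of the guard
on its own, with ttext standing in for the file contents and the model save
omitted:

    import re

    def detect_fontcolours(ttext):
        # True/False when a <fontcolours or <sketch tag is present,
        # None when neither is found, instead of raising AssertionError.
        mtype = re.search("<(fontcolours|sketch)", ttext)
        if mtype:
            return mtype.group(1) == "fontcolours"
        return None

    print(detect_fontcolours("<fontcolours ..."))     # True
    print(detect_fontcolours("<sketch ..."))          # False
    print(detect_fontcolours("plain text, no tags"))  # None
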