summary refs log tree commit diff stats
path: root/parsers/survex.py
diff options
context:
space:
mode:
author Sam Wenham <sam@wenhams.co.uk> 2020-02-21 15:57:07 +0000
committer Sam Wenham <sam@wenhams.co.uk> 2020-02-21 15:57:07 +0000
commit e77aa9fb84192bcb0813589589130fb7c1b29c0f (patch)
tree 0470ce711abeac54ff4d85b35c7a965aefe58d29 /parsers/survex.py
parent f5fe2d9e335dde5fc8607e3ba9fd54e948bf6d6d (diff)
download troggle-Faster-survex-parser.tar.gz
troggle-Faster-survex-parser.tar.bz2
troggle-Faster-survex-parser.zip
Changes needed to stop the survex parser having to go through the data twice [Faster-survex-parser]
Taken from the Django 1.10 upgrade branch
Diffstat (limited to 'parsers/survex.py')
-rw-r--r-- parsers/survex.py 252
1 files changed, 178 insertions, 74 deletions
diff --git a/parsers/survex.py b/parsers/survex.py
index 294de73..14bd035 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -5,20 +5,26 @@ import troggle.settings as settings
from subprocess import call, Popen, PIPE
from troggle.parsers.people import GetPersonExpeditionNameLookup
+from django.utils.timezone import get_current_timezone
+from django.utils.timezone import make_aware
+
import re
import os
+from datetime import datetime
+line_leg_regex = re.compile(r"[\d\-+.]+$")
-def LoadSurvexLineLeg(survexblock, stardata, sline, comment):
+def LoadSurvexLineLeg(survexblock, stardata, sline, comment, cave):
+ # The try catches here need replacing as they are relatively expensive
ls = sline.lower().split()
ssfrom = survexblock.MakeSurvexStation(ls[stardata["from"]])
ssto = survexblock.MakeSurvexStation(ls[stardata["to"]])
-
+
survexleg = models.SurvexLeg(block=survexblock, stationfrom=ssfrom, stationto=ssto)
if stardata["type"] == "normal":
try:
survexleg.tape = float(ls[stardata["tape"]])
- except ValueError:
+ except ValueError:
print("Tape misread in", survexblock.survexfile.path)
print("Stardata:", stardata)
print("Line:", ls)
@@ -53,14 +59,17 @@ def LoadSurvexLineLeg(survexblock, stardata, sline, comment):
survexleg.compass = 1000
survexleg.clino = -90.0
else:
- assert re.match(r"[\d\-+.]+$", lcompass), ls
- assert re.match(r"[\d\-+.]+$", lclino) and lclino != "-", ls
+ assert line_leg_regex.match(lcompass), ls
+ assert line_leg_regex.match(lclino) and lclino != "-", ls
survexleg.compass = float(lcompass)
survexleg.clino = float(lclino)
-
+
+ if cave:
+ survexleg.cave = cave
+
# only save proper legs
survexleg.save()
-
+
itape = stardata.get("tape")
if itape:
try:
@@ -80,96 +89,212 @@ def LoadSurvexEquate(survexblock, sline):
def LoadSurvexLinePassage(survexblock, stardata, sline, comment):
pass
-
stardatadefault = {"type":"normal", "t":"leg", "from":0, "to":1, "tape":2, "compass":3, "clino":4}
stardataparamconvert = {"length":"tape", "bearing":"compass", "gradient":"clino"}
+regex_comment = re.compile(r"([^;]*?)\s*(?:;\s*(.*))?\n?$")
+regex_ref = re.compile(r'.*?ref.*?(\d+)\s*#\s*(\d+)')
+regex_star = re.compile(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$')
+regex_team = re.compile(r"(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$(?i)")
+regex_team_member = re.compile(r" and | / |, | & | \+ |^both$|^none$(?i)")
+regex_qm = re.compile(r'^\s*QM(\d)\s+?([a-dA-DxX])\s+([\w\-]+)\.(\d+)\s+(([\w\-]+)\.(\d+)|\-)\s+(.+)$')
+
def RecursiveLoad(survexblock, survexfile, fin, textlines):
iblankbegins = 0
text = [ ]
stardata = stardatadefault
teammembers = [ ]
-
-# uncomment to print out all files during parsing
- print("Reading file: " + survexblock.survexfile.path)
- while True:
- svxline = fin.readline().decode("latin1")
- if not svxline:
- return
- textlines.append(svxline)
+
+ # uncomment to print out all files during parsing
+ print(" - Reading file: " + survexblock.survexfile.path)
+ stamp = datetime.now()
+ lineno = 0
+
+ # Try to find the cave in the DB if not use the string as before
+ path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path)
+ if path_match:
+ pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
+ # print('Match')
+ # print(pos_cave)
+ cave = models.getCaveByReference(pos_cave)
+ if cave:
+ survexfile.cave = cave
+ svxlines = ''
+ svxlines = fin.read().splitlines()
+ # print('Cave - preloop ' + str(survexfile.cave))
+ # print(survexblock)
+ for svxline in svxlines:
+
+ # print(survexblock)
+
+ # print(svxline)
+ # if not svxline:
+ # print(' - Not survex')
+ # return
+ # textlines.append(svxline)
+
+ lineno += 1
+
+ # print(' - Line: %d' % lineno)
# break the line at the comment
- sline, comment = re.match(r"([^;]*?)\s*(?:;\s*(.*))?\n?$", svxline.strip()).groups()
-
+ sline, comment = regex_comment.match(svxline.strip()).groups()
# detect ref line pointing to the scans directory
- mref = comment and re.match(r'.*?ref.*?(\d+)\s*#\s*(\d+)', comment)
+ mref = comment and regex_ref.match(comment)
if mref:
refscan = "%s#%s" % (mref.group(1), mref.group(2))
survexscansfolders = models.SurvexScansFolder.objects.filter(walletname=refscan)
if survexscansfolders:
survexblock.survexscansfolder = survexscansfolders[0]
#survexblock.refscandir = "%s/%s%%23%s" % (mref.group(1), mref.group(1), mref.group(2))
- survexblock.save()
+ survexblock.save()
continue
-
+
+ # This whole section should be moved if we can have *QM become a proper survex command
+ # Spec of QM in SVX files, currently commented out need to add to survex
+ # needs to match regex_qm
+ # ;Serial number grade(A/B/C/D/X) nearest-station resolution-station description
+ # ;QM1 a hobnob_hallway_2.42 hobnob-hallway_3.42 junction of keyhole passage
+ # ;QM1 a hobnob_hallway_2.42 - junction of keyhole passage
+ qmline = comment and regex_qm.match(comment)
+ if qmline:
+ print(qmline.groups())
+ #(u'1', u'B', u'miraclemaze', u'1.17', u'-', None, u'\tcontinuation of rift')
+ qm_no = qmline.group(1)
+ qm_grade = qmline.group(2)
+ qm_from_section = qmline.group(3)
+ qm_from_station = qmline.group(4)
+ qm_resolve_section = qmline.group(6)
+ qm_resolve_station = qmline.group(7)
+ qm_notes = qmline.group(8)
+
+ print('Cave - %s' % survexfile.cave)
+ print('QM no %d' % int(qm_no))
+ print('QM grade %s' % qm_grade)
+ print('QM section %s' % qm_from_section)
+ print('QM station %s' % qm_from_station)
+ print('QM res section %s' % qm_resolve_section)
+ print('QM res station %s' % qm_resolve_station)
+ print('QM notes %s' % qm_notes)
+
+ # If the QM isn't resolved (has no resolving station) then load it
+ if not qm_resolve_section or qm_resolve_section is not '-' or qm_resolve_section is not 'None':
+ from_section = models.SurvexBlock.objects.filter(name=qm_from_section)
+ # If we can find a section (survex note chunk, named)
+ if len(from_section) > 0:
+ print(from_section[0])
+ from_station = models.SurvexStation.objects.filter(block=from_section[0], name=qm_from_station)
+ # If we can find a from station then we have the nearest station and can import it
+ if len(from_station) > 0:
+ print(from_station[0])
+ qm = models.QM.objects.create(number=qm_no,
+ nearest_station=from_station[0],
+ grade=qm_grade.upper(),
+ location_description=qm_notes)
+ else:
+ print('QM found but resolved')
+
+ #print('Cave -sline ' + str(cave))
if not sline:
continue
-
+
# detect the star command
- mstar = re.match(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$', sline)
+ mstar = regex_star.match(sline)
if not mstar:
if "from" in stardata:
- LoadSurvexLineLeg(survexblock, stardata, sline, comment)
+ # print('Cave ' + str(survexfile.cave))
+ # print(survexblock)
+ LoadSurvexLineLeg(survexblock, stardata, sline, comment, survexfile.cave)
+ # print(' - From: ')
+ #print(stardata)
+ pass
elif stardata["type"] == "passage":
LoadSurvexLinePassage(survexblock, stardata, sline, comment)
+ # print(' - Passage: ')
#Missing "station" in stardata.
continue
-
+
# detect the star command
cmd, line = mstar.groups()
cmd = cmd.lower()
if re.match("include$(?i)", cmd):
includepath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line))
- includesurvexfile = models.SurvexFile(path=includepath, cave=survexfile.cave)
+ print(' - Include file found including - ' + includepath)
+ # Try to find the cave in the DB if not use the string as before
+ path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
+ if path_match:
+ pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
+ # print(pos_cave)
+ cave = models.getCaveByReference(pos_cave)
+ if cave:
+ survexfile.cave = cave
+ else:
+ print('No match for %s' % includepath)
+ includesurvexfile = models.SurvexFile(path=includepath)
includesurvexfile.save()
includesurvexfile.SetDirectory()
if includesurvexfile.exists():
+ survexblock.save()
fininclude = includesurvexfile.OpenFile()
RecursiveLoad(survexblock, includesurvexfile, fininclude, textlines)
-
+
elif re.match("begin$(?i)", cmd):
- if line:
+ if line:
+ newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line))
+ # Try to find the cave in the DB if not use the string as before
+ path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath)
+ if path_match:
+ pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
+ print(pos_cave)
+ cave = models.getCaveByReference(pos_cave)
+ if cave:
+ survexfile.cave = cave
+ else:
+ print('No match for %s' % newsvxpath)
+
name = line.lower()
- survexblockdown = models.SurvexBlock(name=name, begin_char=fin.tell(), parent=survexblock, survexpath=survexblock.survexpath+"."+name, cave=survexblock.cave, survexfile=survexfile, totalleglength=0.0)
+ print(' - Begin found for: ' + name)
+ # print('Block cave: ' + str(survexfile.cave))
+ survexblockdown = models.SurvexBlock(name=name, begin_char=fin.tell(), parent=survexblock, survexpath=survexblock.survexpath+"."+name, cave=survexfile.cave, survexfile=survexfile, totalleglength=0.0)
survexblockdown.save()
+ survexblock.save()
+ survexblock = survexblockdown
+ # print(survexblockdown)
textlinesdown = [ ]
RecursiveLoad(survexblockdown, survexfile, fin, textlinesdown)
else:
iblankbegins += 1
-
+
elif re.match("end$(?i)", cmd):
if iblankbegins:
iblankbegins -= 1
else:
survexblock.text = "".join(textlines)
survexblock.save()
+ # print(' - End found: ')
+ endstamp = datetime.now()
+ timetaken = endstamp - stamp
+ # print(' - Time to process: ' + str(timetaken))
return
-
+
elif re.match("date$(?i)", cmd):
if len(line) == 10:
- survexblock.date = re.sub(r"\.", "-", line)
+ #print(' - Date found: ' + line)
+ survexblock.date = make_aware(datetime.strptime(re.sub(r"\.", "-", line), '%Y-%m-%d'), get_current_timezone())
expeditions = models.Expedition.objects.filter(year=line[:4])
if expeditions:
assert len(expeditions) == 1
survexblock.expedition = expeditions[0]
survexblock.expeditionday = survexblock.expedition.get_expedition_day(survexblock.date)
survexblock.save()
-
+
elif re.match("team$(?i)", cmd):
- mteammember = re.match(r"(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$(?i)", line)
+ pass
+ # print(' - Team found: ')
+ mteammember = regex_team.match(line)
if mteammember:
- for tm in re.split(r" and | / |, | & | \+ |^both$|^none$(?i)", mteammember.group(2)):
+ for tm in regex_team_member.split(mteammember.group(2)):
if tm:
personexpedition = survexblock.expedition and GetPersonExpeditionNameLookup(survexblock.expedition).get(tm.lower())
if (personexpedition, tm) not in teammembers:
@@ -179,18 +304,23 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
if personexpedition:
personrole.person=personexpedition.person
personrole.save()
-
+
elif cmd == "title":
- survextitle = models.SurvexTitle(survexblock=survexblock, title=line.strip('"'), cave=survexblock.cave)
+ #print(' - Title found: ')
+ survextitle = models.SurvexTitle(survexblock=survexblock, title=line.strip('"'), cave=survexfile.cave)
survextitle.save()
-
+ pass
+
elif cmd == "require":
# should we check survex version available for processing?
pass
elif cmd == "data":
+ #print(' - Data found: ')
ls = line.lower().split()
stardata = { "type":ls[0] }
+ #print(' - Star data: ', stardata)
+ #print(ls)
for i in range(0, len(ls)):
stardata[stardataparamconvert.get(ls[i], ls[i])] = i - 1
if ls[0] in ["normal", "cartesian", "nosurvey"]:
@@ -199,40 +329,23 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
stardata = stardatadefault
else:
assert ls[0] == "passage", line
-
+
elif cmd == "equate":
+ #print(' - Equate found: ')
LoadSurvexEquate(survexblock, line)
elif cmd == "fix":
+ #print(' - Fix found: ')
survexblock.MakeSurvexStation(line.split()[0])
else:
+ #print(' - Stuff')
if cmd not in ["sd", "include", "units", "entrance", "data", "flags", "title", "export", "instrument",
"calibrate", "set", "infer", "alias", "ref", "cs", "declination", "case"]:
print("Unrecognised command in line:", cmd, line, survexblock, survexblock.survexfile.path)
-
-
-def ReloadSurvexCave(survex_cave, area):
- print(survex_cave, area)
- cave = models.Cave.objects.get(kataster_number=survex_cave, area__short_name=area)
- print(cave)
- #cave = models.Cave.objects.get(kataster_number=survex_cave)
- cave.survexblock_set.all().delete()
- cave.survexfile_set.all().delete()
- cave.survexdirectory_set.all().delete()
-
- survexfile = models.SurvexFile(path="caves-" + cave.kat_area() + "/" + survex_cave + "/" + survex_cave, cave=cave)
- survexfile.save()
- survexfile.SetDirectory()
-
- survexblockroot = models.SurvexBlock(name="root", survexpath="caves-" + cave.kat_area(), begin_char=0, cave=cave, survexfile=survexfile, totalleglength=0.0)
- survexblockroot.save()
- fin = survexfile.OpenFile()
- textlines = [ ]
- RecursiveLoad(survexblockroot, survexfile, fin, textlines)
- survexblockroot.text = "".join(textlines)
- survexblockroot.save()
-
+ endstamp = datetime.now()
+ timetaken = endstamp - stamp
+ # print(' - Time to process: ' + str(timetaken))
def LoadAllSurvexBlocks():
@@ -258,22 +371,13 @@ def LoadAllSurvexBlocks():
survexblockroot.save()
fin = survexfile.OpenFile()
textlines = [ ]
+ # The real work starts here
RecursiveLoad(survexblockroot, survexfile, fin, textlines)
+ fin.close()
survexblockroot.text = "".join(textlines)
survexblockroot.save()
- #Load each cave,
- #FIXME this should be dealt with load all above
- print(" - Reloading all caves")
- caves = models.Cave.objects.all()
- for cave in caves:
- if cave.kataster_number and os.path.isdir(os.path.join(settings.SURVEX_DATA, "caves-" + cave.kat_area(), cave.kataster_number)):
- if cave.kataster_number not in ['40']:
- print("loading", cave, cave.kat_area())
- ReloadSurvexCave(cave.kataster_number, cave.kat_area())
-
-
poslineregex = re.compile(r"^\(\s*([+-]?\d*\.\d*),\s*([+-]?\d*\.\d*),\s*([+-]?\d*\.\d*)\s*\)\s*([^\s]+)$")
@@ -286,7 +390,7 @@ def LoadPos():
posfile = open("%s%s.pos" % (settings.SURVEX_DATA, settings.SURVEX_TOPNAME))
posfile.readline() #Drop header
for line in posfile.readlines():
- r = poslineregex.match(line)
+ r = poslineregex.match(line)
if r:
x, y, z, name = r.groups()
try: