summary | refs | log | tree | commit | diff | stats
path: root/parsers/survex.py
diff options
context:
space:
mode:
author: Philip Sargent <philip.sargent@klebos.com>, 2020-06-24 22:46:18 +0100
committer: Philip Sargent <philip.sargent@klebos.com>, 2020-06-24 22:46:18 +0100
commit: 04f14c91f0b85108384ca4b9f2b190c0b26eef98 (patch)
tree: a68606697d75fd3b149e4fc71da54437ecc34bb2 /parsers/survex.py
parent: 664c18ebbebfd01c69dc6de0b38a78703aa35d36 (diff)
download: troggle-04f14c91f0b85108384ca4b9f2b190c0b26eef98.tar.gz
troggle-04f14c91f0b85108384ca4b9f2b190c0b26eef98.tar.bz2
troggle-04f14c91f0b85108384ca4b9f2b190c0b26eef98.zip
rearrange ref and comment detection
Diffstat (limited to 'parsers/survex.py')
-rw-r--r-- parsers/survex.py 368
1 files changed, 192 insertions, 176 deletions
diff --git a/parsers/survex.py b/parsers/survex.py
index f800039..beca1b8 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -38,7 +38,6 @@ class LoadSurvex():
stardatadefault = {"type":"normal", "t":"leg", "from":0, "to":1, "tape":2, "compass":3, "clino":4}
stardataparamconvert = {"length":"tape", "bearing":"compass", "gradient":"clino"}
- rx_braskets= re.compile(r"[()]")
rx_linelen = re.compile(r"[\d\-+.]+$")
rx_team = re.compile(r"(?i)(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$")
rx_person = re.compile(r"(?i) and | / |, | & | \+ |^both$|^none$")
@@ -46,21 +45,15 @@ class LoadSurvex():
# remember there is also QM_PATTERN used in views_other and set in settings.py
rx_comment = re.compile(r"([^;]*?)\s*(?:;\s*(.*))?\n?$")
- rx_ref = re.compile(r'.*?ref.*?(\d+)\s*#\s*(X)?\s*(\d+)')
+ rx_ref = re.compile(r'^\s*ref[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)')
rx_star = re.compile(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$')
- # years from 1960 to 2039
rx_starref = re.compile(r'(?i)^\s*\*ref[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$')
- # rx_starref = re.compile("""?x # VERBOSE mode - can't get this to work
- # ^\s*\*ref # look for *ref at start of line
- # [\s.:]* # some spaces, stops or colons
- # ((?:19[6789]\d)|(?:20[0123]\d)) # a date from 1960 to 2039 - captured as one field
- # \s*# # spaces then hash separator
- # ?\s*(X) # optional X - captured
- # ?\s*(.*?\d+.*?) # maybe a space, then at least one digit in the string - captured
- # $(?i)""", re.X) # the end (do the whole thing case insensitively)
+ rx_argsref = re.compile(r'(?i)^[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$')
survexlegsalllength = 0.0
survexlegsnumber = 0
+ depthbegin = 0
+ lineno = 0
insp = ""
callcount = 0
stardata ={}
@@ -112,16 +105,21 @@ class LoadSurvex():
survexblock.expeditionday = survexblock.expedition.get_expedition_day(survexblock.date)
survexblock.save()
- def LoadSurvexLineLeg(self, survexblock, stardata, sline, comment):
+ def LoadSurvexLineLeg(self, survexblock, svxline, sline, comment):
"""This reads compass, clino and tape data but only keeps the tape lengths,
the rest is discarded after error-checking.
"""
- ls = sline.lower().split()
+ stardata = self.stardata
survexleg = SurvexLeg()
+
+ ls = sline.lower().split()
# this next fails for two surface survey svx files which use / for decimal point
# e.g. '29/09' in the tape measurement, or use decimals but in brackets, e.g. (06.05)
- if stardata["type"] == "normal":
- tape = self.rx_braskets.sub("",ls[stardata["tape"]])
+ if stardata["type"] == "normal": # should use current flags setting for this
+ # print(" !! lineno '{}'\n !! svxline '{}'\n !! sline '{}'\n !! ls '{}'\n !! stardata {}".format(self.lineno, svxline, sline, ls,stardata))
+ tape = ls[stardata["tape"]]
+ tape = tape.replace("(",".")
+ tape = tape.replace(")",".")
tape = tape.replace("/",".")
try:
survexleg.tape = float(tape)
@@ -134,6 +132,13 @@ class LoadSurvex():
models.DataIssue.objects.create(parser='survex', message=message)
survexleg.tape = 0
try:
+ survexblock.totalleglength += survexleg.tape
+ self.survexlegsalllength += survexleg.tape
+ except ValueError:
+ message = ' ! Value Error: Tape length not added %s in %s' % (ls, survexblock.survexfile.path)
+ models.DataIssue.objects.create(parser='survex', message=message)
+
+ try:
lclino = ls[stardata["clino"]]
except:
print(("! Clino misread in", survexblock.survexfile.path))
@@ -142,6 +147,7 @@ class LoadSurvex():
message = ' ! Value Error: Clino misread in line %s in %s' % (ls, survexblock.survexfile.path)
models.DataIssue.objects.create(parser='survex', message=message)
lclino = error
+
try:
lcompass = ls[stardata["compass"]]
except:
@@ -151,6 +157,7 @@ class LoadSurvex():
message = ' ! Value Error: Compass misread in line %s in %s' % (ls, survexblock.survexfile.path)
models.DataIssue.objects.create(parser='survex', message=message)
lcompass = error
+
if lclino == "up":
survexleg.compass = 0.0
survexleg.clino = 90.0
@@ -176,22 +183,23 @@ class LoadSurvex():
# delete the object so that django autosaving doesn't save it.
survexleg = None
- itape = stardata.get("tape")
- if itape:
- try:
- survexblock.totalleglength += float(ls[itape])
- self.survexlegsalllength += float(ls[itape])
- except ValueError:
- print("! Length not added")
+ def LoadSurvexRef(self, survexblock, args):
+ # *REF but also ; Ref years from 1960 to 2039
+ if len(args)< 4:
+ message = " ! Empty or BAD *REF command '{}' at {}".format(args, survexblock.survexfile.path)
+ print((self.insp+message))
+ models.DataIssue.objects.create(parser='survex', message=message)
+ return
+ argsgps = self.rx_argsref.match(args)
+ if argsgps:
+ yr, letterx, wallet = argsgps.groups()
+ else:
+ message = " ! BAD *REF command '{}' at {}".format(args, survexblock.survexfile.path)
+ print((self.insp+message))
+ models.DataIssue.objects.create(parser='survex', message=message)
+ return
- def LoadSurvexLinePassage(self, survexblock, stardata, sline, comment):
- # do not import this: *data passage.. data which is LRUD not tape/compass/clino
- pass
-
- def LoadSurvexRef(self, survexblock, mstar):
- # *REF but also ; Ref
- yr,letterx,wallet = mstar.groups()
if not letterx:
letterx = ""
else:
@@ -199,23 +207,32 @@ class LoadSurvex():
if len(wallet)<2:
wallet = "0" + wallet
assert (int(yr)>1960 and int(yr)<2039), "Wallet year out of bounds: %s" % yr
- assert (int(wallet)<100), "Wallet number more than 100: %s" % wallet
refscan = "%s#%s%s" % (yr, letterx, wallet)
+ try:
+ if int(wallet)>100:
+ message = " ! Wallet *REF {} - too big {}".format(refscan, survexblock.survexfile.path)
+ print((self.insp+message))
+ models.DataIssue.objects.create(parser='survex', message=message)
+ except:
+ message = " ! Wallet *REF {} - not numeric {}".format(refscan, survexblock.survexfile.path)
+ print((self.insp+message))
+ models.DataIssue.objects.create(parser='survex', message=message)
manyscansfolders = models_survex.ScansFolder.objects.filter(walletname=refscan)
if manyscansfolders:
survexblock.scansfolder = manyscansfolders[0]
survexblock.save()
if len(manyscansfolders) > 1:
- message = ' ! Wallet *REF {} - multiple scan folders found {}'.format(refscan, survexblock.survexfile.path)
+ message = " ! Wallet *REF {} - multiple scan folders found {}".format(refscan, survexblock.survexfile.path)
print((self.insp+message))
models.DataIssue.objects.create(parser='survex', message=message)
else:
- message = ' ! Wallet *REF {} - NOT found in manyscansfolders {}'.format(refscan, survexblock.survexfile.path)
+ message = " ! Wallet *REF {} - NOT found in manyscansfolders {}".format(refscan, survexblock.survexfile.path)
print((self.insp+message))
models.DataIssue.objects.create(parser='survex', message=message)
- def LoadSurvexQM(self, insp, survexblock, qmline):
+ def LoadSurvexQM(self, survexblock, qmline):
+ insp = self.insp
qm_no = qmline.group(1)
qm_grade = qmline.group(2)
qm_from_section = qmline.group(3)
@@ -261,28 +278,33 @@ class LoadSurvex():
models.DataIssue.objects.create(parser='survex', message=message)
pass
+ def LoadSurvexComment(self, survexblock, comment):
+ # ignore all comments except ;ref and ;QM
+ refline = self.rx_ref.match(comment)
+ if refline:
+ comment = comment.replace("ref","").strip()
+ self.LoadSurvexRef(survexblock, comment)
- def RecursiveLoad(self,survexblock, survexfile, fin):
- """Follows the *include links in all the survex files from the root file 1623.svx
- and reads in the survex blocks, other data and the wallet references (scansfolder) as it
- goes. This part of the data import process is where the maximum memory is used and where it
- crashes on memory-constrained machines. Begin-end blocks may also be nested.
- """
- iblankbegins = 0
- stardata = self.stardatadefault
- insp =self.insp
- blocklegs = self.survexlegsnumber
+ qmline = self.rx_qm.match(comment)
+ if qmline:
+ self.LoadSurvexQM(survexblock, qmline)
- print(insp+" - MEM:{:.3f} Reading. parent:{} <> {} ".format(models.get_process_memory(),survexblock.survexfile.path,survexfile.path))
- stamp = datetime.now()
- lineno = 0
-
+ def LoadSurvexFlags(self, line, cmd):
+ # Here we could set on/off 'splay', 'not splay', 'surface', 'not surface', or 'duplicate'
+ # but this data is only used for sense-checking not to actually calculate anything important
+ pass
+
+ def LoadSurvexSetup(self,survexblock, survexfile):
+ self.depthbegin = 0
+ self.stardata = self.stardatadefault
+ blocklegs = self.survexlegsnumber
+ print(self.insp+" - MEM:{:.3f} Reading. parent:{} <> {} ".format(models.get_process_memory(),survexblock.survexfile.path, survexfile.path))
+ self.lineno = 0
sys.stderr.flush();
self.callcount +=1
if self.callcount >=10:
self.callcount=0
print(".", file=sys.stderr,end='')
-
# Try to find the cave in the DB if not use the string as before
path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path)
if path_match:
@@ -290,150 +312,144 @@ class LoadSurvex():
cave = models_caves.getCaveByReference(pos_cave)
if cave:
survexfile.cave = cave
- svxlines = ''
+
+ def RecursiveLoad(self, survexblock, survexfile, fin):
+ """Follows the *include links in all the survex files from the root file 1623.svx
+ and reads in the survex blocks, other data and the wallet references (scansfolder) as it
+ goes. This part of the data import process is where the maximum memory is used and where it
+ crashes on memory-constrained machines. Begin-end blocks may also be nested.
+ """
+ self.LoadSurvexSetup(survexblock, survexfile)
+ insp =self.insp
+ previousnlegs = 0
+
svxlines = fin.read().splitlines()
- # cannot close file now as it may be recursively called with the same file id fin if nested *begin
- # occurs.
+ # cannot close file now as may be recursively called with the same fin if nested *begin-end
+
for svxline in svxlines:
- lineno += 1
- # break the line at the comment
+ self.lineno += 1
sline, comment = self.rx_comment.match(svxline.strip()).groups()
- mref = comment and self.rx_ref.match(comment)
- if mref:
- self.LoadSurvexRef(survexblock, mref)
-
- qmline = comment and self.rx_qm.match(comment)
- if qmline:
- self.LoadSurvexQM(insp, survexblock, qmline)
-
+ if comment:
+ self.LoadSurvexComment(survexblock, comment)
if not sline:
- continue
-
- # detect the star ref command
- rstar = self.rx_starref.match(sline)
- if rstar:
- self.LoadSurvexRef(survexblock, rstar)
+ continue # skip blank lines
# detect the star command
mstar = self.rx_star.match(sline)
- if not mstar:
- if "from" in stardata:
- self.LoadSurvexLineLeg(survexblock, stardata, sline, comment)
- pass
- elif stardata["type"] == "passage":
- pass
- #self.LoadSurvexLinePassage(survexblock, stardata, sline, comment)
- #Missing "station" in stardata.
- continue
-
- # detect the star command
- cmd, line = mstar.groups()
- cmd = cmd.lower()
- if re.match("include$(?i)", cmd):
- includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line)))
- print((insp+' - Include path found, including - ' + includepath))
- # Try to find the cave in the DB. if not, use the string as before
- path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
- if path_match:
- pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
- print((insp+' - Match in DB (i) for cave {}.'.format(pos_cave)))
- cave = models_caves.getCaveByReference(pos_cave)
- if cave:
- survexfile.cave = cave
- else:
- print((insp+' - NO Match in DB (i) for a cave for {}'.format(includepath)))
-
- includesurvexfile = models_survex.SurvexFile(path=includepath)
- includesurvexfile.save()
- includesurvexfile.SetDirectory()
- if includesurvexfile.exists():
- survexblock.save()
- fininclude = includesurvexfile.OpenFile()
- self.survexlegsnumber = blocklegs
- self.insp += "> "
- self.RecursiveLoad(survexblock, includesurvexfile, fininclude)
- #--------------------------------------------------------
- fininclude.close()
- self.insp = self.insp[2:]
- insp = self.insp
- blocklegs = self.survexlegsnumber
- else:
- print((insp+' ! ERROR *include file not found for %s' % includesurvexfile))
-
- elif re.match("begin$(?i)", cmd):
- # On a *begin statement we start a new survexblock.
- # There should not be any *include inside a begin-end block, so this is a simple
- # load not a recursive fileload. But there may be many blocks nested to any depth in one file.
- if line:
- newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line))
- # Try to find the cave in the DB if not use the string as before
- path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath)
+ if mstar: # yes we are reading a *cmd
+ cmd, args = mstar.groups()
+ cmd = cmd.lower()
+ if re.match("include$(?i)", cmd):
+ includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args)))
+ print((insp+' - INCLUDE-go path found, including - ' + includepath))
+ # Try to find the cave in the DB. if not, use the string as before
+ path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
if path_match:
pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
- # print(insp+pos_cave)
+ print((insp+' - Match in DB (i) for cave {}.'.format(pos_cave)))
cave = models_caves.getCaveByReference(pos_cave)
if cave:
survexfile.cave = cave
else:
- print((insp+' - No match (b) for %s' % newsvxpath))
-
- previousnlegs = blocklegs
- name = line.lower()
- print(insp+' - Begin found for:{}, creating new SurvexBlock '.format(name))
- survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock,
- survexpath=survexblock.survexpath+"."+name,
- cave=survexfile.cave, survexfile=survexfile,
- legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
- survexblockdown.save()
- survexblock.save()
- survexblock = survexblockdown
- print(insp+" - ENTERING nested *begin/*end block: {}".format(name))
- self.survexlegsnumber = blocklegs
- self.insp += "> "
- self.RecursiveLoad(survexblockdown, survexfile, fin)
- #--------------------------------------------------------
- # do not close the file as there may be more blocks in this one
- # and it is re-read afresh with every nested begin-end block.
- self.insp = self.insp[2:]
- insp = self.insp
- blocklegs = self.survexlegsnumber
- else:
- iblankbegins += 1
+ print((insp+' - NO Match in DB (i) for a cave for {}'.format(includepath)))
+
+ includesurvexfile = models_survex.SurvexFile(path=includepath)
+ includesurvexfile.save()
+ includesurvexfile.SetDirectory()
+ if includesurvexfile.exists():
+ survexblock.save()
+ self.insp += "> "
+ #--------------------------------------------------------
+ fininclude = includesurvexfile.OpenFile()
+ self.RecursiveLoad(survexblock, includesurvexfile, fininclude)
+ fininclude.close()
+ #--------------------------------------------------------
+ self.insp = self.insp[2:]
+ insp = self.insp
+ print((insp+' - INCLUDE-return from include - ' + includepath))
+ else:
+ print((insp+' ! ERROR *include file not found for %s' % includesurvexfile))
+
+ elif re.match("begin$(?i)", cmd):
+ # On a *begin statement we start a new survexblock.
+ # There should not be any *include inside a begin-end block, so this is a simple
+ # load not a recursive fileload. But there may be many blocks nested to any depth in one file.
+ if args:
+ newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args))
+ # Try to find the cave in the DB if not use the string as before
+ path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath)
+ if path_match:
+ pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
+ # print(insp+pos_cave)
+ cave = models_caves.getCaveByReference(pos_cave)
+ if cave:
+ survexfile.cave = cave
+ else:
+ print((insp+' - No match (b) for %s' % newsvxpath))
+
+ previousnlegs = self.survexlegsnumber
+ name = args.lower()
+ print(insp+' - Begin found for:{}, creating new SurvexBlock '.format(name))
+ # the recursive call re-reads the entire file. This is wasteful. We should pass in only
+ # the un-parsed part of the file.
+ survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock,
+ survexpath=survexblock.survexpath+"."+name,
+ cave=survexfile.cave, survexfile=survexfile,
+ legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
+ survexblockdown.save()
+ survexblock.save()
+ survexblock = survexblockdown
+ print(insp+" - BLOCK-enter nested *begin/*end block: '{}'".format(name))
+ self.insp += "> "
+ #--------------------------------------------------------
+ self.RecursiveLoad(survexblockdown, survexfile, fin)
+ #--------------------------------------------------------
+ # do not close the file as there may be more blocks in this one
+ # and it is re-read afresh with every nested begin-end block.
+ self.insp = self.insp[2:]
+ insp = self.insp
+ else:
+ self.depthbegin += 1
- elif re.match("end$(?i)", cmd):
- if iblankbegins:
- print(insp+" - RETURNING from nested *begin/*end block: {}".format(line))
- iblankbegins -= 1
+ elif re.match("end$(?i)", cmd):
+ if self.depthbegin:
+ print(insp+" - BLOCK-return from nested *begin/*end block: '{}'".format(args))
+ self.depthbegin -= 1
+ else:
+ legsinblock = self.survexlegsnumber - previousnlegs
+ print(insp+" - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber))
+ survexblock.legsall = legsinblock
+ survexblock.save()
+ return
+ elif cmd == "ref":
+ self.LoadSurvexRef(survexblock, args)
+ elif cmd == "flags":
+ self.LoadSurvexFlags(args, cmd)
+ elif cmd == "data":
+ ls = args.lower().split()
+ stardata = { "type":ls[0] }
+ for i in range(0, len(ls)):
+ stardata[self.stardataparamconvert.get(ls[i], ls[i])] = i - 1
+ self.stardata = stardata
+ if ls[0] in ["normal", "cartesian", "nosurvey"]:
+ assert (("from" in stardata and "to" in stardata) or "station" in stardata), args
+ elif ls[0] == "default":
+ stardata = self.stardatadefault
+ else:
+ assert ls[0] == "passage", args
+ elif cmd == "set" and re.match("names(?i)", args):
+ pass
+ elif re.match("date$(?i)", cmd):
+ self.LoadSurvexDate(survexblock, args)
+ elif re.match("team$(?i)", cmd):
+ self.LoadSurvexTeam(survexblock, args)
else:
- legsinblock = self.survexlegsnumber - previousnlegs
- print(insp+" - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber))
- survexblock.legsall = legsinblock
- survexblock.save()
- return
- elif cmd == "flags":
- # Here we could set on/off 'splay', 'not splay', 'surface', 'not surface', or 'duplicate'
- # but this data is only used for sense-checking not to actually calculate anything important
- pass
- elif cmd == "data":
- ls = line.lower().split()
- stardata = { "type":ls[0] }
- for i in range(0, len(ls)):
- stardata[self.stardataparamconvert.get(ls[i], ls[i])] = i - 1
- self.stardata = stardata
- if ls[0] in ["normal", "cartesian", "nosurvey"]:
- assert (("from" in stardata and "to" in stardata) or "station" in stardata), line
- elif ls[0] == "default":
- stardata = self.stardatadefault
+ self.LoadSurvexIgnore(survexblock, args, cmd)
+ else: # not a *cmd so we are reading data OR rx_comment failed
+ if "from" in self.stardata: # only interested in survey legs
+ self.LoadSurvexLineLeg(survexblock, svxline, sline, comment)
else:
- assert ls[0] == "passage", line
- elif cmd == "set" and re.match("names(?i)", line):
- pass
- elif re.match("date$(?i)", cmd):
- self.LoadSurvexDate(survexblock, line)
- elif re.match("team$(?i)", cmd):
- self.LoadSurvexTeam(survexblock, line)
- else:
- self.LoadSurvexIgnore(survexblock, line, cmd)
+ pass # ignore all other sorts of data
def FindAndLoadAllSurvex(survexblockroot, survexfileroot):