diff options
author | Philip Sargent <philip.sargent@klebos.com> | 2020-06-24 22:46:18 +0100 |
---|---|---|
committer | Philip Sargent <philip.sargent@klebos.com> | 2020-06-24 22:46:18 +0100 |
commit | 04f14c91f0b85108384ca4b9f2b190c0b26eef98 (patch) | |
tree | a68606697d75fd3b149e4fc71da54437ecc34bb2 /parsers/survex.py | |
parent | 664c18ebbebfd01c69dc6de0b38a78703aa35d36 (diff) | |
download | troggle-04f14c91f0b85108384ca4b9f2b190c0b26eef98.tar.gz troggle-04f14c91f0b85108384ca4b9f2b190c0b26eef98.tar.bz2 troggle-04f14c91f0b85108384ca4b9f2b190c0b26eef98.zip |
rearrange ref and comment detection
Diffstat (limited to 'parsers/survex.py')
-rw-r--r-- | parsers/survex.py | 368 |
1 files changed, 192 insertions, 176 deletions
diff --git a/parsers/survex.py b/parsers/survex.py index f800039..beca1b8 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -38,7 +38,6 @@ class LoadSurvex(): stardatadefault = {"type":"normal", "t":"leg", "from":0, "to":1, "tape":2, "compass":3, "clino":4} stardataparamconvert = {"length":"tape", "bearing":"compass", "gradient":"clino"} - rx_braskets= re.compile(r"[()]") rx_linelen = re.compile(r"[\d\-+.]+$") rx_team = re.compile(r"(?i)(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$") rx_person = re.compile(r"(?i) and | / |, | & | \+ |^both$|^none$") @@ -46,21 +45,15 @@ class LoadSurvex(): # remember there is also QM_PATTERN used in views_other and set in settings.py rx_comment = re.compile(r"([^;]*?)\s*(?:;\s*(.*))?\n?$") - rx_ref = re.compile(r'.*?ref.*?(\d+)\s*#\s*(X)?\s*(\d+)') + rx_ref = re.compile(r'^\s*ref[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)') rx_star = re.compile(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$') - # years from 1960 to 2039 rx_starref = re.compile(r'(?i)^\s*\*ref[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$') - # rx_starref = re.compile("""?x # VERBOSE mode - can't get this to work - # ^\s*\*ref # look for *ref at start of line - # [\s.:]* # some spaces, stops or colons - # ((?:19[6789]\d)|(?:20[0123]\d)) # a date from 1960 to 2039 - captured as one field - # \s*# # spaces then hash separator - # ?\s*(X) # optional X - captured - # ?\s*(.*?\d+.*?) # maybe a space, then at least one digit in the string - captured - # $(?i)""", re.X) # the end (do the whole thing case insensitively) + rx_argsref = re.compile(r'(?i)^[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$') survexlegsalllength = 0.0 survexlegsnumber = 0 + depthbegin = 0 + lineno = 0 insp = "" callcount = 0 stardata ={} @@ -112,16 +105,21 @@ class LoadSurvex(): survexblock.expeditionday = survexblock.expedition.get_expedition_day(survexblock.date) survexblock.save() - def LoadSurvexLineLeg(self, survexblock, stardata, sline, comment): + def LoadSurvexLineLeg(self, survexblock, svxline, sline, comment): """This reads compass, clino and tape data but only keeps the tape lengths, the rest is discarded after error-checking. """ - ls = sline.lower().split() + stardata = self.stardata survexleg = SurvexLeg() + + ls = sline.lower().split() # this next fails for two surface survey svx files which use / for decimal point # e.g. '29/09' in the tape measurement, or use decimals but in brackets, e.g. (06.05) - if stardata["type"] == "normal": - tape = self.rx_braskets.sub("",ls[stardata["tape"]]) + if stardata["type"] == "normal": # should use current flags setting for this + # print(" !! lineno '{}'\n !! svxline '{}'\n !! sline '{}'\n !! ls '{}'\n !! stardata {}".format(self.lineno, svxline, sline, ls,stardata)) + tape = ls[stardata["tape"]] + tape = tape.replace("(",".") + tape = tape.replace(")",".") tape = tape.replace("/",".") try: survexleg.tape = float(tape) @@ -134,6 +132,13 @@ class LoadSurvex(): models.DataIssue.objects.create(parser='survex', message=message) survexleg.tape = 0 try: + survexblock.totalleglength += survexleg.tape + self.survexlegsalllength += survexleg.tape + except ValueError: + message = ' ! Value Error: Tape length not added %s in %s' % (ls, survexblock.survexfile.path) + models.DataIssue.objects.create(parser='survex', message=message) + + try: lclino = ls[stardata["clino"]] except: print(("! Clino misread in", survexblock.survexfile.path)) @@ -142,6 +147,7 @@ class LoadSurvex(): message = ' ! Value Error: Clino misread in line %s in %s' % (ls, survexblock.survexfile.path) models.DataIssue.objects.create(parser='survex', message=message) lclino = error + try: lcompass = ls[stardata["compass"]] except: @@ -151,6 +157,7 @@ class LoadSurvex(): message = ' ! Value Error: Compass misread in line %s in %s' % (ls, survexblock.survexfile.path) models.DataIssue.objects.create(parser='survex', message=message) lcompass = error + if lclino == "up": survexleg.compass = 0.0 survexleg.clino = 90.0 @@ -176,22 +183,23 @@ class LoadSurvex(): # delete the object so that django autosaving doesn't save it. survexleg = None - itape = stardata.get("tape") - if itape: - try: - survexblock.totalleglength += float(ls[itape]) - self.survexlegsalllength += float(ls[itape]) - except ValueError: - print("! Length not added") + def LoadSurvexRef(self, survexblock, args): + # *REF but also ; Ref years from 1960 to 2039 + if len(args)< 4: + message = " ! Empty or BAD *REF command '{}' at {}".format(args, survexblock.survexfile.path) + print((self.insp+message)) + models.DataIssue.objects.create(parser='survex', message=message) + return + argsgps = self.rx_argsref.match(args) + if argsgps: + yr, letterx, wallet = argsgps.groups() + else: + message = " ! BAD *REF command '{}' at {}".format(args, survexblock.survexfile.path) + print((self.insp+message)) + models.DataIssue.objects.create(parser='survex', message=message) + return - def LoadSurvexLinePassage(self, survexblock, stardata, sline, comment): - # do not import this: *data passage.. data which is LRUD not tape/compass/clino - pass - - def LoadSurvexRef(self, survexblock, mstar): - # *REF but also ; Ref - yr,letterx,wallet = mstar.groups() if not letterx: letterx = "" else: @@ -199,23 +207,32 @@ class LoadSurvex(): if len(wallet)<2: wallet = "0" + wallet assert (int(yr)>1960 and int(yr)<2039), "Wallet year out of bounds: %s" % yr - assert (int(wallet)<100), "Wallet number more than 100: %s" % wallet refscan = "%s#%s%s" % (yr, letterx, wallet) + try: + if int(wallet)>100: + message = " ! Wallet *REF {} - too big {}".format(refscan, survexblock.survexfile.path) + print((self.insp+message)) + models.DataIssue.objects.create(parser='survex', message=message) + except: + message = " ! Wallet *REF {} - not numeric {}".format(refscan, survexblock.survexfile.path) + print((self.insp+message)) + models.DataIssue.objects.create(parser='survex', message=message) manyscansfolders = models_survex.ScansFolder.objects.filter(walletname=refscan) if manyscansfolders: survexblock.scansfolder = manyscansfolders[0] survexblock.save() if len(manyscansfolders) > 1: - message = ' ! Wallet *REF {} - multiple scan folders found {}'.format(refscan, survexblock.survexfile.path) + message = " ! Wallet *REF {} - multiple scan folders found {}".format(refscan, survexblock.survexfile.path) print((self.insp+message)) models.DataIssue.objects.create(parser='survex', message=message) else: - message = ' ! Wallet *REF {} - NOT found in manyscansfolders {}'.format(refscan, survexblock.survexfile.path) + message = " ! Wallet *REF {} - NOT found in manyscansfolders {}".format(refscan, survexblock.survexfile.path) print((self.insp+message)) models.DataIssue.objects.create(parser='survex', message=message) - def LoadSurvexQM(self, insp, survexblock, qmline): + def LoadSurvexQM(self, survexblock, qmline): + insp = self.insp qm_no = qmline.group(1) qm_grade = qmline.group(2) qm_from_section = qmline.group(3) @@ -261,28 +278,33 @@ class LoadSurvex(): models.DataIssue.objects.create(parser='survex', message=message) pass + def LoadSurvexComment(self, survexblock, comment): + # ignore all comments except ;ref and ;QM + refline = self.rx_ref.match(comment) + if refline: + comment = comment.replace("ref","").strip() + self.LoadSurvexRef(survexblock, comment) - def RecursiveLoad(self,survexblock, survexfile, fin): - """Follows the *include links in all the survex files from the root file 1623.svx - and reads in the survex blocks, other data and the wallet references (scansfolder) as it - goes. This part of the data import process is where the maximum memory is used and where it - crashes on memory-constrained machines. Begin-end blocks may also be nested. - """ - iblankbegins = 0 - stardata = self.stardatadefault - insp =self.insp - blocklegs = self.survexlegsnumber + qmline = self.rx_qm.match(comment) + if qmline: + self.LoadSurvexQM(survexblock, qmline) - print(insp+" - MEM:{:.3f} Reading. parent:{} <> {} ".format(models.get_process_memory(),survexblock.survexfile.path,survexfile.path)) - stamp = datetime.now() - lineno = 0 - + def LoadSurvexFlags(self, line, cmd): + # Here we could set on/off 'splay', 'not splay', 'surface', 'not surface', or 'duplicate' + # but this data is only used for sense-checking not to actually calculate anything important + pass + + def LoadSurvexSetup(self,survexblock, survexfile): + self.depthbegin = 0 + self.stardata = self.stardatadefault + blocklegs = self.survexlegsnumber + print(self.insp+" - MEM:{:.3f} Reading. parent:{} <> {} ".format(models.get_process_memory(),survexblock.survexfile.path, survexfile.path)) + self.lineno = 0 sys.stderr.flush(); self.callcount +=1 if self.callcount >=10: self.callcount=0 print(".", file=sys.stderr,end='') - # Try to find the cave in the DB if not use the string as before path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path) if path_match: @@ -290,150 +312,144 @@ class LoadSurvex(): cave = models_caves.getCaveByReference(pos_cave) if cave: survexfile.cave = cave - svxlines = '' + + def RecursiveLoad(self, survexblock, survexfile, fin): + """Follows the *include links in all the survex files from the root file 1623.svx + and reads in the survex blocks, other data and the wallet references (scansfolder) as it + goes. This part of the data import process is where the maximum memory is used and where it + crashes on memory-constrained machines. Begin-end blocks may also be nested. + """ + self.LoadSurvexSetup(survexblock, survexfile) + insp =self.insp + previousnlegs = 0 + svxlines = fin.read().splitlines() - # cannot close file now as it may be recursively called with the same file id fin if nested *begin - # occurs. + # cannot close file now as may be recursively called with the same fin if nested *begin-end + for svxline in svxlines: - lineno += 1 - # break the line at the comment + self.lineno += 1 sline, comment = self.rx_comment.match(svxline.strip()).groups() - mref = comment and self.rx_ref.match(comment) - if mref: - self.LoadSurvexRef(survexblock, mref) - - qmline = comment and self.rx_qm.match(comment) - if qmline: - self.LoadSurvexQM(insp, survexblock, qmline) - + if comment: + self.LoadSurvexComment(survexblock, comment) if not sline: - continue - - # detect the star ref command - rstar = self.rx_starref.match(sline) - if rstar: - self.LoadSurvexRef(survexblock, rstar) + continue # skip blank lines # detect the star command mstar = self.rx_star.match(sline) - if not mstar: - if "from" in stardata: - self.LoadSurvexLineLeg(survexblock, stardata, sline, comment) - pass - elif stardata["type"] == "passage": - pass - #self.LoadSurvexLinePassage(survexblock, stardata, sline, comment) - #Missing "station" in stardata. - continue - - # detect the star command - cmd, line = mstar.groups() - cmd = cmd.lower() - if re.match("include$(?i)", cmd): - includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line))) - print((insp+' - Include path found, including - ' + includepath)) - # Try to find the cave in the DB. if not, use the string as before - path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath) - if path_match: - pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) - print((insp+' - Match in DB (i) for cave {}.'.format(pos_cave))) - cave = models_caves.getCaveByReference(pos_cave) - if cave: - survexfile.cave = cave - else: - print((insp+' - NO Match in DB (i) for a cave for {}'.format(includepath))) - - includesurvexfile = models_survex.SurvexFile(path=includepath) - includesurvexfile.save() - includesurvexfile.SetDirectory() - if includesurvexfile.exists(): - survexblock.save() - fininclude = includesurvexfile.OpenFile() - self.survexlegsnumber = blocklegs - self.insp += "> " - self.RecursiveLoad(survexblock, includesurvexfile, fininclude) - #-------------------------------------------------------- - fininclude.close() - self.insp = self.insp[2:] - insp = self.insp - blocklegs = self.survexlegsnumber - else: - print((insp+' ! ERROR *include file not found for %s' % includesurvexfile)) - - elif re.match("begin$(?i)", cmd): - # On a *begin statement we start a new survexblock. - # There should not be any *include inside a begin-end block, so this is a simple - # load not a recursive fileload. But there may be many blocks nested to any depth in one file. - if line: - newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line)) - # Try to find the cave in the DB if not use the string as before - path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath) + if mstar: # yes we are reading a *cmd + cmd, args = mstar.groups() + cmd = cmd.lower() + if re.match("include$(?i)", cmd): + includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args))) + print((insp+' - INCLUDE-go path found, including - ' + includepath)) + # Try to find the cave in the DB. if not, use the string as before + path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath) if path_match: pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) - # print(insp+pos_cave) + print((insp+' - Match in DB (i) for cave {}.'.format(pos_cave))) cave = models_caves.getCaveByReference(pos_cave) if cave: survexfile.cave = cave else: - print((insp+' - No match (b) for %s' % newsvxpath)) - - previousnlegs = blocklegs - name = line.lower() - print(insp+' - Begin found for:{}, creating new SurvexBlock '.format(name)) - survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock, - survexpath=survexblock.survexpath+"."+name, - cave=survexfile.cave, survexfile=survexfile, - legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0) - survexblockdown.save() - survexblock.save() - survexblock = survexblockdown - print(insp+" - ENTERING nested *begin/*end block: {}".format(name)) - self.survexlegsnumber = blocklegs - self.insp += "> " - self.RecursiveLoad(survexblockdown, survexfile, fin) - #-------------------------------------------------------- - # do not close the file as there may be more blocks in this one - # and it is re-read afresh with every nested begin-end block. - self.insp = self.insp[2:] - insp = self.insp - blocklegs = self.survexlegsnumber - else: - iblankbegins += 1 + print((insp+' - NO Match in DB (i) for a cave for {}'.format(includepath))) + + includesurvexfile = models_survex.SurvexFile(path=includepath) + includesurvexfile.save() + includesurvexfile.SetDirectory() + if includesurvexfile.exists(): + survexblock.save() + self.insp += "> " + #-------------------------------------------------------- + fininclude = includesurvexfile.OpenFile() + self.RecursiveLoad(survexblock, includesurvexfile, fininclude) + fininclude.close() + #-------------------------------------------------------- + self.insp = self.insp[2:] + insp = self.insp + print((insp+' - INCLUDE-return from include - ' + includepath)) + else: + print((insp+' ! ERROR *include file not found for %s' % includesurvexfile)) + + elif re.match("begin$(?i)", cmd): + # On a *begin statement we start a new survexblock. + # There should not be any *include inside a begin-end block, so this is a simple + # load not a recursive fileload. But there may be many blocks nested to any depth in one file. + if args: + newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args)) + # Try to find the cave in the DB if not use the string as before + path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath) + if path_match: + pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) + # print(insp+pos_cave) + cave = models_caves.getCaveByReference(pos_cave) + if cave: + survexfile.cave = cave + else: + print((insp+' - No match (b) for %s' % newsvxpath)) + + previousnlegs = self.survexlegsnumber + name = args.lower() + print(insp+' - Begin found for:{}, creating new SurvexBlock '.format(name)) + # the recursive call re-reads the entire file. This is wasteful. We should pass in only + # the un-parsed part of the file. + survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock, + survexpath=survexblock.survexpath+"."+name, + cave=survexfile.cave, survexfile=survexfile, + legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0) + survexblockdown.save() + survexblock.save() + survexblock = survexblockdown + print(insp+" - BLOCK-enter nested *begin/*end block: '{}'".format(name)) + self.insp += "> " + #-------------------------------------------------------- + self.RecursiveLoad(survexblockdown, survexfile, fin) + #-------------------------------------------------------- + # do not close the file as there may be more blocks in this one + # and it is re-read afresh with every nested begin-end block. + self.insp = self.insp[2:] + insp = self.insp + else: + self.depthbegin += 1 - elif re.match("end$(?i)", cmd): - if iblankbegins: - print(insp+" - RETURNING from nested *begin/*end block: {}".format(line)) - iblankbegins -= 1 + elif re.match("end$(?i)", cmd): + if self.depthbegin: + print(insp+" - BLOCK-return from nested *begin/*end block: '{}'".format(args)) + self.depthbegin -= 1 + else: + legsinblock = self.survexlegsnumber - previousnlegs + print(insp+" - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber)) + survexblock.legsall = legsinblock + survexblock.save() + return + elif cmd == "ref": + self.LoadSurvexRef(survexblock, args) + elif cmd == "flags": + self.LoadSurvexFlags(args, cmd) + elif cmd == "data": + ls = args.lower().split() + stardata = { "type":ls[0] } + for i in range(0, len(ls)): + stardata[self.stardataparamconvert.get(ls[i], ls[i])] = i - 1 + self.stardata = stardata + if ls[0] in ["normal", "cartesian", "nosurvey"]: + assert (("from" in stardata and "to" in stardata) or "station" in stardata), args + elif ls[0] == "default": + stardata = self.stardatadefault + else: + assert ls[0] == "passage", args + elif cmd == "set" and re.match("names(?i)", args): + pass + elif re.match("date$(?i)", cmd): + self.LoadSurvexDate(survexblock, args) + elif re.match("team$(?i)", cmd): + self.LoadSurvexTeam(survexblock, args) else: - legsinblock = self.survexlegsnumber - previousnlegs - print(insp+" - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber)) - survexblock.legsall = legsinblock - survexblock.save() - return - elif cmd == "flags": - # Here we could set on/off 'splay', 'not splay', 'surface', 'not surface', or 'duplicate' - # but this data is only used for sense-checking not to actually calculate anything important - pass - elif cmd == "data": - ls = line.lower().split() - stardata = { "type":ls[0] } - for i in range(0, len(ls)): - stardata[self.stardataparamconvert.get(ls[i], ls[i])] = i - 1 - self.stardata = stardata - if ls[0] in ["normal", "cartesian", "nosurvey"]: - assert (("from" in stardata and "to" in stardata) or "station" in stardata), line - elif ls[0] == "default": - stardata = self.stardatadefault + self.LoadSurvexIgnore(survexblock, args, cmd) + else: # not a *cmd so we are reading data OR rx_comment failed + if "from" in self.stardata: # only interested in survey legs + self.LoadSurvexLineLeg(survexblock, svxline, sline, comment) else: - assert ls[0] == "passage", line - elif cmd == "set" and re.match("names(?i)", line): - pass - elif re.match("date$(?i)", cmd): - self.LoadSurvexDate(survexblock, line) - elif re.match("team$(?i)", cmd): - self.LoadSurvexTeam(survexblock, line) - else: - self.LoadSurvexIgnore(survexblock, line, cmd) + pass # ignore all other sorts of data def FindAndLoadAllSurvex(survexblockroot, survexfileroot): |