diff options
author | Philip Sargent <philip.sargent@klebos.com> | 2020-06-28 14:42:26 +0100 |
---|---|---|
committer | Philip Sargent <philip.sargent@klebos.com> | 2020-06-28 14:42:26 +0100 |
commit | bf1c683fd09ad9c984748e5abc18a610cca03ee1 (patch) | |
tree | ac8da18924240a6eee79a84c4146746d16b32210 /parsers/survex.py | |
parent | 122cdd7fc8620b2348d75b1bb786ae4202db9a55 (diff) | |
download | troggle-bf1c683fd09ad9c984748e5abc18a610cca03ee1.tar.gz troggle-bf1c683fd09ad9c984748e5abc18a610cca03ee1.tar.bz2 troggle-bf1c683fd09ad9c984748e5abc18a610cca03ee1.zip |
fixing parent blocks & titles
Diffstat (limited to 'parsers/survex.py')
-rw-r--r-- | parsers/survex.py | 370 |
1 files changed, 194 insertions, 176 deletions
diff --git a/parsers/survex.py b/parsers/survex.py index 31dff03..1c3f624 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -45,7 +45,7 @@ class LoadingSurvex(): rx_qm = re.compile(r'(?i)^\s*QM(\d)\s+?([a-dA-DxX])\s+([\w\-]+)\.(\d+)\s+(([\w\-]+)\.(\d+)|\-)\s+(.+)$') # remember there is also QM_PATTERN used in views_other and set in settings.py - rx_cave = re.compile(r'caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/') + rx_cave = re.compile(r'(?i)caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)') rx_comment = re.compile(r'([^;]*?)\s*(?:;\s*(.*))?\n?$') rx_comminc = re.compile(r'(?i)^\*include[\s]*([-\w/]*).*$') # inserted by linear collate ;*include rx_commcni = re.compile(r'(?i)^\*edulcni[\s]*([-\w/]*).*$') # inserted by linear collate ;*edulcni @@ -61,16 +61,15 @@ class LoadingSurvex(): depthinclude = 0 stackbegin =[] stackinclude = [] + stacksvxfiles = [] svxfileslist = [] svxdirs = {} svxcaves = {} - svxfiletitle = {} lineno = 0 insp = "" callcount = 0 stardata ={} includedfilename ="" - currenttitle ="" currentsurvexblock = None currentsurvexfile = None currentcave = None @@ -79,9 +78,7 @@ class LoadingSurvex(): pass def LoadSurvexIgnore(self, survexblock, line, cmd): - if cmd == "title": - pass # unused in troggle today - but will become text list on SurvexBlock - elif cmd == "require": + if cmd == "require": pass # should we check survex version available for processing? elif cmd in ["equate", "fix", "alias", "calibrate", "cs","entrance", "export", "case", "declination", "infer","instrument", "sd", "units"]: @@ -314,9 +311,9 @@ class LoadingSurvex(): return self.svxcaves[cavepath] path_match = self.rx_cave.search(cavepath) - #print(' - Attempting cave match for %s' % cavepath) if path_match: - sluggy = '%s-%s'.format(path_match.group(1), path_match.group(2)) + sluggy = '{}-{}'.format(path_match.group(1), path_match.group(2)) + print(' - Attempting cave match for %s' % sluggy) cave = GetCaveLookup().get(sluggy) # Below is how it has been done for years: very fuzzy & slow searches # ..and wrong! @@ -326,60 +323,58 @@ class LoadingSurvex(): self.svxcaves[cavepath] = cave print(' - Cave matched for %s' % cavepath) return cave + else: + print(' ! Failed to set cave for {} or {}'.format(cavepath, sluggy)) else: - print(' ! No cave match for %s' % cavepath) + print(' ! No regex cave match for %s' % cavepath) return None - def LoadSurvexFileBlock(self, survexblock, includelabel): - """Creates SurvexDirectory and SurvexFile in the database + def LoadSurvexFile(self, includelabel): + """Creates SurvexFile in the database, and SurvexDirectory if needed with links to 'cave' Creates a new current survexblock with valid .survexfile and valid .survexdirectory + The survexblock passed-in is not necessarily the parent. FIX THIS. """ depth = " " * self.depthbegin print("{:2}{} - NEW survexfile:'{}'".format(self.depthbegin, depth, includelabel)) headpath, tail = os.path.split(includelabel) if headpath not in self.svxdirs: - self.svxdirs[headpath] = models_survex.SurvexDirectory(path=headpath, primarysurvexfile=survexblock.survexfile) + self.svxdirs[headpath] = models_survex.SurvexDirectory(path=headpath, primarysurvexfile=self.currentsurvexfile) newsurvexdirectory = self.svxdirs[headpath] newsurvexfile = models_survex.SurvexFile(path=includelabel) newsurvexfile.survexdirectory = newsurvexdirectory - # Do not create a survexblock. Yes, there is a virtual block before the *begin statement but - # only the *title is usually in that, so just inherit the *title into the blocks. - # name = includelabel - # newsurvexblock = models_survex.SurvexBlock(name=name, parent=survexblock, - # survexpath=survexblock.survexpath+"."+name, - # survexfile=newsurvexfile, - # legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0) - cave = self.IdentifyCave(headpath) if cave: newsurvexdirectory.cave = cave newsurvexfile.cave = cave - #newsurvexblock.cave = cave - newsurvexdirectory.save() - newsurvexfile.save() - #newsurvexblock.save - + self.currentsurvexfile.save() # django insists on this although it is already saved !? + try: + newsurvexdirectory.save() + except: + print(newsurvexdirectory, file=sys.stderr) + print(newsurvexdirectory.primarysurvexfile, file=sys.stderr) + raise self.currentsurvexfile = newsurvexfile - #self.currentsurvexblock = newsurvexblock - def ProcessIncludeLine(self, survexblock, included): - # should do some push stuff here + def ProcessIncludeLine(self, included): svxid = included.groups()[0] #depth = " " * self.depthbegin #print("{:2}{} - Include survexfile:'{}'".format(self.depthbegin, depth, svxid)) - self.LoadSurvexFileBlock(survexblock, svxid) + self.LoadSurvexFile(svxid) + self.stacksvxfiles.append(self.currentsurvexfile) + - def ProcessEdulcniLine(self, survexblock, edulcni): - # should do some pop stuff here + def ProcessEdulcniLine(self, edulcni): + """Saves the current survexfile in the db + """ svxid = edulcni.groups()[0] - depth = " " * self.depthbegin - print("{:2}{} - Edulcni survexfile:'{}'".format(self.depthbegin, depth, svxid)) - self.currentsurvexblock = survexblock.parent - self.currentsurvexfile = survexblock.parent.survexfile + #depth = " " * self.depthbegin + #print("{:2}{} - Edulcni survexfile:'{}'".format(self.depthbegin, depth, svxid)) + self.currentsurvexfile.save() + self.currentsurvexfile = self.stacksvxfiles.pop() def LoadSurvexComment(self, survexblock, comment): # ignore all comments except ;ref and ;QM and ;*include (for collated survex file) @@ -395,12 +390,12 @@ class LoadingSurvex(): included = self.rx_comminc.match(comment) # ;*include means we have been included; not 'proceed to include' which *include means if included: - self.ProcessIncludeLine(survexblock,included) + self.ProcessIncludeLine(included) edulcni = self.rx_commcni.match(comment) # ;*edulcni means we are returning from an included file if edulcni: - self.ProcessEdulcniLine(survexblock,edulcni) + self.ProcessEdulcniLine(edulcni) def LoadSurvexSetup(self,survexblock, survexfile): self.depthbegin = 0 @@ -428,121 +423,122 @@ class LoadingSurvex(): goes. This part of the data include process is where the maximum memory is used and where it crashes on memory-constrained machines. Begin-end blocks may also be nested. """ - self.LoadSurvexSetup(survexblock, survexfile) - insp =self.insp - previousnlegs = 0 + # self.LoadSurvexSetup(survexblock, survexfile) + # insp =self.insp + # previousnlegs = 0 - svxlines = fin.read().splitlines() - # cannot close file now as may be recursively called with the same fin if nested *begin-end + # svxlines = fin.read().splitlines() + # # cannot close file now as may be recursively called with the same fin if nested *begin-end - for svxline in svxlines: - self.lineno += 1 - sline, comment = self.rx_comment.match(svxline.strip()).groups() - if comment: - self.LoadSurvexComment(survexblock, comment) - if not sline: - continue # skip blank lines - - # detect the star command - mstar = self.rx_star.match(sline) - if mstar: # yes we are reading a *cmd - cmd, args = mstar.groups() - cmd = cmd.lower() - if re.match("include$(?i)", cmd): - cave = self.IdentifyCave(args) - if cave: - survexfile.cave = cave - - includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args))) - print((insp+' - INCLUDE-go path found, including - ' + args)) - - includesurvexfile = models_survex.SurvexFile(path=includepath) - includesurvexfile.save() - includesurvexfile.SetDirectory() - if includesurvexfile.exists(): - survexblock.save() - self.insp += "> " - #-------------------------------------------------------- - fininclude = includesurvexfile.OpenFile() - self.RecursiveRecursiveLoad(survexblock, includesurvexfile, fininclude) - fininclude.close() - #-------------------------------------------------------- - self.insp = self.insp[2:] - insp = self.insp - print((insp+' - INCLUDE-return from include - ' + includepath)) - else: - print((insp+' ! ERROR *include file not found for %s' % includesurvexfile)) - - elif re.match("begin$(?i)", cmd): - # On a *begin statement we start a new survexblock. - # There should not be any *include inside a begin-end block, so this is a simple - # load not a recursive fileload. But there may be many blocks nested to any depth in one file. - if args: - newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args)) - # Try to find the cave in the DB if not use the string as before - path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath) - if path_match: - pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) - # print(insp+pos_cave) - cave = models_caves.getCaveByReference(pos_cave) - if cave: - survexfile.cave = cave - else: - print((insp+' - No match (b) for %s' % newsvxpath)) - - previousnlegs = self.survexlegsnumber - name = args.lower() - print(insp+' - Begin found for:{}, creating new SurvexBlock '.format(name)) - # the recursive call re-reads the entire file. This is wasteful. We should pass in only - # the un-parsed part of the file. - survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock, - survexpath=survexblock.survexpath+"."+name, - cave=survexfile.cave, survexfile=survexfile, - legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0) - survexblockdown.save() - survexblock.save() - survexblock = survexblockdown - print(insp+" - BLOCK-enter nested *begin/*end block: '{}'".format(name)) - self.insp += "> " - #-------------------------------------------------------- - self.RecursiveRecursiveLoad(survexblockdown, survexfile, fin) - #-------------------------------------------------------- - # do not close the file as there may be more blocks in this one - # and it is re-read afresh with every nested begin-end block. - self.insp = self.insp[2:] - insp = self.insp - else: - self.depthbegin += 1 - - elif re.match("end$(?i)", cmd): - if self.depthbegin: - print(insp+" - BLOCK-return from nested *begin/*end block: '{}'".format(args)) - self.depthbegin -= 1 - else: - legsinblock = self.survexlegsnumber - previousnlegs - print(insp+" - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber)) - survexblock.legsall = legsinblock - survexblock.save() - return - elif cmd == "ref": - self.LoadSurvexRef(survexblock, args) - elif cmd == "flags": - self.LoadSurvexFlags(args, cmd) - elif cmd == "data": - self.LoadSurvexDataCmd(survexblock, args) - elif cmd == "set" and re.match("names(?i)", args): - pass - elif re.match("date$(?i)", cmd): - self.LoadSurvexDate(survexblock, args) - elif re.match("team$(?i)", cmd): - self.LoadSurvexTeam(survexblock, args) - else: - self.LoadSurvexIgnore(survexblock, args, cmd) - else: # not a *cmd so we are reading data OR rx_comment failed - if "from" in self.stardata: # only interested in survey legs - self.LoadSurvexLineLeg(survexblock, svxline, sline, comment) - else: - pass # ignore all other sorts of data + # for svxline in svxlines: + # self.lineno += 1 + # sline, comment = self.rx_comment.match(svxline.strip()).groups() + # if comment: + # self.LoadSurvexComment(survexblock, comment) + # if not sline: + # continue # skip blank lines + + # # detect the star command + # mstar = self.rx_star.match(sline) + # if mstar: # yes we are reading a *cmd + # cmd, args = mstar.groups() + # cmd = cmd.lower() + # if re.match("include$(?i)", cmd): + # cave = self.IdentifyCave(args) + # if cave: + # survexfile.cave = cave + + # includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args))) + # print((insp+' - INCLUDE-go path found, including - ' + args)) + + # includesurvexfile = models_survex.SurvexFile(path=includepath) + # includesurvexfile.save() + # includesurvexfile.SetDirectory() + # if includesurvexfile.exists(): + # survexblock.save() + # self.insp += "> " + # #-------------------------------------------------------- + # fininclude = includesurvexfile.OpenFile() + # self.RecursiveRecursiveLoad(survexblock, includesurvexfile, fininclude) + # fininclude.close() + # #-------------------------------------------------------- + # self.insp = self.insp[2:] + # insp = self.insp + # print((insp+' - INCLUDE-return from include - ' + includepath)) + # else: + # print((insp+' ! ERROR *include file not found for %s' % includesurvexfile)) + + # elif re.match("begin$(?i)", cmd): + # # On a *begin statement we start a new survexblock. + # # There should not be any *include inside a begin-end block, so this is a simple + # # load not a recursive fileload. But there may be many blocks nested to any depth in one file. + # if args: + # newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args)) + # # Try to find the cave in the DB if not use the string as before + # path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath) + # if path_match: + # pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) + # # print(insp+pos_cave) + # cave = models_caves.getCaveByReference(pos_cave) + # if cave: + # survexfile.cave = cave + # else: + # print((insp+' - No match (b) for %s' % newsvxpath)) + + # previousnlegs = self.survexlegsnumber + # name = args.lower() + # print(insp+' - Begin found for:{}, creating new SurvexBlock '.format(name)) + # # the recursive call re-reads the entire file. This is wasteful. We should pass in only + # # the un-parsed part of the file. + # survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock, + # survexpath=survexblock.survexpath+"."+name, + # cave=survexfile.cave, survexfile=survexfile, + # legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0) + # survexblockdown.save() + # survexblock.save() + # survexblock = survexblockdown + # print(insp+" - BLOCK-enter nested *begin/*end block: '{}'".format(name)) + # self.insp += "> " + # #-------------------------------------------------------- + # self.RecursiveRecursiveLoad(survexblockdown, survexfile, fin) + # #-------------------------------------------------------- + # # do not close the file as there may be more blocks in this one + # # and it is re-read afresh with every nested begin-end block. + # self.insp = self.insp[2:] + # insp = self.insp + # else: + # self.depthbegin += 1 + + # elif re.match("end$(?i)", cmd): + # if self.depthbegin: + # print(insp+" - BLOCK-return from nested *begin/*end block: '{}'".format(args)) + # self.depthbegin -= 1 + # else: + # legsinblock = self.survexlegsnumber - previousnlegs + # print(insp+" - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber)) + # survexblock.legsall = legsinblock + # survexblock.save() + # return + # elif cmd == "ref": + # self.LoadSurvexRef(survexblock, args) + # elif cmd == "flags": + # self.LoadSurvexFlags(args, cmd) + # elif cmd == "data": + # self.LoadSurvexDataCmd(survexblock, args) + # elif cmd == "set" and re.match("names(?i)", args): + # pass + # elif re.match("date$(?i)", cmd): + # self.LoadSurvexDate(survexblock, args) + # elif re.match("team$(?i)", cmd): + # self.LoadSurvexTeam(survexblock, args) + # else: + # self.LoadSurvexIgnore(survexblock, args, cmd) + # else: # not a *cmd so we are reading data OR rx_comment failed + # if "from" in self.stardata: # only interested in survey legs + # self.LoadSurvexLineLeg(survexblock, svxline, sline, comment) + # else: + # pass # ignore all other sorts of data + pass def LinearRecursiveLoad(self, survexblock, path, svxlines): """Loads a single survex file. Usually used to import all the survex files which have been collated @@ -550,8 +546,20 @@ class LoadingSurvex(): """ self.relativefilename = path cave = self.IdentifyCave(path) # this will produce null for survex files which are geographic collections - + + self.currentsurvexfile = survexblock.survexfile + self.currentsurvexfile.save() # django insists on this although it is already saved !? + blockcount = 0 + def tickle(): + nonlocal blockcount + blockcount +=1 + if blockcount % 10 ==0 : + print(".", file=sys.stderr,end='') + if blockcount % 500 ==0 : + print("\n", file=sys.stderr,end='') + sys.stderr.flush(); + for svxline in svxlines: sline, comment = self.rx_comment.match(svxline.strip()).groups() if comment: @@ -569,43 +577,50 @@ class LoadingSurvex(): if re.match("begin$(?i)", cmd): self.depthbegin += 1 depth = " " * self.depthbegin - self.stackbegin.append(args.lower()) + blockid = args.lower() + self.stackbegin.append(blockid) previousnlegs = self.survexlegsnumber - name = args.lower() - print("{:2}{} - Begin for :'{}'".format(self.depthbegin,depth, name)) - survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock, - survexpath=survexblock.survexpath+"."+name, + print("{:2}{} - Begin for :'{}'".format(self.depthbegin,depth, blockid)) + pathlist = "" + for id in self.stackbegin: + if len(id) > 0: + pathlist += "." + id + newsurvexblock = models_survex.SurvexBlock(name=blockid, parent=survexblock, + survexpath=pathlist, cave=self.currentcave, survexfile=self.currentsurvexfile, legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0) - survexblockdown.save() - survexblock.save() - survexblock = survexblockdown - - blockcount +=1 - if blockcount % 10 ==0 : - print(".", file=sys.stderr,end='') - if blockcount % 500 ==0 : - print("\n", file=sys.stderr,end='') - sys.stderr.flush(); + survexblock = newsurvexblock + survexblock.survexfile.save() # django insists on this although it is already saved !? + survexblock.save() # django insists on this , but we want to save at the end ! + tickle() # ---------------------------END elif re.match("end$(?i)", cmd): depth = " " * self.depthbegin self.currentsurvexblock = survexblock.parent - self.currentsurvexfile = survexblock.parent.survexfile print("{:2}{} - End from:'{}'".format(self.depthbegin,depth,args)) legsinblock = self.survexlegsnumber - previousnlegs print("{:2}{} - LEGS: {} (previous: {}, now:{})".format(self.depthbegin, depth,legsinblock,previousnlegs,self.survexlegsnumber)) survexblock.legsall = legsinblock - survexblock.save() + try: + survexblock.parent.save() # django insists on this although it is already saved !? + except: + print(survexblock.parent, file=sys.stderr) + raise + try: + survexblock.save() # save to db at end of block + except: + print(survexblock, file=sys.stderr) + raise + blockid = self.stackbegin.pop() self.depthbegin -= 1 # ----------------------------- elif re.match("(?i)title$", cmd): - self.currenttitle = args + survexblock.title = args # only apply to current survexblock elif re.match("(?i)ref$", cmd): self.LoadSurvexRef(survexblock, args) elif re.match("(?i)flags$", cmd): @@ -671,7 +686,7 @@ class LoadingSurvex(): if mstar: # yes we are reading a *cmd cmd, args = mstar.groups() cmd = cmd.lower() - if re.match("include$(?i)", cmd): + if re.match("(?i)include$", cmd): includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args))) path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath) @@ -704,7 +719,7 @@ class LoadingSurvex(): print(message) print(message,file=sys.stderr) models.DataIssue.objects.create(parser='survex', message=message) - elif re.match("begin$(?i)", cmd): + elif re.match("(?i)begin$", cmd): self.depthbegin += 1 depth = " " * self.depthbegin if args: @@ -714,7 +729,7 @@ class LoadingSurvex(): self.stackbegin.append(pushargs.lower()) flinear.write(" {:2} {} *begin {}\n".format(self.depthbegin, depth, args)) pass - elif re.match("end$(?i)", cmd): + elif re.match("(?i)end$", cmd): depth = " " * self.depthbegin flinear.write(" {:2} {} *end {}\n".format(self.depthbegin, depth, args)) if not args: @@ -729,7 +744,10 @@ class LoadingSurvex(): self.depthbegin -= 1 pass - + elif re.match("(?i)title$", cmd): + depth = " " * self.depthbegin + flinear.write(" {:2} {} *title {}\n".format(self.depthbegin, depth, args)) + pass def FindAndLoadSurvex(survexblockroot): @@ -765,7 +783,7 @@ def FindAndLoadSurvex(survexblockroot): flinear.write("{:2} {} *edulcni {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path)) fcollate.write(";*edulcni {}\n".format(survexfileroot.path)) mem1 = models.get_process_memory() - flinear.write(" - MEM:{:.2f} MB STOP {}\n".format(mem1,survexfileroot.path)) + flinear.write("\n - MEM:{:.2f} MB STOP {}\n".format(mem1,survexfileroot.path)) flinear.write(" - MEM:{:.3f} MB USED\n".format(mem1-mem0)) svxfileslist = svx_scan.svxfileslist flinear.write(" - {:,} survex files in linear include list \n".format(len(svxfileslist))) @@ -791,7 +809,7 @@ def FindAndLoadSurvex(survexblockroot): svx_load.LinearRecursiveLoad(survexblockroot,survexfileroot.path, svxlines) #---------------------------------------------------------------- - print(" - MEM:{:7.2f} MB STOP".format(mem1),file=sys.stderr) + print("\n - MEM:{:7.2f} MB STOP".format(mem1),file=sys.stderr) print(" - MEM:{:7.3f} MB USED".format(mem1-mem0),file=sys.stderr) survexlegsnumber = svx_load.survexlegsnumber @@ -799,7 +817,7 @@ def FindAndLoadSurvex(survexblockroot): mem1 = models.get_process_memory() svx_load = None - print('\n - Loading All Survex Blocks (RecursiveRecursive)',file=sys.stderr) + # print('\n - Loading All Survex Blocks (RecursiveRecursive)',file=sys.stderr) # svxlrl = LoadingSurvex() # finroot = survexfileroot.OpenFile() |