diff options
author | Philip Sargent <philip.sargent@klebos.com> | 2020-06-27 17:55:59 +0100 |
---|---|---|
committer | Philip Sargent <philip.sargent@klebos.com> | 2020-06-27 17:55:59 +0100 |
commit | 4716eaa4b6cc6169f7c662403fbde55247150249 (patch) | |
tree | 51552da0c4ce5cfa1eb4acae99d96a29d638cc80 /parsers/survex.py | |
parent | c55716df085c3a66c5eb919f1ea6f74f5cd2c650 (diff) | |
download | troggle-4716eaa4b6cc6169f7c662403fbde55247150249.tar.gz troggle-4716eaa4b6cc6169f7c662403fbde55247150249.tar.bz2 troggle-4716eaa4b6cc6169f7c662403fbde55247150249.zip |
Working. More fault checking.
Diffstat (limited to 'parsers/survex.py')
-rw-r--r-- | parsers/survex.py | 319 |
1 files changed, 264 insertions, 55 deletions
diff --git a/parsers/survex.py b/parsers/survex.py index 9e4a275..a88d8e3 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -41,26 +41,35 @@ class LoadSurvex(): rx_linelen = re.compile(r"[\d\-+.]+$") rx_team = re.compile(r"(?i)(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$") rx_person = re.compile(r"(?i) and | / |, | & | \+ |^both$|^none$") - rx_qm = re.compile(r'^\s*QM(\d)\s+?([a-dA-DxX])\s+([\w\-]+)\.(\d+)\s+(([\w\-]+)\.(\d+)|\-)\s+(.+)$') + rx_qm = re.compile(r'(?i)^\s*QM(\d)\s+?([a-dA-DxX])\s+([\w\-]+)\.(\d+)\s+(([\w\-]+)\.(\d+)|\-)\s+(.+)$') # remember there is also QM_PATTERN used in views_other and set in settings.py - rx_comment = re.compile(r"([^;]*?)\s*(?:;\s*(.*))?\n?$") - rx_ref = re.compile(r'^\s*ref[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)') - rx_star = re.compile(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$') + rx_cave = re.compile(r'caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/') + rx_comment = re.compile(r'([^;]*?)\s*(?:;\s*(.*))?\n?$') + rx_comminc = re.compile(r'(?i)^\s*;\*include[\s](.*)$') # inserted by linear collate ;*include + rx_commcni = re.compile(r'(?i)^\s*;\*edulcni[\s](.*)$') # inserted by linear collate ;*edulcni + rx_include = re.compile(r'(?i)^\s*(\*include[\s].*)$') + rx_ref = re.compile(r'(?i)^\s*ref[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)') + rx_star = re.compile(r'(?i)\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$') rx_starref = re.compile(r'(?i)^\s*\*ref[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$') rx_argsref = re.compile(r'(?i)^[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$') survexlegsalllength = 0.0 survexlegsnumber = 0 depthbegin = 0 - depthimport = 0 + depthinclude = 0 stackbegin =[] - stackimport = [] + stackinclude = [] svxfileslist =[] lineno = 0 insp = "" callcount = 0 stardata ={} + includedfilename ="" + currenttitle ="" + currentsurvexblock = None + currentsurvexfile = None + currentcave = None def __init__(self): pass @@ -234,7 +243,6 @@ class LoadSurvex(): print((self.insp+message)) models.DataIssue.objects.create(parser='survex', message=message) - def LoadSurvexQM(self, survexblock, qmline): insp = self.insp qm_no = qmline.group(1) @@ -274,9 +282,65 @@ class LoadSurvex(): print(insp+message) models.DataIssue.objects.create(parser='survex', message=message) + def LoadSurvexDataCmd(survexblock,args): + ls = args.lower().split() + stardata = { "type":ls[0] } + for i in range(0, len(ls)): + stardata[self.stardataparamconvert.get(ls[i], ls[i])] = i - 1 + self.stardata = stardata + if ls[0] in ["normal", "cartesian", "nosurvey"]: + assert (("from" in stardata and "to" in stardata) or "station" in stardata), args + elif ls[0] == "default": + stardata = self.stardatadefault + else: + assert ls[0] == "passage", args + + def LoadSurvexFlags(self, line, cmd): + # Here we could set on/off 'splay', 'not splay', 'surface', 'not surface', or 'duplicate' + # but this data is only used for sense-checking not to actually calculate anything important + pass + + def IdentifyCave(self, cavepath): + path = os.path.join(os.path.split(cavepath)[0], re.sub(r"\.svx$", "", cavepath)) + path_match = self.rx_cave.search(path) + print(' - Attempting cave match for %s' % path) + if path_match: + pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) + # print(insp+pos_cave) + cave = models_caves.getCaveByReference(pos_cave) + if cave: + survexfile.cave = cave + print(' - Cave matched for %s' % path) + return cave + else: + print(' ! No cave match for %s' % path) + return None + + def LoadSurvexFileBlock(self, survexblock, includelabel): + """Creates SurvexDirectory and SurvexFile in the database + with links to 'cave' + Creates a new current survexblock with valid .survexfile and valid .survexdirectory + """ + cave = self.IdentifyCave(self, includelabel) + survexdirectory = SurvexDirectory(path=dirpath, cave=cave, primarysurvexfile=self) + survexdirectory.save() + + newsurvexfile = models_survex.SurvexFile(path=includelabel) + newsurvexfile.survexdirectory = survexdirectory + newsurvexfile.save() + + name = includelabel + newsurvexblock = models_survex.SurvexBlock(name=name, parent=survexblock, + survexpath=survexblock.survexpath+"."+name, + cave=survexfile.cave, survexfile=newsurvexfile, + legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0) + newsurvexblock.save + + self.currentsurvexfile = newsurvexfile + self.currentsurvexblock = newsurvexblock def LoadSurvexComment(self, survexblock, comment): - # ignore all comments except ;ref and ;QM + # ignore all comments except ;ref and ;QM and ;*include (for collated survex file) refline = self.rx_ref.match(comment) if refline: comment = comment.replace("ref","").strip() @@ -285,11 +349,17 @@ class LoadSurvex(): qmline = self.rx_qm.match(comment) if qmline: self.LoadSurvexQM(survexblock, qmline) - - def LoadSurvexFlags(self, line, cmd): - # Here we could set on/off 'splay', 'not splay', 'surface', 'not surface', or 'duplicate' - # but this data is only used for sense-checking not to actually calculate anything important - pass + + included = self.rx_comminc.match(comment) + # ;*include means we have been included; not 'proceed to include' which *include means + if included: + self.LoadSurvexFileBlock(survexblock, included) + + edulcni = self.rx_commcni.match(comment) + # ;*include means we have been included; not 'proceed to include' which *include means + if edulcni: + currentsurvexblock = currentsurvexblock.parent + currentsurvexfile = currentsurvexblock.parent.survexfile def LoadSurvexSetup(self,survexblock, survexfile): self.depthbegin = 0 @@ -311,12 +381,10 @@ class LoadSurvex(): if cave: survexfile.cave = cave - - - def RecursiveLoad(self, survexblock, survexfile, fin): + def RecursiveRecursiveLoad(self, survexblock, survexfile, fin): """Follows the *include links in all the survex files from the root file 1623.svx and reads in the survex blocks, other data and the wallet references (scansfolder) as it - goes. This part of the data import process is where the maximum memory is used and where it + goes. This part of the data include process is where the maximum memory is used and where it crashes on memory-constrained machines. Begin-end blocks may also be nested. """ self.LoadSurvexSetup(survexblock, survexfile) @@ -361,7 +429,7 @@ class LoadSurvex(): self.insp += "> " #-------------------------------------------------------- fininclude = includesurvexfile.OpenFile() - self.RecursiveLoad(survexblock, includesurvexfile, fininclude) + self.RecursiveRecursiveLoad(survexblock, includesurvexfile, fininclude) fininclude.close() #-------------------------------------------------------- self.insp = self.insp[2:] @@ -402,7 +470,7 @@ class LoadSurvex(): print(insp+" - BLOCK-enter nested *begin/*end block: '{}'".format(name)) self.insp += "> " #-------------------------------------------------------- - self.RecursiveLoad(survexblockdown, survexfile, fin) + self.RecursiveRecursiveLoad(survexblockdown, survexfile, fin) #-------------------------------------------------------- # do not close the file as there may be more blocks in this one # and it is re-read afresh with every nested begin-end block. @@ -451,12 +519,99 @@ class LoadSurvex(): else: pass # ignore all other sorts of data - def RecursiveScan(self, survexblock, survexfile, fin, flinear): + def LinearRecursiveLoad(self, survexblock, path, fin, skipto): + """Loads a single survex file. Usually used to import all the survex files which have been collated + into a single file. Loads the begin/end blocks recursively. + """ + self.relativefilename = path + cave = self.IdentifyCave(path) # this will produce null for survex files which are geographic collections + + svxlines = fin.read().splitlines() + for svxline in svxlines: + self.lineno += 1 + if self.lineno < skipto: + continue # skip through file to the place we got up to + + sline, comment = self.rx_comment.match(svxline.strip()).groups() + if comment: + self.LoadSurvexComment(survexblock, comment) + if not sline: + continue # skip blank lines + + # detect a star command + mstar = self.rx_star.match(sline) + if mstar: # yes we are reading a *cmd + cmd, args = mstar.groups() + cmd = cmd.lower() + if re.match("begin$(?i)", cmd): + self.depthbegin += 1 + if args: + depth = " " * self.depthbegin + self.stackbegin.append(args.lower()) + + previousnlegs = self.survexlegsnumber + name = args.lower() + print(' - Begin found for:{}, creating new SurvexBlock '.format(name)) + + survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock, + survexpath=survexblock.survexpath+"."+name, + cave=self.currentcave, survexfile=self.currentsurvexfile, + legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0) + survexblockdown.save() + survexblock.save() + survexblock = survexblockdown + else: + self.depthbegin += 1 + + elif re.match("end$(?i)", cmd): + # haven#t really thought this through.. + if survexblock: + self.currentsurvexblock = survexblock.parent + self.currentsurvexfile = survexblock.parent.survexfile + + if self.depthbegin: + print(" - End -return from nested *begin/*end block: '{}'".format(args)) + self.depthbegin -= 1 + else: + legsinblock = self.survexlegsnumber - previousnlegs + print(" - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber)) + survexblock.legsall = legsinblock + survexblock.save() + return + + elif re.match("title$(?i)", cmd): + self.currenttitle = args + elif cmd == "ref": + self.LoadSurvexRef(survexblock, args) + elif cmd == "flags": + self.LoadSurvexFlags(args, cmd) + elif cmd == "data": + self.LoadSurvexDataCmd(survexblock, args) + elif re.match("date$(?i)", cmd): + self.LoadSurvexDate(survexblock, args) + elif re.match("team$(?i)", cmd): + self.LoadSurvexTeam(survexblock, args) + elif cmd == "set" and re.match("names(?i)", args): + pass + elif re.match("include$(?i)", cmd): + message = " ! -ERROR *include command not expected here {}. Re-run a full Survex import.".format(path) + print(message) + print(message,file=sys.stderr) + models.DataIssue.objects.create(parser='survex', message=message) + else: + self.LoadSurvexIgnore(survexblock, args, cmd) + else: # not a *cmd so we are reading data OR rx_comment failed + if "from" in self.stardata: # only interested in survey legs + self.LoadSurvexLineLeg(survexblock, svxline, sline, comment) + else: + pass # ignore all other sorts of data + + def RecursiveScan(self, survexblock, survexfile, fin, flinear, fcollate): """Follows the *include links in all the survex files from the root file 1623.svx - and reads only the *import and *begin and *end statements. It produces a linearised - list of the import tree + and reads only the *include and *begin and *end statements. It produces a linearised + list of the include tree """ - indent = " " * self.depthimport + indent = " " * self.depthinclude sys.stderr.flush(); self.callcount +=1 if self.callcount % 10 ==0 : @@ -464,11 +619,28 @@ class LoadSurvex(): if self.callcount % 500 ==0 : print("\n", file=sys.stderr,end='') + if survexfile in self.svxfileslist: + message = " * Warning. Survex file already seen: {}".format(survexfile.path) + print(message) + print(message,file=flinear) + print(message,file=sys.stderr) + models.DataIssue.objects.create(parser='survex', message=message) + if self.svxfileslist.count(survexfile) > 20: + message = " ! ERROR. Survex file already seen 20x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(survexfile.path) + print(message) + print(message,file=flinear) + print(message,file=sys.stderr) + models.DataIssue.objects.create(parser='survex', message=message) + return self.svxfileslist.append(survexfile) svxlines = fin.read().splitlines() for svxline in svxlines: self.lineno += 1 + includestmt =self.rx_include.match(svxline) + if not includestmt: + fcollate.write("{}\n".format(svxline)) + sline, comment = self.rx_comment.match(svxline.strip()).groups() mstar = self.rx_star.match(sline) if mstar: # yes we are reading a *cmd @@ -481,24 +653,32 @@ class LoadSurvex(): includesurvexfile = models_survex.SurvexFile(path=includepath) if includesurvexfile.exists(): + # do not create SurvexFile in DB here by doing includesurvexfile.save(). Do it when reading data. #-------------------------------------------------------- - self.depthimport += 1 + self.depthinclude += 1 fininclude = includesurvexfile.OpenFile() - flinear.write("{:2} {} *import {}\n".format(self.depthimport, indent, includesurvexfile.path)) + fcollate.write(";*include {}\n".format(includesurvexfile.path)) + flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includesurvexfile.path)) push = includesurvexfile.path.lower() - self.stackimport.append(push) - self.RecursiveScan(survexblock, includesurvexfile, fininclude, flinear) - pop = self.stackimport.pop() + self.stackinclude.append(push) + self.RecursiveScan(survexblock, includesurvexfile, fininclude, flinear, fcollate) + pop = self.stackinclude.pop() if pop != push: - print("!!!!!!! ERROR pop != push {} != {} {}".format(pop, push, self.stackimport)) - print("!!!!!!! ERROR pop != push {} != {} {}\n".format(pop, push, self.stackimport),file=flinear) - print("!!!!!!! ERROR pop != push {} != {} {}".format(pop, push, self.stackimport),file=sys.stderr) - flinear.write("{:2} {} *tropmi {}\n".format(self.depthimport, indent, includesurvexfile.path)) + message = "!!!!!!! ERROR pop != push {} != {} {}".format(pop, push, self.stackinclude) + print(message) + print(message,file=flinear) + print(message,file=sys.stderr) + models.DataIssue.objects.create(parser='survex', message=message) + flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, includesurvexfile.path)) + fcollate.write(";*edulcni {}\n".format(includesurvexfile.path)) fininclude.close() - self.depthimport -= 1 + self.depthinclude -= 1 #-------------------------------------------------------- else: - print(" ! ERROR *include file not found for {}".format(includesurvexfile)) + message = " ! ERROR *include file not found for {}".format(includesurvexfile) + print(message) + print(message,file=sys.stderr) + models.DataIssue.objects.create(parser='survex', message=message) elif re.match("begin$(?i)", cmd): self.depthbegin += 1 depth = " " * self.depthbegin @@ -516,14 +696,17 @@ class LoadSurvex(): args = " " popargs = self.stackbegin.pop() if popargs != args.lower(): - print("!!!!!!! ERROR BEGIN/END pop != push {} != {}\n{}".format(popargs, args, self. stackbegin)) - print("!!!!!!! ERROR BEGIN/END pop != push {} != {}\n{}\n".format(popargs, args, self. stackbegin), file=flinear) - print(" !!!!!!! ERROR BEGIN/END pop != push {} != {}\n{}".format(popargs, args,self. stackbegin), file=sys.stderr,) + message = "!!!!!!! ERROR BEGIN/END pop != push {} != {}\n{}".format(popargs, args, self. stackbegin) + print(message) + print(message,file=flinear) + print(message,file=sys.stderr) + models.DataIssue.objects.create(parser='survex', message=message) self.depthbegin -= 1 pass + def FindAndLoadAllSurvex(survexblockroot): """Follows the *include links recursively to find files """ @@ -535,43 +718,69 @@ def FindAndLoadAllSurvex(survexblockroot): print(' - SCANNING All Survex Blocks...',file=sys.stderr) survexfileroot = survexblockroot.survexfile - svxl0 = LoadSurvex() - svxl0.callcount = 0 - svxl0.depthimport = 0 + collatefilename = "_" + survexfileroot.path + ".svx" + + svx_scan = LoadSurvex() + svx_scan.callcount = 0 + svx_scan.depthinclude = 0 indent="" - + fcollate = open(collatefilename, 'w') + mem0 = models.get_process_memory() + print(" - MEM:{:7.2f} MB START".format(mem0),file=sys.stderr) flinear = open('svxlinear.log', 'w') - flinear.write(" - MEM:{:.2f} MB START {}\n".format(mem0,survexfileroot.path)) + flinear.write(" - MEM:{:7.2f} MB START {}\n".format(mem0,survexfileroot.path)) finroot = survexfileroot.OpenFile() - flinear.write("{:2} {} *import {}\n".format(svxl0.depthimport, indent, survexfileroot.path)) - svxl0.RecursiveScan(survexblockroot, survexfileroot, finroot, flinear) - flinear.write("{:2} {} *tropmi {}\n".format(svxl0.depthimport, indent, survexfileroot.path)) + fcollate.write(";*include {}\n".format(survexfileroot.path)) + flinear.write("{:2} {} *include {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path)) + svx_scan.RecursiveScan(survexblockroot, survexfileroot, finroot, flinear, fcollate) + flinear.write("{:2} {} *edulcni {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path)) + fcollate.write(";*edulcni {}\n".format(survexfileroot.path)) mem1 = models.get_process_memory() flinear.write(" - MEM:{:.2f} MB STOP {}\n".format(mem1,survexfileroot.path)) flinear.write(" - MEM:{:.3f} MB USED\n".format(mem1-mem0)) - svxfileslist = svxl0.svxfileslist - flinear.write(" - {:,} survex files in linear import list \n".format(len(svxfileslist))) + svxfileslist = svx_scan.svxfileslist + flinear.write(" - {:,} survex files in linear include list \n".format(len(svxfileslist))) flinear.close() - svxl0 = None - print("\n - {:,} survex files in linear import list \n".format(len(svxfileslist)),file=sys.stderr) + fcollate.close() + svx_scan = None + print("\n - {:,} survex files in linear include list \n".format(len(svxfileslist)),file=sys.stderr) - # INSERT IN HERE linear, not recursive, wrt import loading of all the data using [svxfileslist] # - for f in svxfileslist: - # Load legs etc. recursive only in BEGIN / END - pass + mem1 = models.get_process_memory() + print(" - MEM:{:7.2f} MB END ".format(mem0),file=sys.stderr) + print(" - MEM:{:7.3f} MB USED".format(mem1-mem0),file=sys.stderr) + svxfileslist = [] # free memory + + # Before doing this, it would be good to identify the *equate and *entrance we need that are relevant to the + # entrance locations currently loaded after this by LoadPos(), but could better be done before ? + # look in MapLocations() for how we find the entrances + print('\n - Loading All Survex Blocks...',file=sys.stderr) + + svx_load = LoadSurvex() + with open(collatefilename, "r") as fcollate: + #svx_load.LinearRecursiveLoad(survexblockroot,survexfileroot.path,fcollate, 0) + pass + + print(" - MEM:{:7.2f} MB STOP".format(mem1),file=sys.stderr) + print(" - MEM:{:7.3f} MB USED".format(mem1-mem0),file=sys.stderr) + + survexlegsnumber = svx_load.survexlegsnumber + survexlegsalllength = svx_load.survexlegsalllength + mem1 = models.get_process_memory() + svx_load = None + print('\n - Loading All Survex Blocks...',file=sys.stderr) svxlrl = LoadSurvex() finroot = survexfileroot.OpenFile() - svxlrl.RecursiveLoad(survexblockroot, survexfileroot, finroot) + svxlrl.RecursiveRecursiveLoad(survexblockroot, survexfileroot, finroot) finroot.close() - survexlegsnumber = svxlrl.survexlegsnumber survexlegsalllength = svxlrl.survexlegsalllength svxlrl = None + # Close the logging file, Restore sys.stdout to our old saved file handle sys.stdout.close() print("+", file=sys.stderr) |