diff options
Diffstat (limited to 'parsers/survex.py')
-rw-r--r-- | parsers/survex.py | 90 |
1 files changed, 47 insertions, 43 deletions
diff --git a/parsers/survex.py b/parsers/survex.py index da0395d..7db8af0 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -324,7 +324,8 @@ class LoadingSurvex(): return self.caveslist[g] print(' ! Failed to find cave for {}'.format(cavepath.lower())) else: - print(' ! No regex cave match for %s' % cavepath.lower()) + # not a cave, but that is fine. + # print(' ! No regex(standard identifier) cave match for %s' % cavepath.lower()) return None def GetSurvexDirectory(self, headpath): @@ -353,17 +354,17 @@ class LoadingSurvex(): print("\n"+message,file=sys.stderr) models.DataIssue.objects.create(parser='survex', message=message) - def LoadSurvexFile(self, includelabel): + def LoadSurvexFile(self, svxid): """Creates SurvexFile in the database, and SurvexDirectory if needed with links to 'cave' - Creates a new current survexblock with valid .survexfile and valid .survexdirectory + Creates a new current survexfile and valid .survexdirectory The survexblock passed-in is not necessarily the parent. FIX THIS. """ depth = " " * self.depthbegin - print("{:2}{} - NEW survexfile:'{}'".format(self.depthbegin, depth, includelabel)) - headpath, tail = os.path.split(includelabel) + print("{:2}{} - NEW survexfile:'{}'".format(self.depthbegin, depth, svxid)) + headpath = os.path.dirname(svxid) - newfile = models_survex.SurvexFile(path=includelabel) + newfile = models_survex.SurvexFile(path=svxid) newfile.save() # until we do this there is no internal id so no foreign key works self.currentsurvexfile = newfile newdirectory = self.GetSurvexDirectory(headpath) @@ -383,10 +384,10 @@ class LoadingSurvex(): newfile.cave = cave #print("\n"+str(newdirectory.cave),file=sys.stderr) else: - self.ReportNonCaveIncludes(headpath, includelabel) + self.ReportNonCaveIncludes(headpath, svxid) if not newfile.survexdirectory: - message = " ! SurvexDirectory NOT SET in new SurvexFile {} ".format(includelabel) + message = " ! SurvexDirectory NOT SET in new SurvexFile {} ".format(svxid) print(message) print(message,file=sys.stderr) models.DataIssue.objects.create(parser='survex', message=message) @@ -401,7 +402,7 @@ class LoadingSurvex(): def ProcessIncludeLine(self, included): svxid = included.groups()[0] #depth = " " * self.depthbegin - #print("{:2}{} - Include survexfile:'{}'".format(self.depthbegin, depth, svxid)) + #print("{:2}{} - Include survexfile:'{}' {}".format(self.depthbegin, depth, svxid, included)) self.LoadSurvexFile(svxid) self.stacksvxfiles.append(self.currentsurvexfile) @@ -426,8 +427,10 @@ class LoadingSurvex(): self.LoadSurvexQM(survexblock, qmline) included = self.rx_comminc.match(comment) - # ;*include means we have been included; not 'proceed to include' which *include means + # ;*include means 'we have been included'; whereas *include means 'proceed to include' if included: + #depth = " " * self.depthbegin + #print("{:2}{} - Include comment:'{}' {}".format(self.depthbegin, depth, comment, included)) self.ProcessIncludeLine(included) edulcni = self.rx_commcni.match(comment) @@ -457,7 +460,7 @@ class LoadingSurvex(): def LinearLoad(self, survexblock, path, svxlines): """Loads a single survex file. Usually used to import all the survex files which have been collated - into a single file. Loads the begin/end blocks recursively. + into a single file. Loads the begin/end blocks using a stack for labels. """ self.relativefilename = path cave = self.IdentifyCave(path) # this will produce null for survex files which are geographic collections @@ -466,19 +469,25 @@ class LoadingSurvex(): self.currentsurvexfile.save() # django insists on this although it is already saved !? blockcount = 0 + lineno = 0 def tickle(): nonlocal blockcount blockcount +=1 if blockcount % 10 ==0 : print(".", file=sys.stderr,end='') - if blockcount % 500 ==0 : + if blockcount % 200 ==0 : print("\n", file=sys.stderr,end='') - sys.stderr.flush(); + print(" - MEM:{:7.3f} MB in use".format(models.get_process_memory()),file=sys.stderr) + sys.stderr.flush() for svxline in svxlines: - sline, comment = self.rx_comment.match(svxline.strip()).groups() + lineno += 1 + sline, comment = self.rx_comment.match(svxline).groups() if comment: + depth = " " * self.depthbegin + print("{:4} {:2}{} - Include comment:'{}' {}".format(lineno, self.depthbegin, depth, comment, sline)) self.LoadSurvexComment(survexblock, comment) # this catches the ;*include and ;*edulcni lines too + if not sline: continue # skip blank lines @@ -503,10 +512,10 @@ class LoadingSurvex(): pathlist += "." + id newsurvexblock = models_survex.SurvexBlock(name=blockid, parent=survexblock, survexpath=pathlist, - title = survexblock.title, # copy parent inititally cave=self.currentcave, survexfile=self.currentsurvexfile, legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0) newsurvexblock.save() + newsurvexblock.title = "("+survexblock.title+")" # copy parent inititally survexblock = newsurvexblock # survexblock.survexfile.save() survexblock.save() # django insists on this , but we want to save at the end ! @@ -564,7 +573,7 @@ class LoadingSurvex(): else: pass # ignore all other sorts of data - def RecursiveScan(self, survexblock, survexfile, fin, flinear, fcollate): + def RecursiveScan(self, survexblock, path, fin, flinear, fcollate): """Follows the *include links in all the survex files from the root file 1623.svx and reads only the *include and *begin and *end statements. It produces a linearised list of the include tree @@ -577,27 +586,27 @@ class LoadingSurvex(): if self.callcount % 500 ==0 : print("\n", file=sys.stderr,end='') - if survexfile in self.svxfileslist: - message = " * Warning. Survex file already seen: {}".format(survexfile.path) + if path in self.svxfileslist: + message = " * Warning. Duplicate in *include list at:{} depth:{} file:{}".format(self.callcount, self.depthinclude, path) print(message) print(message,file=flinear) - print(message,file=sys.stderr) + print("\n"+message,file=sys.stderr) models.DataIssue.objects.create(parser='survex', message=message) - if self.svxfileslist.count(survexfile) > 20: - message = " ! ERROR. Survex file already seen 20x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(survexfile.path) + if self.svxfileslist.count(path) > 20: + message = " ! ERROR. Survex file already seen 20x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(path) print(message) print(message,file=flinear) print(message,file=sys.stderr) models.DataIssue.objects.create(parser='survex', message=message) return - self.svxfileslist.append(survexfile) + self.svxfileslist.append(path) svxlines = fin.read().splitlines() for svxline in svxlines: self.lineno += 1 includestmt =self.rx_include.match(svxline) if not includestmt: - fcollate.write("{}\n".format(svxline)) + fcollate.write("{}\n".format(svxline.strip())) sline, comment = self.rx_comment.match(svxline.strip()).groups() mstar = self.rx_star.match(sline) @@ -605,40 +614,35 @@ class LoadingSurvex(): cmd, args = mstar.groups() cmd = cmd.lower() if re.match("(?i)include$", cmd): - includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args))) - path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath) - - includesurvexfile = models_survex.SurvexFile(path=includepath) - includesurvexfile.save() + includepath = os.path.normpath(os.path.join(os.path.split(path)[0], re.sub(r"\.svx$", "", args))) + #path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath) - if includesurvexfile.exists(): - # do not create SurvexFile in DB here by doing includesurvexfile.save(). Do it when reading data. + fullpath = os.path.join(settings.SURVEX_DATA, includepath + ".svx") + if os.path.isfile(fullpath): #-------------------------------------------------------- self.depthinclude += 1 - fininclude = includesurvexfile.OpenFile() - fcollate.write(";*include {}\n".format(includesurvexfile.path)) - flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includesurvexfile.path)) - push = includesurvexfile.path.lower() + fininclude = open(fullpath,'r') + fcollate.write(";*include {}\n".format(includepath)) + flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includepath)) + push = includepath.lower() self.stackinclude.append(push) #----------------- - self.RecursiveScan(survexblock, includesurvexfile, fininclude, flinear, fcollate) + self.RecursiveScan(survexblock, includepath, fininclude, flinear, fcollate) #----------------- pop = self.stackinclude.pop() if pop != push: - message = "!!!!!!! ERROR pop != push {} != {} {}".format(pop, push, self.stackinclude) + message = "!! ERROR mismatch *include pop!=push {}".format(pop, push, self.stackinclude) print(message) print(message,file=flinear) print(message,file=sys.stderr) models.DataIssue.objects.create(parser='survex', message=message) - includesurvexfile.path += "-TEMP" - includesurvexfile = None flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, pop)) fcollate.write(";*edulcni {}\n".format(pop)) fininclude.close() self.depthinclude -= 1 #-------------------------------------------------------- else: - message = " ! ERROR *include file not found for [{}]:'{}'".format(includesurvexfile, includepath) + message = " ! ERROR *include file not found for:'{}'".format(includepath) print(message) print(message,file=sys.stderr) models.DataIssue.objects.create(parser='survex', message=message) @@ -659,7 +663,7 @@ class LoadingSurvex(): args = " " popargs = self.stackbegin.pop() if popargs != args.lower(): - message = "!!!!!!! ERROR BEGIN/END pop != push {} != {}\n{}".format(popargs, args, self. stackbegin) + message = "!! ERROR mismatch in BEGIN/END labels pop!=push '{}'!='{}'\n{}".format(popargs, args, self. stackbegin) print(message) print(message,file=flinear) print(message,file=sys.stderr) @@ -701,7 +705,7 @@ def FindAndLoadSurvex(survexblockroot): fcollate.write(";*include {}\n".format(survexfileroot.path)) flinear.write("{:2} {} *include {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path)) #---------------------------------------------------------------- - svx_scan.RecursiveScan(survexblockroot, survexfileroot, finroot, flinear, fcollate) + svx_scan.RecursiveScan(survexblockroot, survexfileroot.path, finroot, flinear, fcollate) #---------------------------------------------------------------- flinear.write("{:2} {} *edulcni {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path)) fcollate.write(";*edulcni {}\n".format(survexfileroot.path)) @@ -712,7 +716,7 @@ def FindAndLoadSurvex(survexblockroot): flinear.write(" - {:,} survex files in linear include list \n".format(len(svxfileslist))) flinear.close() fcollate.close() - svx_scan = None + svx_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.? print("\n - {:,} survex files in linear include list \n".format(len(svxfileslist)),file=sys.stderr) mem1 = models.get_process_memory() @@ -724,7 +728,7 @@ def FindAndLoadSurvex(survexblockroot): # entrance locations currently loaded after this by LoadPos(), but could better be done before ? # look in MapLocations() for how we find the entrances - print('\n - Loading All Survex Blocks (LinearRecursive)',file=sys.stderr) + print('\n - Loading All Survex Blocks (LinearLoad)',file=sys.stderr) svx_load = LoadingSurvex() svx_load.svxdirs[""] = survexfileroot.survexdirectory |