summaryrefslogtreecommitdiffstats
path: root/parsers/survex.py
diff options
context:
space:
mode:
authorPhilip Sargent <philip.sargent@klebos.com>2020-07-01 22:49:38 +0100
committerPhilip Sargent <philip.sargent@klebos.com>2020-07-01 22:49:38 +0100
commitdf434cd39909d177f98dec5a7575f61ea701c102 (patch)
tree3e37aee9a8ab1e4e2515170c774c4fa7f6b20514 /parsers/survex.py
parent8cc768e5b6398e4f3fe3211b3f8dc9712e58dd93 (diff)
downloadtroggle-df434cd39909d177f98dec5a7575f61ea701c102.tar.gz
troggle-df434cd39909d177f98dec5a7575f61ea701c102.tar.bz2
troggle-df434cd39909d177f98dec5a7575f61ea701c102.zip
SurvexBlocks now importing in detail
Diffstat (limited to 'parsers/survex.py')
-rw-r--r--parsers/survex.py90
1 files changed, 47 insertions, 43 deletions
diff --git a/parsers/survex.py b/parsers/survex.py
index da0395d..7db8af0 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -324,7 +324,8 @@ class LoadingSurvex():
return self.caveslist[g]
print(' ! Failed to find cave for {}'.format(cavepath.lower()))
else:
- print(' ! No regex cave match for %s' % cavepath.lower())
+ # not a cave, but that is fine.
+ # print(' ! No regex(standard identifier) cave match for %s' % cavepath.lower())
return None
def GetSurvexDirectory(self, headpath):
@@ -353,17 +354,17 @@ class LoadingSurvex():
print("\n"+message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
- def LoadSurvexFile(self, includelabel):
+ def LoadSurvexFile(self, svxid):
"""Creates SurvexFile in the database, and SurvexDirectory if needed
with links to 'cave'
- Creates a new current survexblock with valid .survexfile and valid .survexdirectory
+ Creates a new current survexfile and valid .survexdirectory
The survexblock passed-in is not necessarily the parent. FIX THIS.
"""
depth = " " * self.depthbegin
- print("{:2}{} - NEW survexfile:'{}'".format(self.depthbegin, depth, includelabel))
- headpath, tail = os.path.split(includelabel)
+ print("{:2}{} - NEW survexfile:'{}'".format(self.depthbegin, depth, svxid))
+ headpath = os.path.dirname(svxid)
- newfile = models_survex.SurvexFile(path=includelabel)
+ newfile = models_survex.SurvexFile(path=svxid)
newfile.save() # until we do this there is no internal id so no foreign key works
self.currentsurvexfile = newfile
newdirectory = self.GetSurvexDirectory(headpath)
@@ -383,10 +384,10 @@ class LoadingSurvex():
newfile.cave = cave
#print("\n"+str(newdirectory.cave),file=sys.stderr)
else:
- self.ReportNonCaveIncludes(headpath, includelabel)
+ self.ReportNonCaveIncludes(headpath, svxid)
if not newfile.survexdirectory:
- message = " ! SurvexDirectory NOT SET in new SurvexFile {} ".format(includelabel)
+ message = " ! SurvexDirectory NOT SET in new SurvexFile {} ".format(svxid)
print(message)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
@@ -401,7 +402,7 @@ class LoadingSurvex():
def ProcessIncludeLine(self, included):
svxid = included.groups()[0]
#depth = " " * self.depthbegin
- #print("{:2}{} - Include survexfile:'{}'".format(self.depthbegin, depth, svxid))
+ #print("{:2}{} - Include survexfile:'{}' {}".format(self.depthbegin, depth, svxid, included))
self.LoadSurvexFile(svxid)
self.stacksvxfiles.append(self.currentsurvexfile)
@@ -426,8 +427,10 @@ class LoadingSurvex():
self.LoadSurvexQM(survexblock, qmline)
included = self.rx_comminc.match(comment)
- # ;*include means we have been included; not 'proceed to include' which *include means
+ # ;*include means 'we have been included'; whereas *include means 'proceed to include'
if included:
+ #depth = " " * self.depthbegin
+ #print("{:2}{} - Include comment:'{}' {}".format(self.depthbegin, depth, comment, included))
self.ProcessIncludeLine(included)
edulcni = self.rx_commcni.match(comment)
@@ -457,7 +460,7 @@ class LoadingSurvex():
def LinearLoad(self, survexblock, path, svxlines):
"""Loads a single survex file. Usually used to import all the survex files which have been collated
- into a single file. Loads the begin/end blocks recursively.
+ into a single file. Loads the begin/end blocks using a stack for labels.
"""
self.relativefilename = path
cave = self.IdentifyCave(path) # this will produce null for survex files which are geographic collections
@@ -466,19 +469,25 @@ class LoadingSurvex():
self.currentsurvexfile.save() # django insists on this although it is already saved !?
blockcount = 0
+ lineno = 0
def tickle():
nonlocal blockcount
blockcount +=1
if blockcount % 10 ==0 :
print(".", file=sys.stderr,end='')
- if blockcount % 500 ==0 :
+ if blockcount % 200 ==0 :
print("\n", file=sys.stderr,end='')
- sys.stderr.flush();
+ print(" - MEM:{:7.3f} MB in use".format(models.get_process_memory()),file=sys.stderr)
+ sys.stderr.flush()
for svxline in svxlines:
- sline, comment = self.rx_comment.match(svxline.strip()).groups()
+ lineno += 1
+ sline, comment = self.rx_comment.match(svxline).groups()
if comment:
+ depth = " " * self.depthbegin
+ print("{:4} {:2}{} - Include comment:'{}' {}".format(lineno, self.depthbegin, depth, comment, sline))
self.LoadSurvexComment(survexblock, comment) # this catches the ;*include and ;*edulcni lines too
+
if not sline:
continue # skip blank lines
@@ -503,10 +512,10 @@ class LoadingSurvex():
pathlist += "." + id
newsurvexblock = models_survex.SurvexBlock(name=blockid, parent=survexblock,
survexpath=pathlist,
- title = survexblock.title, # copy parent inititally
cave=self.currentcave, survexfile=self.currentsurvexfile,
legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
newsurvexblock.save()
+ newsurvexblock.title = "("+survexblock.title+")" # copy parent inititally
survexblock = newsurvexblock
# survexblock.survexfile.save()
survexblock.save() # django insists on this , but we want to save at the end !
@@ -564,7 +573,7 @@ class LoadingSurvex():
else:
pass # ignore all other sorts of data
- def RecursiveScan(self, survexblock, survexfile, fin, flinear, fcollate):
+ def RecursiveScan(self, survexblock, path, fin, flinear, fcollate):
"""Follows the *include links in all the survex files from the root file 1623.svx
and reads only the *include and *begin and *end statements. It produces a linearised
list of the include tree
@@ -577,27 +586,27 @@ class LoadingSurvex():
if self.callcount % 500 ==0 :
print("\n", file=sys.stderr,end='')
- if survexfile in self.svxfileslist:
- message = " * Warning. Survex file already seen: {}".format(survexfile.path)
+ if path in self.svxfileslist:
+ message = " * Warning. Duplicate in *include list at:{} depth:{} file:{}".format(self.callcount, self.depthinclude, path)
print(message)
print(message,file=flinear)
- print(message,file=sys.stderr)
+ print("\n"+message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
- if self.svxfileslist.count(survexfile) > 20:
- message = " ! ERROR. Survex file already seen 20x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(survexfile.path)
+ if self.svxfileslist.count(path) > 20:
+ message = " ! ERROR. Survex file already seen 20x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(path)
print(message)
print(message,file=flinear)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
return
- self.svxfileslist.append(survexfile)
+ self.svxfileslist.append(path)
svxlines = fin.read().splitlines()
for svxline in svxlines:
self.lineno += 1
includestmt =self.rx_include.match(svxline)
if not includestmt:
- fcollate.write("{}\n".format(svxline))
+ fcollate.write("{}\n".format(svxline.strip()))
sline, comment = self.rx_comment.match(svxline.strip()).groups()
mstar = self.rx_star.match(sline)
@@ -605,40 +614,35 @@ class LoadingSurvex():
cmd, args = mstar.groups()
cmd = cmd.lower()
if re.match("(?i)include$", cmd):
- includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args)))
- path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
-
- includesurvexfile = models_survex.SurvexFile(path=includepath)
- includesurvexfile.save()
+ includepath = os.path.normpath(os.path.join(os.path.split(path)[0], re.sub(r"\.svx$", "", args)))
+ #path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
- if includesurvexfile.exists():
- # do not create SurvexFile in DB here by doing includesurvexfile.save(). Do it when reading data.
+ fullpath = os.path.join(settings.SURVEX_DATA, includepath + ".svx")
+ if os.path.isfile(fullpath):
#--------------------------------------------------------
self.depthinclude += 1
- fininclude = includesurvexfile.OpenFile()
- fcollate.write(";*include {}\n".format(includesurvexfile.path))
- flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includesurvexfile.path))
- push = includesurvexfile.path.lower()
+ fininclude = open(fullpath,'r')
+ fcollate.write(";*include {}\n".format(includepath))
+ flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includepath))
+ push = includepath.lower()
self.stackinclude.append(push)
#-----------------
- self.RecursiveScan(survexblock, includesurvexfile, fininclude, flinear, fcollate)
+ self.RecursiveScan(survexblock, includepath, fininclude, flinear, fcollate)
#-----------------
pop = self.stackinclude.pop()
if pop != push:
- message = "!!!!!!! ERROR pop != push {} != {} {}".format(pop, push, self.stackinclude)
+ message = "!! ERROR mismatch *include pop!=push {}".format(pop, push, self.stackinclude)
print(message)
print(message,file=flinear)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
- includesurvexfile.path += "-TEMP"
- includesurvexfile = None
flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, pop))
fcollate.write(";*edulcni {}\n".format(pop))
fininclude.close()
self.depthinclude -= 1
#--------------------------------------------------------
else:
- message = " ! ERROR *include file not found for [{}]:'{}'".format(includesurvexfile, includepath)
+ message = " ! ERROR *include file not found for:'{}'".format(includepath)
print(message)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
@@ -659,7 +663,7 @@ class LoadingSurvex():
args = " "
popargs = self.stackbegin.pop()
if popargs != args.lower():
- message = "!!!!!!! ERROR BEGIN/END pop != push {} != {}\n{}".format(popargs, args, self. stackbegin)
+ message = "!! ERROR mismatch in BEGIN/END labels pop!=push '{}'!='{}'\n{}".format(popargs, args, self. stackbegin)
print(message)
print(message,file=flinear)
print(message,file=sys.stderr)
@@ -701,7 +705,7 @@ def FindAndLoadSurvex(survexblockroot):
fcollate.write(";*include {}\n".format(survexfileroot.path))
flinear.write("{:2} {} *include {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path))
#----------------------------------------------------------------
- svx_scan.RecursiveScan(survexblockroot, survexfileroot, finroot, flinear, fcollate)
+ svx_scan.RecursiveScan(survexblockroot, survexfileroot.path, finroot, flinear, fcollate)
#----------------------------------------------------------------
flinear.write("{:2} {} *edulcni {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path))
fcollate.write(";*edulcni {}\n".format(survexfileroot.path))
@@ -712,7 +716,7 @@ def FindAndLoadSurvex(survexblockroot):
flinear.write(" - {:,} survex files in linear include list \n".format(len(svxfileslist)))
flinear.close()
fcollate.close()
- svx_scan = None
+ svx_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.?
print("\n - {:,} survex files in linear include list \n".format(len(svxfileslist)),file=sys.stderr)
mem1 = models.get_process_memory()
@@ -724,7 +728,7 @@ def FindAndLoadSurvex(survexblockroot):
# entrance locations currently loaded after this by LoadPos(), but could better be done before ?
# look in MapLocations() for how we find the entrances
- print('\n - Loading All Survex Blocks (LinearRecursive)',file=sys.stderr)
+ print('\n - Loading All Survex Blocks (LinearLoad)',file=sys.stderr)
svx_load = LoadingSurvex()
svx_load.svxdirs[""] = survexfileroot.survexdirectory