SurvexBlocks now importing in detail

author: Philip Sargent <philip.sargent@klebos.com> 2020-07-01 22:49:38 +0100
committer: Philip Sargent <philip.sargent@klebos.com> 2020-07-01 22:49:38 +0100
commit: df434cd39909d177f98dec5a7575f61ea701c102 (patch)
tree: 3e37aee9a8ab1e4e2515170c774c4fa7f6b20514 /parsers/survex.py
parent: 8cc768e5b6398e4f3fe3211b3f8dc9712e58dd93 (diff)
download: troggle-df434cd39909d177f98dec5a7575f61ea701c102.tar.gz
troggle-df434cd39909d177f98dec5a7575f61ea701c102.tar.bz2
troggle-df434cd39909d177f98dec5a7575f61ea701c102.zip
1 files changed, 47 insertions, 43 deletions
diff --git a/parsers/survex.py b/parsers/survex.py
index da0395d..7db8af0 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -324,7 +324,8 @@ class LoadingSurvex():
                     return self.caveslist[g]
             print('    ! Failed to find cave for {}'.format(cavepath.lower()))
         else:
-            print('    ! No regex cave match for %s' % cavepath.lower())
+            # not a cave, but that is fine.
+            # print('    ! No regex(standard identifier) cave match for %s' % cavepath.lower())
             return None
 
     def GetSurvexDirectory(self, headpath):
@@ -353,17 +354,17 @@ class LoadingSurvex():
         print("\n"+message,file=sys.stderr)
         models.DataIssue.objects.create(parser='survex', message=message)
         
-    def LoadSurvexFile(self, includelabel):
+    def LoadSurvexFile(self, svxid):
         """Creates SurvexFile in the database, and SurvexDirectory if needed
         with links to 'cave'
-        Creates a new current survexblock with valid .survexfile and valid .survexdirectory
+        Creates a new current survexfile and valid .survexdirectory
         The survexblock passed-in is not necessarily the parent. FIX THIS.
         """
         depth = " " * self.depthbegin
-        print("{:2}{}   - NEW survexfile:'{}'".format(self.depthbegin, depth, includelabel))
-        headpath, tail = os.path.split(includelabel)
+        print("{:2}{}   - NEW survexfile:'{}'".format(self.depthbegin, depth, svxid))
+        headpath = os.path.dirname(svxid)
 
-        newfile = models_survex.SurvexFile(path=includelabel)
+        newfile = models_survex.SurvexFile(path=svxid)
         newfile.save() # until we do this there is no internal id so no foreign key works
         self.currentsurvexfile = newfile 
         newdirectory = self.GetSurvexDirectory(headpath)
@@ -383,10 +384,10 @@ class LoadingSurvex():
             newfile.cave   = cave
             #print("\n"+str(newdirectory.cave),file=sys.stderr)
         else:
-            self.ReportNonCaveIncludes(headpath, includelabel)
+            self.ReportNonCaveIncludes(headpath, svxid)
 
         if not newfile.survexdirectory:
-            message = " ! SurvexDirectory NOT SET in new SurvexFile {} ".format(includelabel)
+            message = " ! SurvexDirectory NOT SET in new SurvexFile {} ".format(svxid)
             print(message)
             print(message,file=sys.stderr)
             models.DataIssue.objects.create(parser='survex', message=message)
@@ -401,7 +402,7 @@ class LoadingSurvex():
     def ProcessIncludeLine(self, included):
         svxid = included.groups()[0]
         #depth = " " * self.depthbegin
-        #print("{:2}{}   - Include survexfile:'{}'".format(self.depthbegin, depth,  svxid))
+        #print("{:2}{}   - Include survexfile:'{}' {}".format(self.depthbegin, depth,  svxid, included))
         self.LoadSurvexFile(svxid)
         self.stacksvxfiles.append(self.currentsurvexfile)
 
@@ -426,8 +427,10 @@ class LoadingSurvex():
             self.LoadSurvexQM(survexblock, qmline)
             
         included = self.rx_comminc.match(comment)
-        # ;*include means we have been included; not 'proceed to include' which *include means
+        # ;*include means 'we have been included'; whereas *include means 'proceed to include' 
         if included:
+            #depth = " " * self.depthbegin
+            #print("{:2}{}   - Include comment:'{}' {}".format(self.depthbegin, depth,  comment, included))
             self.ProcessIncludeLine(included)
 
         edulcni = self.rx_commcni.match(comment)
@@ -457,7 +460,7 @@ class LoadingSurvex():
 
     def LinearLoad(self, survexblock, path, svxlines):
         """Loads a single survex file. Usually used to import all the survex files which have been collated
-        into a single file. Loads the begin/end blocks recursively.
+        into a single file. Loads the begin/end blocks using a stack for labels.
         """
         self.relativefilename = path
         cave = self.IdentifyCave(path) # this will produce null for survex files which are geographic collections
@@ -466,19 +469,25 @@ class LoadingSurvex():
         self.currentsurvexfile.save() # django insists on this although it is already saved !?
         
         blockcount = 0
+        lineno = 0
         def tickle():
             nonlocal blockcount
             blockcount +=1
             if blockcount % 10 ==0 :
                 print(".", file=sys.stderr,end='')
-            if blockcount % 500 ==0 :
+            if blockcount % 200 ==0 :
                 print("\n", file=sys.stderr,end='')
-            sys.stderr.flush();
+                print(" - MEM:{:7.3f} MB in use".format(models.get_process_memory()),file=sys.stderr)
+            sys.stderr.flush()
 
         for svxline in svxlines:
-            sline, comment = self.rx_comment.match(svxline.strip()).groups()
+            lineno += 1
+            sline, comment = self.rx_comment.match(svxline).groups()
             if comment:
+                depth = " " * self.depthbegin
+                print("{:4} {:2}{}   - Include comment:'{}' {}".format(lineno, self.depthbegin, depth,  comment, sline))
                 self.LoadSurvexComment(survexblock, comment) # this catches the ;*include and ;*edulcni lines too
+
             if not sline:
                 continue # skip blank lines
 
@@ -503,10 +512,10 @@ class LoadingSurvex():
                             pathlist += "." + id
                     newsurvexblock = models_survex.SurvexBlock(name=blockid, parent=survexblock, 
                             survexpath=pathlist, 
-                            title = survexblock.title, # copy parent inititally
                             cave=self.currentcave, survexfile=self.currentsurvexfile, 
                             legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
                     newsurvexblock.save()
+                    newsurvexblock.title = "("+survexblock.title+")" # copy parent inititally
                     survexblock = newsurvexblock
                     # survexblock.survexfile.save() 
                     survexblock.save() # django insists on this , but we want to save at the end !
@@ -564,7 +573,7 @@ class LoadingSurvex():
                 else:
                     pass # ignore all other sorts of data
 
-    def RecursiveScan(self, survexblock, survexfile, fin, flinear, fcollate):
+    def RecursiveScan(self, survexblock, path, fin, flinear, fcollate):
         """Follows the *include links in all the survex files from the root file 1623.svx
         and reads only the *include and *begin and *end statements. It produces a linearised
         list of the include tree
@@ -577,27 +586,27 @@ class LoadingSurvex():
         if self.callcount % 500 ==0 :
             print("\n", file=sys.stderr,end='')
 
-        if survexfile in self.svxfileslist:
-            message = " * Warning. Survex file already seen: {}".format(survexfile.path)
+        if path in self.svxfileslist:
+            message = " * Warning. Duplicate in *include list at:{} depth:{} file:{}".format(self.callcount, self.depthinclude, path)
             print(message)
             print(message,file=flinear)
-            print(message,file=sys.stderr)
+            print("\n"+message,file=sys.stderr)
             models.DataIssue.objects.create(parser='survex', message=message)
-            if self.svxfileslist.count(survexfile) > 20:
-                message = " ! ERROR. Survex file already seen 20x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(survexfile.path)
+            if self.svxfileslist.count(path) > 20:
+                message = " ! ERROR. Survex file already seen 20x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(path)
                 print(message)
                 print(message,file=flinear)
                 print(message,file=sys.stderr)
                 models.DataIssue.objects.create(parser='survex', message=message)
                 return
-        self.svxfileslist.append(survexfile)
+        self.svxfileslist.append(path)
         
         svxlines = fin.read().splitlines()
         for svxline in svxlines:
             self.lineno += 1
             includestmt =self.rx_include.match(svxline)
             if not includestmt:
-                fcollate.write("{}\n".format(svxline))
+                fcollate.write("{}\n".format(svxline.strip()))
 
             sline, comment = self.rx_comment.match(svxline.strip()).groups()
             mstar = self.rx_star.match(sline)
@@ -605,40 +614,35 @@ class LoadingSurvex():
                 cmd, args = mstar.groups()
                 cmd = cmd.lower()
                 if re.match("(?i)include$", cmd):
-                    includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args)))
-                    path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
-
-                    includesurvexfile = models_survex.SurvexFile(path=includepath)
-                    includesurvexfile.save()
+                    includepath = os.path.normpath(os.path.join(os.path.split(path)[0], re.sub(r"\.svx$", "", args)))
+                    #path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
 
-                    if includesurvexfile.exists():
-                        # do not create SurvexFile in DB here by doing includesurvexfile.save(). Do it when reading data.
+                    fullpath = os.path.join(settings.SURVEX_DATA, includepath + ".svx")
+                    if os.path.isfile(fullpath):
                         #--------------------------------------------------------
                         self.depthinclude += 1
-                        fininclude = includesurvexfile.OpenFile()
-                        fcollate.write(";*include {}\n".format(includesurvexfile.path))
-                        flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includesurvexfile.path))
-                        push = includesurvexfile.path.lower()
+                        fininclude = open(fullpath,'r')
+                        fcollate.write(";*include {}\n".format(includepath))
+                        flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includepath))
+                        push = includepath.lower()
                         self.stackinclude.append(push)
                         #-----------------
-                        self.RecursiveScan(survexblock, includesurvexfile, fininclude, flinear, fcollate)
+                        self.RecursiveScan(survexblock, includepath, fininclude, flinear, fcollate)
                         #-----------------
                         pop = self.stackinclude.pop()
                         if pop != push:
-                            message = "!!!!!!!    ERROR pop != push {} != {} {}".format(pop, push, self.stackinclude)
+                            message = "!! ERROR mismatch *include pop!=push  {}".format(pop, push, self.stackinclude)
                             print(message)
                             print(message,file=flinear)
                             print(message,file=sys.stderr)
                             models.DataIssue.objects.create(parser='survex', message=message)
-                        includesurvexfile.path += "-TEMP"
-                        includesurvexfile = None
                         flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, pop))
                         fcollate.write(";*edulcni {}\n".format(pop))
                         fininclude.close()
                         self.depthinclude -= 1
                         #--------------------------------------------------------
                     else:
-                        message = "    ! ERROR *include file not found for [{}]:'{}'".format(includesurvexfile, includepath)
+                        message = "    ! ERROR *include file not found for:'{}'".format(includepath)
                         print(message)
                         print(message,file=sys.stderr)
                         models.DataIssue.objects.create(parser='survex', message=message)
@@ -659,7 +663,7 @@ class LoadingSurvex():
                         args = " "
                     popargs = self.stackbegin.pop()
                     if popargs != args.lower():
-                        message = "!!!!!!!    ERROR BEGIN/END pop != push {} != {}\n{}".format(popargs, args, self. stackbegin)
+                        message = "!! ERROR mismatch in BEGIN/END labels pop!=push '{}'!='{}'\n{}".format(popargs, args, self. stackbegin)
                         print(message)
                         print(message,file=flinear)
                         print(message,file=sys.stderr)
@@ -701,7 +705,7 @@ def FindAndLoadSurvex(survexblockroot):
     fcollate.write(";*include {}\n".format(survexfileroot.path))
     flinear.write("{:2} {} *include {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path))
     #----------------------------------------------------------------
-    svx_scan.RecursiveScan(survexblockroot, survexfileroot, finroot, flinear, fcollate)
+    svx_scan.RecursiveScan(survexblockroot, survexfileroot.path, finroot, flinear, fcollate)
     #----------------------------------------------------------------
     flinear.write("{:2} {} *edulcni {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path))
     fcollate.write(";*edulcni {}\n".format(survexfileroot.path))
@@ -712,7 +716,7 @@ def FindAndLoadSurvex(survexblockroot):
     flinear.write("    - {:,} survex files in linear include list \n".format(len(svxfileslist)))
     flinear.close()
     fcollate.close()
-    svx_scan = None
+    svx_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.?
     print("\n -  {:,} survex files in linear include list \n".format(len(svxfileslist)),file=sys.stderr)
 
     mem1 = models.get_process_memory()
@@ -724,7 +728,7 @@ def FindAndLoadSurvex(survexblockroot):
     # entrance locations currently loaded after this by LoadPos(), but could better be done before ?
     # look in MapLocations() for how we find the entrances
    
-    print('\n - Loading All Survex Blocks (LinearRecursive)',file=sys.stderr)
+    print('\n - Loading All Survex Blocks (LinearLoad)',file=sys.stderr)
     svx_load = LoadingSurvex()
     
     svx_load.svxdirs[""] = survexfileroot.survexdirectory
author	Philip Sargent <philip.sargent@klebos.com>	2020-07-01 22:49:38 +0100
committer	Philip Sargent <philip.sargent@klebos.com>	2020-07-01 22:49:38 +0100
commit	df434cd39909d177f98dec5a7575f61ea701c102 (patch)
tree	3e37aee9a8ab1e4e2515170c774c4fa7f6b20514 /parsers/survex.py
parent	8cc768e5b6398e4f3fe3211b3f8dc9712e58dd93 (diff)
download	troggle-df434cd39909d177f98dec5a7575f61ea701c102.tar.gz troggle-df434cd39909d177f98dec5a7575f61ea701c102.tar.bz2 troggle-df434cd39909d177f98dec5a7575f61ea701c102.zip