Working. More fault checking.

author: Philip Sargent <philip.sargent@klebos.com> 2020-06-27 17:55:59 +0100
committer: Philip Sargent <philip.sargent@klebos.com> 2020-06-27 17:55:59 +0100
commit: 4716eaa4b6cc6169f7c662403fbde55247150249 (patch)
tree: 51552da0c4ce5cfa1eb4acae99d96a29d638cc80 /parsers/survex.py
parent: c55716df085c3a66c5eb919f1ea6f74f5cd2c650 (diff)
download: troggle-4716eaa4b6cc6169f7c662403fbde55247150249.tar.gz
troggle-4716eaa4b6cc6169f7c662403fbde55247150249.tar.bz2
troggle-4716eaa4b6cc6169f7c662403fbde55247150249.zip
1 files changed, 264 insertions, 55 deletions
diff --git a/parsers/survex.py b/parsers/survex.py
index 9e4a275..a88d8e3 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -41,26 +41,35 @@ class LoadSurvex():
     rx_linelen = re.compile(r"[\d\-+.]+$")
     rx_team    = re.compile(r"(?i)(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$")
     rx_person  = re.compile(r"(?i) and | / |, | & | \+ |^both$|^none$")
-    rx_qm      = re.compile(r'^\s*QM(\d)\s+?([a-dA-DxX])\s+([\w\-]+)\.(\d+)\s+(([\w\-]+)\.(\d+)|\-)\s+(.+)$')
+    rx_qm      = re.compile(r'(?i)^\s*QM(\d)\s+?([a-dA-DxX])\s+([\w\-]+)\.(\d+)\s+(([\w\-]+)\.(\d+)|\-)\s+(.+)$')
 #   remember there is also QM_PATTERN used in views_other and set in settings.py
 
-    rx_comment = re.compile(r"([^;]*?)\s*(?:;\s*(.*))?\n?$")
-    rx_ref     = re.compile(r'^\s*ref[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)')
-    rx_star    = re.compile(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$')
+    rx_cave    = re.compile(r'caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/')
+    rx_comment = re.compile(r'([^;]*?)\s*(?:;\s*(.*))?\n?$')
+    rx_comminc = re.compile(r'(?i)^\s*;\*include[\s](.*)$') # inserted by linear collate ;*include
+    rx_commcni = re.compile(r'(?i)^\s*;\*edulcni[\s](.*)$') # inserted by linear collate ;*edulcni
+    rx_include = re.compile(r'(?i)^\s*(\*include[\s].*)$')
+    rx_ref     = re.compile(r'(?i)^\s*ref[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)')
+    rx_star    = re.compile(r'(?i)\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$')
     rx_starref = re.compile(r'(?i)^\s*\*ref[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$')
     rx_argsref = re.compile(r'(?i)^[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$')
 
     survexlegsalllength = 0.0
     survexlegsnumber = 0
     depthbegin = 0
-    depthimport = 0
+    depthinclude = 0
     stackbegin =[]
-    stackimport = []
+    stackinclude = []
     svxfileslist =[]
     lineno = 0
     insp = ""
     callcount = 0
     stardata ={}
+    includedfilename =""
+    currenttitle =""
+    currentsurvexblock = None
+    currentsurvexfile = None
+    currentcave = None
 
     def __init__(self):
         pass
@@ -234,7 +243,6 @@ class LoadSurvex():
             print((self.insp+message))
             models.DataIssue.objects.create(parser='survex', message=message)
 
-
     def LoadSurvexQM(self, survexblock, qmline):
         insp = self.insp
         qm_no = qmline.group(1)
@@ -274,9 +282,65 @@ class LoadSurvex():
             print(insp+message)
             models.DataIssue.objects.create(parser='survex', message=message)
 
+    def LoadSurvexDataCmd(survexblock,args):
+        ls = args.lower().split()
+        stardata = { "type":ls[0] }
+        for i in range(0, len(ls)):
+            stardata[self.stardataparamconvert.get(ls[i], ls[i])] = i - 1
+            self.stardata = stardata
+        if ls[0] in ["normal", "cartesian", "nosurvey"]:
+            assert (("from" in stardata and "to" in stardata) or "station" in stardata), args
+        elif ls[0] == "default":
+            stardata = self.stardatadefault
+        else:
+            assert ls[0] == "passage", args
+
+    def LoadSurvexFlags(self, line, cmd):
+        # Here we could set on/off 'splay', 'not splay', 'surface', 'not surface', or 'duplicate'
+        # but this data is only used for sense-checking not to actually calculate anything important
+        pass
+
+    def IdentifyCave(self, cavepath):
+        path = os.path.join(os.path.split(cavepath)[0], re.sub(r"\.svx$", "", cavepath))
+        path_match = self.rx_cave.search(path)
+        print('    - Attempting cave match for %s' % path)
+        if path_match:
+            pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
+            # print(insp+pos_cave)
+            cave = models_caves.getCaveByReference(pos_cave)
+            if cave:
+                survexfile.cave = cave
+            print('    - Cave matched for %s' % path)
+            return cave
+        else:
+            print('    ! No cave match for %s' % path)
+            return None
+
+    def LoadSurvexFileBlock(self, survexblock, includelabel):
+        """Creates SurvexDirectory and SurvexFile in the database
+        with links to 'cave'
+        Creates a new current survexblock with valid .survexfile and valid .survexdirectory
+        """
+        cave = self.IdentifyCave(self, includelabel)
+        survexdirectory = SurvexDirectory(path=dirpath, cave=cave, primarysurvexfile=self)
+        survexdirectory.save()
+
+        newsurvexfile = models_survex.SurvexFile(path=includelabel)
+        newsurvexfile.survexdirectory = survexdirectory
+        newsurvexfile.save()
+        
+        name = includelabel
+        newsurvexblock = models_survex.SurvexBlock(name=name, parent=survexblock, 
+                            survexpath=survexblock.survexpath+"."+name, 
+                            cave=survexfile.cave, survexfile=newsurvexfile, 
+                            legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
+        newsurvexblock.save 
+
+        self.currentsurvexfile  = newsurvexfile
+        self.currentsurvexblock = newsurvexblock
 
     def LoadSurvexComment(self, survexblock, comment):
-        # ignore all comments except ;ref and ;QM
+        # ignore all comments except ;ref and ;QM and ;*include (for collated survex file)
         refline = self.rx_ref.match(comment)
         if refline:
             comment = comment.replace("ref","").strip()
@@ -285,11 +349,17 @@ class LoadSurvex():
         qmline = self.rx_qm.match(comment)
         if qmline:
             self.LoadSurvexQM(survexblock, qmline)
-
-    def LoadSurvexFlags(self, line, cmd):
-        # Here we could set on/off 'splay', 'not splay', 'surface', 'not surface', or 'duplicate'
-        # but this data is only used for sense-checking not to actually calculate anything important
-        pass
+            
+        included = self.rx_comminc.match(comment)
+        # ;*include means we have been included; not 'proceed to include' which *include means
+        if included:
+            self.LoadSurvexFileBlock(survexblock, included)
+
+        edulcni = self.rx_commcni.match(comment)
+        # ;*edulcni marks the end of an included file in the collated file, so step back up to the parent block
+        if edulcni:
+            currentsurvexblock = currentsurvexblock.parent
+            currentsurvexfile = currentsurvexblock.parent.survexfile
 
     def LoadSurvexSetup(self,survexblock, survexfile):
         self.depthbegin = 0
@@ -311,12 +381,10 @@ class LoadSurvex():
             if cave:
                 survexfile.cave = cave
 
-
-
-    def RecursiveLoad(self, survexblock, survexfile, fin):
+    def RecursiveRecursiveLoad(self, survexblock, survexfile, fin):
         """Follows the *include links in all the survex files from the root file 1623.svx
         and reads in the survex blocks, other data and the wallet references (scansfolder) as it
-        goes. This part of the data import process is where the maximum memory is used and where it
+        goes. This part of the data include process is where the maximum memory is used and where it
         crashes on memory-constrained machines. Begin-end blocks may also be nested.
         """
         self.LoadSurvexSetup(survexblock, survexfile)
@@ -361,7 +429,7 @@ class LoadSurvex():
                         self.insp += "> "
                         #--------------------------------------------------------
                         fininclude = includesurvexfile.OpenFile()
-                        self.RecursiveLoad(survexblock, includesurvexfile, fininclude)
+                        self.RecursiveRecursiveLoad(survexblock, includesurvexfile, fininclude)
                         fininclude.close()
                         #--------------------------------------------------------
                         self.insp = self.insp[2:]
@@ -402,7 +470,7 @@ class LoadSurvex():
                         print(insp+"   - BLOCK-enter nested *begin/*end block: '{}'".format(name))
                         self.insp += "> "
                         #--------------------------------------------------------
-                        self.RecursiveLoad(survexblockdown, survexfile, fin)
+                        self.RecursiveRecursiveLoad(survexblockdown, survexfile, fin)
                         #--------------------------------------------------------
                         # do not close the file as there may be more blocks in this one
                         # and it is re-read afresh with every nested begin-end block.
@@ -451,12 +519,99 @@ class LoadSurvex():
                 else:
                     pass # ignore all other sorts of data
 
-    def RecursiveScan(self, survexblock, survexfile, fin, flinear):
+    def LinearRecursiveLoad(self, survexblock, path, fin, skipto):
+        """Loads a single survex file. Usually used to import all the survex files which have been collated
+        into a single file. Loads the begin/end blocks recursively.
+        """
+        self.relativefilename = path
+        cave = self.IdentifyCave(path) # this will produce null for survex files which are geographic collections
+
+        svxlines = fin.read().splitlines()
+        for svxline in svxlines:
+            self.lineno += 1
+            if self.lineno < skipto:
+                continue # skip through file to the place we got up to
+                
+            sline, comment = self.rx_comment.match(svxline.strip()).groups()
+            if comment:
+                self.LoadSurvexComment(survexblock, comment)
+            if not sline:
+                continue # skip blank lines
+
+            # detect a star command
+            mstar = self.rx_star.match(sline)
+            if mstar: # yes we are reading a *cmd
+                cmd, args = mstar.groups()
+                cmd = cmd.lower()
+                if re.match("begin$(?i)", cmd):
+                    self.depthbegin += 1
+                    if args:
+                        depth = " " * self.depthbegin
+                        self.stackbegin.append(args.lower())
+
+                        previousnlegs = self.survexlegsnumber
+                        name = args.lower()
+                        print('   - Begin found for:{}, creating new SurvexBlock '.format(name))
+
+                        survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock, 
+                                survexpath=survexblock.survexpath+"."+name, 
+                                cave=self.currentcave, survexfile=self.currentsurvexfile, 
+                                legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
+                        survexblockdown.save()
+                        survexblock.save()
+                        survexblock = survexblockdown
+                    else:
+                        self.depthbegin += 1
+
+                elif re.match("end$(?i)", cmd):
+                    # haven't really thought this through yet.
+                    if survexblock:
+                        self.currentsurvexblock = survexblock.parent
+                        self.currentsurvexfile = survexblock.parent.survexfile
+
+                    if self.depthbegin:
+                        print("   - End    -return from nested *begin/*end block: '{}'".format(args))
+                        self.depthbegin -= 1
+                    else:
+                        legsinblock = self.survexlegsnumber - previousnlegs
+                        print("  - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber))
+                        survexblock.legsall = legsinblock
+                        survexblock.save()
+                        return
+
+                elif re.match("title$(?i)", cmd):
+                    self.currenttitle = args
+                elif cmd == "ref":
+                    self.LoadSurvexRef(survexblock, args)
+                elif cmd == "flags":
+                    self.LoadSurvexFlags(args, cmd)
+                elif cmd == "data":
+                    self.LoadSurvexDataCmd(survexblock, args)
+                elif re.match("date$(?i)", cmd):
+                    self.LoadSurvexDate(survexblock, args)
+                elif re.match("team$(?i)", cmd):
+                    self.LoadSurvexTeam(survexblock, args)
+                elif cmd == "set" and re.match("names(?i)", args):
+                    pass
+                elif re.match("include$(?i)", cmd):
+                    message = " ! -ERROR *include command not expected here {}. Re-run a full Survex import.".format(path)
+                    print(message)
+                    print(message,file=sys.stderr)
+                    models.DataIssue.objects.create(parser='survex', message=message)
+                else:
+                    self.LoadSurvexIgnore(survexblock, args, cmd)
+            else: # not a *cmd so we are reading data OR rx_comment failed
+                if "from" in self.stardata: # only interested in survey legs
+                    self.LoadSurvexLineLeg(survexblock, svxline, sline, comment)
+                else:
+                    pass # ignore all other sorts of data
+
+    def RecursiveScan(self, survexblock, survexfile, fin, flinear, fcollate):
         """Follows the *include links in all the survex files from the root file 1623.svx
-        and reads only the *import and *begin and *end statements. It produces a linearised
-        list of the import tree
+        and reads only the *include and *begin and *end statements. It produces a linearised
+        list of the include tree
         """
-        indent = " " * self.depthimport
+        indent = " " * self.depthinclude
         sys.stderr.flush();
         self.callcount +=1
         if self.callcount % 10 ==0 :
@@ -464,11 +619,28 @@ class LoadSurvex():
         if self.callcount % 500 ==0 :
             print("\n", file=sys.stderr,end='')
 
+        if survexfile in self.svxfileslist:
+            message = " * Warning. Survex file already seen: {}".format(survexfile.path)
+            print(message)
+            print(message,file=flinear)
+            print(message,file=sys.stderr)
+            models.DataIssue.objects.create(parser='survex', message=message)
+            if self.svxfileslist.count(survexfile) > 20:
+                message = " ! ERROR. Survex file already seen 20x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(survexfile.path)
+                print(message)
+                print(message,file=flinear)
+                print(message,file=sys.stderr)
+                models.DataIssue.objects.create(parser='survex', message=message)
+                return
         self.svxfileslist.append(survexfile)
         
         svxlines = fin.read().splitlines()
         for svxline in svxlines:
             self.lineno += 1
+            includestmt =self.rx_include.match(svxline)
+            if not includestmt:
+                fcollate.write("{}\n".format(svxline))
+
             sline, comment = self.rx_comment.match(svxline.strip()).groups()
             mstar = self.rx_star.match(sline)
             if mstar: # yes we are reading a *cmd
@@ -481,24 +653,32 @@ class LoadSurvex():
                     includesurvexfile = models_survex.SurvexFile(path=includepath)
 
                     if includesurvexfile.exists():
+                        # do not create SurvexFile in DB here by doing includesurvexfile.save(). Do it when reading data.
                         #--------------------------------------------------------
-                        self.depthimport += 1
+                        self.depthinclude += 1
                         fininclude = includesurvexfile.OpenFile()
-                        flinear.write("{:2} {} *import {}\n".format(self.depthimport, indent, includesurvexfile.path))
+                        fcollate.write(";*include {}\n".format(includesurvexfile.path))
+                        flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includesurvexfile.path))
                         push = includesurvexfile.path.lower()
-                        self.stackimport.append(push)
-                        self.RecursiveScan(survexblock, includesurvexfile, fininclude, flinear)
-                        pop = self.stackimport.pop()
+                        self.stackinclude.append(push)
+                        self.RecursiveScan(survexblock, includesurvexfile, fininclude, flinear, fcollate)
+                        pop = self.stackinclude.pop()
                         if pop != push:
-                            print("!!!!!!!    ERROR pop != push {} != {} {}".format(pop, push, self.stackimport))
-                            print("!!!!!!!    ERROR pop != push {} != {} {}\n".format(pop, push, self.stackimport),file=flinear)
-                            print("!!!!!!!    ERROR pop != push {} != {} {}".format(pop, push, self.stackimport),file=sys.stderr)
-                        flinear.write("{:2} {} *tropmi {}\n".format(self.depthimport, indent, includesurvexfile.path))
+                            message = "!!!!!!!    ERROR pop != push {} != {} {}".format(pop, push, self.stackinclude)
+                            print(message)
+                            print(message,file=flinear)
+                            print(message,file=sys.stderr)
+                            models.DataIssue.objects.create(parser='survex', message=message)
+                        flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, includesurvexfile.path))
+                        fcollate.write(";*edulcni {}\n".format(includesurvexfile.path))
                         fininclude.close()
-                        self.depthimport -= 1
+                        self.depthinclude -= 1
                         #--------------------------------------------------------
                     else:
-                        print("    ! ERROR *include file not found for {}".format(includesurvexfile))
+                        message = "    ! ERROR *include file not found for {}".format(includesurvexfile)
+                        print(message)
+                        print(message,file=sys.stderr)
+                        models.DataIssue.objects.create(parser='survex', message=message)
                 elif re.match("begin$(?i)", cmd):
                     self.depthbegin += 1
                     depth = " " * self.depthbegin
@@ -516,14 +696,17 @@ class LoadSurvex():
                         args = " "
                     popargs = self.stackbegin.pop()
                     if popargs != args.lower():
-                        print("!!!!!!!    ERROR BEGIN/END pop != push {} != {}\n{}".format(popargs, args, self. stackbegin))
-                        print("!!!!!!!    ERROR BEGIN/END pop != push {} != {}\n{}\n".format(popargs, args, self. stackbegin), file=flinear)
-                        print(" !!!!!!!    ERROR BEGIN/END pop != push {} != {}\n{}".format(popargs, args,self. stackbegin), file=sys.stderr,)
+                        message = "!!!!!!!    ERROR BEGIN/END pop != push {} != {}\n{}".format(popargs, args, self. stackbegin)
+                        print(message)
+                        print(message,file=flinear)
+                        print(message,file=sys.stderr)
+                        models.DataIssue.objects.create(parser='survex', message=message)
 
                     self.depthbegin -= 1
                     pass
 
 
+
 def FindAndLoadAllSurvex(survexblockroot):
     """Follows the *include links recursively to find files
     """
@@ -535,43 +718,69 @@ def FindAndLoadAllSurvex(survexblockroot):
     print(' - SCANNING All Survex Blocks...',file=sys.stderr)
     survexfileroot = survexblockroot.survexfile
 
-    svxl0 = LoadSurvex()
-    svxl0.callcount = 0
-    svxl0.depthimport = 0
+    collatefilename = "_" + survexfileroot.path + ".svx"
+
+    svx_scan = LoadSurvex()
+    svx_scan.callcount = 0
+    svx_scan.depthinclude = 0
     indent=""
-    
+    fcollate = open(collatefilename, 'w')
+
     mem0 = models.get_process_memory()
+    print(" - MEM:{:7.2f} MB START".format(mem0),file=sys.stderr)
     flinear = open('svxlinear.log', 'w')
-    flinear.write("    - MEM:{:.2f} MB START {}\n".format(mem0,survexfileroot.path))
+    flinear.write("    - MEM:{:7.2f} MB START {}\n".format(mem0,survexfileroot.path))
 
     finroot = survexfileroot.OpenFile()
-    flinear.write("{:2} {} *import {}\n".format(svxl0.depthimport, indent, survexfileroot.path))
-    svxl0.RecursiveScan(survexblockroot, survexfileroot, finroot, flinear)
-    flinear.write("{:2} {} *tropmi {}\n".format(svxl0.depthimport, indent, survexfileroot.path))
+    fcollate.write(";*include {}\n".format(survexfileroot.path))
+    flinear.write("{:2} {} *include {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path))
+    svx_scan.RecursiveScan(survexblockroot, survexfileroot, finroot, flinear, fcollate)
+    flinear.write("{:2} {} *edulcni {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path))
+    fcollate.write(";*edulcni {}\n".format(survexfileroot.path))
     mem1 = models.get_process_memory()
     flinear.write("    - MEM:{:.2f} MB STOP {}\n".format(mem1,survexfileroot.path))
     flinear.write("    - MEM:{:.3f} MB USED\n".format(mem1-mem0))
-    svxfileslist = svxl0.svxfileslist
-    flinear.write("    - {:,} survex files in linear import list \n".format(len(svxfileslist)))
+    svxfileslist = svx_scan.svxfileslist
+    flinear.write("    - {:,} survex files in linear include list \n".format(len(svxfileslist)))
     flinear.close()
-    svxl0 = None
-    print("\n -  {:,} survex files in linear import list \n".format(len(svxfileslist)),file=sys.stderr)
+    fcollate.close()
+    svx_scan = None
+    print("\n -  {:,} survex files in linear include list \n".format(len(svxfileslist)),file=sys.stderr)
 
-    # INSERT IN HERE linear, not recursive, wrt import loading of all the data using [svxfileslist] #
-    for f in svxfileslist:
-        # Load legs etc. recursive only in BEGIN / END 
-        pass
+    mem1 = models.get_process_memory()
+    print(" - MEM:{:7.2f} MB END ".format(mem0),file=sys.stderr)
+    print(" - MEM:{:7.3f} MB USED".format(mem1-mem0),file=sys.stderr)
+    svxfileslist = [] # free memory
+ 
+    # Before doing this, it would be good to identify the *equate and *entrance commands that are relevant to the
+    # entrance locations. These are currently loaded after this by LoadPos(), but might be better done before.
+    # Look in MapLocations() for how we find the entrances.
+    print('\n - Loading All Survex Blocks...',file=sys.stderr)
     
+   
+    svx_load = LoadSurvex()
+    with open(collatefilename, "r") as fcollate:
+        #svx_load.LinearRecursiveLoad(survexblockroot,survexfileroot.path,fcollate, 0)
+        pass
+
+    print(" - MEM:{:7.2f} MB STOP".format(mem1),file=sys.stderr)
+    print(" - MEM:{:7.3f} MB USED".format(mem1-mem0),file=sys.stderr)
+
+    survexlegsnumber = svx_load.survexlegsnumber
+    survexlegsalllength = svx_load.survexlegsalllength
+    mem1 = models.get_process_memory()
+    svx_load = None
+
     print('\n - Loading All Survex Blocks...',file=sys.stderr)
     svxlrl = LoadSurvex()
 
     finroot = survexfileroot.OpenFile()
-    svxlrl.RecursiveLoad(survexblockroot, survexfileroot, finroot)
+    svxlrl.RecursiveRecursiveLoad(survexblockroot, survexfileroot, finroot)
     finroot.close()
-    
     survexlegsnumber = svxlrl.survexlegsnumber
     survexlegsalllength = svxlrl.survexlegsalllength
     svxlrl = None
+    
     # Close the logging file, Restore sys.stdout to our old saved file handle
     sys.stdout.close()
     print("+", file=sys.stderr)
author	Philip Sargent <philip.sargent@klebos.com>	2020-06-27 17:55:59 +0100
committer	Philip Sargent <philip.sargent@klebos.com>	2020-06-27 17:55:59 +0100
commit	4716eaa4b6cc6169f7c662403fbde55247150249 (patch)
tree	51552da0c4ce5cfa1eb4acae99d96a29d638cc80 /parsers/survex.py
parent	c55716df085c3a66c5eb919f1ea6f74f5cd2c650 (diff)
download	troggle-4716eaa4b6cc6169f7c662403fbde55247150249.tar.gz troggle-4716eaa4b6cc6169f7c662403fbde55247150249.tar.bz2 troggle-4716eaa4b6cc6169f7c662403fbde55247150249.zip