diff options
Diffstat (limited to 'parsers')
-rw-r--r-- | parsers/caves.py    |  9
-rw-r--r-- | parsers/imports.py  | 12
-rw-r--r-- | parsers/logbooks.py |  3
-rw-r--r-- | parsers/survex.py   | 90
4 files changed, 65 insertions, 49 deletions
diff --git a/parsers/caves.py b/parsers/caves.py index d1e7406..2bb2ccc 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -192,6 +192,8 @@ def readcave(filename): url = url[0], filename = filename) except: + # this slow db query happens on every cave, but on import we have all this in memory + # and don't need to do a db query. Fix this to speed it up! # need to cope with duplicates print(" ! FAILED to get only one CAVE when updating using: "+filename) kaves = models_caves.Cave.objects.all().filter(kataster_number=kataster_number[0]) @@ -206,6 +208,8 @@ def readcave(filename): c = k for area_slug in areas: + # this slow db query happens on every cave, but on import we have all this in memory + # and don't need to do a db query. Fix this to speed it up! area = models_caves.Area.objects.filter(short_name = area_slug) if area: newArea = area[0] @@ -216,6 +220,8 @@ def readcave(filename): primary = True for slug in slugs: try: + # this slow db query happens on every cave, but on import we have all this in memory + # and don't need to do a db query. Fix this to speed it up! cs = models_caves.CaveSlug.objects.update_or_create(cave = c, slug = slug, primary = primary) @@ -225,10 +231,13 @@ def readcave(filename): print(message) primary = False + for entrance in entrances: slug = getXML(entrance, "entranceslug", maxItems = 1, context = context)[0] letter = getXML(entrance, "letter", maxItems = 1, context = context)[0] try: + # this slow db query happens on every entrance, but on import we have all this in memory + # and don't need to do a db query. Fix this to speed it up! 
entrance = models_caves.Entrance.objects.get(entranceslug__slug = slug) ce = models_caves.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance) except: diff --git a/parsers/imports.py b/parsers/imports.py index c2965c2..f8c98a3 100644 --- a/parsers/imports.py +++ b/parsers/imports.py @@ -16,21 +16,21 @@ import troggle.parsers.logbooks import troggle.parsers.QMs def import_caves(): - print("Importing Caves to ",end="") + print("-- Importing Caves to ",end="") print(django.db.connections.databases['default']['NAME']) troggle.parsers.caves.readcaves() def import_people(): - print("Importing People (folk.csv) to ",end="") + print("-- Importing People (folk.csv) to ",end="") print(django.db.connections.databases['default']['NAME']) troggle.parsers.people.LoadPersonsExpos() def import_surveyscans(): - print("Importing Survey Scans") + print("-- Importing Survey Scans") troggle.parsers.surveys.LoadListScans() def import_logbooks(): - print("Importing Logbooks") + print("-- Importing Logbooks") troggle.parsers.logbooks.LoadLogbooks() def import_QMs(): @@ -40,7 +40,7 @@ def import_QMs(): def import_survex(): # when this import is moved to the top with the rest it all crashes horribly import troggle.parsers.survex - print("Importing Survex Blocks") + print("-- Importing Survex Blocks") print(" - Survex Blocks") troggle.parsers.survex.LoadSurvexBlocks() print(" - Survex entrances x/y/z Positions") @@ -53,6 +53,6 @@ def import_loadpos(): troggle.parsers.survex.LoadPos() def import_drawingsfiles(): - print("Importing Drawings files") + print("-- Importing Drawings files") troggle.parsers.surveys.LoadDrawingFiles() diff --git a/parsers/logbooks.py b/parsers/logbooks.py index c4f2c9c..b18e839 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -114,6 +114,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_ expeditionday = expedition.get_expedition_day(date) lookupAttribs={'date':date, 
'title':title} # 'cave' is converted to a string doing this, which renders as the cave slug. + # but it is a db query which we should try to avoid - rewrite this nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug':slugify(title)[:50], 'entry_type':entry_type} lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs) @@ -356,6 +357,8 @@ def SetDatesFromLogbookEntries(expedition): Sets the date_from and date_to field for an expedition based on persontrips. Then sets the expedition date_from and date_to based on the personexpeditions. """ + # Probably a faster way to do this. This uses a lot of db queries, but we have all this + # in memory.. for personexpedition in expedition.personexpedition_set.all(): persontrips = personexpedition.persontrip_set.order_by('logbook_entry__date') # sequencing is difficult to do diff --git a/parsers/survex.py b/parsers/survex.py index da0395d..7db8af0 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -324,7 +324,8 @@ class LoadingSurvex(): return self.caveslist[g] print(' ! Failed to find cave for {}'.format(cavepath.lower())) else: - print(' ! No regex cave match for %s' % cavepath.lower()) + # not a cave, but that is fine. + # print(' ! No regex(standard identifier) cave match for %s' % cavepath.lower()) return None def GetSurvexDirectory(self, headpath): @@ -353,17 +354,17 @@ class LoadingSurvex(): print("\n"+message,file=sys.stderr) models.DataIssue.objects.create(parser='survex', message=message) - def LoadSurvexFile(self, includelabel): + def LoadSurvexFile(self, svxid): """Creates SurvexFile in the database, and SurvexDirectory if needed with links to 'cave' - Creates a new current survexblock with valid .survexfile and valid .survexdirectory + Creates a new current survexfile and valid .survexdirectory The survexblock passed-in is not necessarily the parent. FIX THIS. 
""" depth = " " * self.depthbegin - print("{:2}{} - NEW survexfile:'{}'".format(self.depthbegin, depth, includelabel)) - headpath, tail = os.path.split(includelabel) + print("{:2}{} - NEW survexfile:'{}'".format(self.depthbegin, depth, svxid)) + headpath = os.path.dirname(svxid) - newfile = models_survex.SurvexFile(path=includelabel) + newfile = models_survex.SurvexFile(path=svxid) newfile.save() # until we do this there is no internal id so no foreign key works self.currentsurvexfile = newfile newdirectory = self.GetSurvexDirectory(headpath) @@ -383,10 +384,10 @@ class LoadingSurvex(): newfile.cave = cave #print("\n"+str(newdirectory.cave),file=sys.stderr) else: - self.ReportNonCaveIncludes(headpath, includelabel) + self.ReportNonCaveIncludes(headpath, svxid) if not newfile.survexdirectory: - message = " ! SurvexDirectory NOT SET in new SurvexFile {} ".format(includelabel) + message = " ! SurvexDirectory NOT SET in new SurvexFile {} ".format(svxid) print(message) print(message,file=sys.stderr) models.DataIssue.objects.create(parser='survex', message=message) @@ -401,7 +402,7 @@ class LoadingSurvex(): def ProcessIncludeLine(self, included): svxid = included.groups()[0] #depth = " " * self.depthbegin - #print("{:2}{} - Include survexfile:'{}'".format(self.depthbegin, depth, svxid)) + #print("{:2}{} - Include survexfile:'{}' {}".format(self.depthbegin, depth, svxid, included)) self.LoadSurvexFile(svxid) self.stacksvxfiles.append(self.currentsurvexfile) @@ -426,8 +427,10 @@ class LoadingSurvex(): self.LoadSurvexQM(survexblock, qmline) included = self.rx_comminc.match(comment) - # ;*include means we have been included; not 'proceed to include' which *include means + # ;*include means 'we have been included'; whereas *include means 'proceed to include' if included: + #depth = " " * self.depthbegin + #print("{:2}{} - Include comment:'{}' {}".format(self.depthbegin, depth, comment, included)) self.ProcessIncludeLine(included) edulcni = self.rx_commcni.match(comment) @@ 
-457,7 +460,7 @@ class LoadingSurvex(): def LinearLoad(self, survexblock, path, svxlines): """Loads a single survex file. Usually used to import all the survex files which have been collated - into a single file. Loads the begin/end blocks recursively. + into a single file. Loads the begin/end blocks using a stack for labels. """ self.relativefilename = path cave = self.IdentifyCave(path) # this will produce null for survex files which are geographic collections @@ -466,19 +469,25 @@ class LoadingSurvex(): self.currentsurvexfile.save() # django insists on this although it is already saved !? blockcount = 0 + lineno = 0 def tickle(): nonlocal blockcount blockcount +=1 if blockcount % 10 ==0 : print(".", file=sys.stderr,end='') - if blockcount % 500 ==0 : + if blockcount % 200 ==0 : print("\n", file=sys.stderr,end='') - sys.stderr.flush(); + print(" - MEM:{:7.3f} MB in use".format(models.get_process_memory()),file=sys.stderr) + sys.stderr.flush() for svxline in svxlines: - sline, comment = self.rx_comment.match(svxline.strip()).groups() + lineno += 1 + sline, comment = self.rx_comment.match(svxline).groups() if comment: + depth = " " * self.depthbegin + print("{:4} {:2}{} - Include comment:'{}' {}".format(lineno, self.depthbegin, depth, comment, sline)) self.LoadSurvexComment(survexblock, comment) # this catches the ;*include and ;*edulcni lines too + if not sline: continue # skip blank lines @@ -503,10 +512,10 @@ class LoadingSurvex(): pathlist += "." + id newsurvexblock = models_survex.SurvexBlock(name=blockid, parent=survexblock, survexpath=pathlist, - title = survexblock.title, # copy parent inititally cave=self.currentcave, survexfile=self.currentsurvexfile, legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0) newsurvexblock.save() + newsurvexblock.title = "("+survexblock.title+")" # copy parent initially survexblock = newsurvexblock # survexblock.survexfile.save() survexblock.save() # django insists on this , but we want to save at the end ! 
@@ -564,7 +573,7 @@ class LoadingSurvex(): else: pass # ignore all other sorts of data - def RecursiveScan(self, survexblock, survexfile, fin, flinear, fcollate): + def RecursiveScan(self, survexblock, path, fin, flinear, fcollate): """Follows the *include links in all the survex files from the root file 1623.svx and reads only the *include and *begin and *end statements. It produces a linearised list of the include tree @@ -577,27 +586,27 @@ class LoadingSurvex(): if self.callcount % 500 ==0 : print("\n", file=sys.stderr,end='') - if survexfile in self.svxfileslist: - message = " * Warning. Survex file already seen: {}".format(survexfile.path) + if path in self.svxfileslist: + message = " * Warning. Duplicate in *include list at:{} depth:{} file:{}".format(self.callcount, self.depthinclude, path) print(message) print(message,file=flinear) - print(message,file=sys.stderr) + print("\n"+message,file=sys.stderr) models.DataIssue.objects.create(parser='survex', message=message) - if self.svxfileslist.count(survexfile) > 20: - message = " ! ERROR. Survex file already seen 20x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(survexfile.path) + if self.svxfileslist.count(path) > 20: + message = " ! ERROR. Survex file already seen 20x. Probably an infinite loop so fix your *include statements that include this. Aborting. 
{}".format(path) print(message) print(message,file=flinear) print(message,file=sys.stderr) models.DataIssue.objects.create(parser='survex', message=message) return - self.svxfileslist.append(survexfile) + self.svxfileslist.append(path) svxlines = fin.read().splitlines() for svxline in svxlines: self.lineno += 1 includestmt =self.rx_include.match(svxline) if not includestmt: - fcollate.write("{}\n".format(svxline)) + fcollate.write("{}\n".format(svxline.strip())) sline, comment = self.rx_comment.match(svxline.strip()).groups() mstar = self.rx_star.match(sline) @@ -605,40 +614,35 @@ class LoadingSurvex(): cmd, args = mstar.groups() cmd = cmd.lower() if re.match("(?i)include$", cmd): - includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args))) - path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath) - - includesurvexfile = models_survex.SurvexFile(path=includepath) - includesurvexfile.save() + includepath = os.path.normpath(os.path.join(os.path.split(path)[0], re.sub(r"\.svx$", "", args))) + #path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath) - if includesurvexfile.exists(): - # do not create SurvexFile in DB here by doing includesurvexfile.save(). Do it when reading data. 
+ fullpath = os.path.join(settings.SURVEX_DATA, includepath + ".svx") + if os.path.isfile(fullpath): #-------------------------------------------------------- self.depthinclude += 1 - fininclude = includesurvexfile.OpenFile() - fcollate.write(";*include {}\n".format(includesurvexfile.path)) - flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includesurvexfile.path)) - push = includesurvexfile.path.lower() + fininclude = open(fullpath,'r') + fcollate.write(";*include {}\n".format(includepath)) + flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includepath)) + push = includepath.lower() self.stackinclude.append(push) #----------------- - self.RecursiveScan(survexblock, includesurvexfile, fininclude, flinear, fcollate) + self.RecursiveScan(survexblock, includepath, fininclude, flinear, fcollate) #----------------- pop = self.stackinclude.pop() if pop != push: - message = "!!!!!!! ERROR pop != push {} != {} {}".format(pop, push, self.stackinclude) + message = "!! ERROR mismatch *include pop!=push {}".format(pop, push, self.stackinclude) print(message) print(message,file=flinear) print(message,file=sys.stderr) models.DataIssue.objects.create(parser='survex', message=message) - includesurvexfile.path += "-TEMP" - includesurvexfile = None flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, pop)) fcollate.write(";*edulcni {}\n".format(pop)) fininclude.close() self.depthinclude -= 1 #-------------------------------------------------------- else: - message = " ! ERROR *include file not found for [{}]:'{}'".format(includesurvexfile, includepath) + message = " ! ERROR *include file not found for:'{}'".format(includepath) print(message) print(message,file=sys.stderr) models.DataIssue.objects.create(parser='survex', message=message) @@ -659,7 +663,7 @@ class LoadingSurvex(): args = " " popargs = self.stackbegin.pop() if popargs != args.lower(): - message = "!!!!!!! 
ERROR BEGIN/END pop != push {} != {}\n{}".format(popargs, args, self. stackbegin) + message = "!! ERROR mismatch in BEGIN/END labels pop!=push '{}'!='{}'\n{}".format(popargs, args, self. stackbegin) print(message) print(message,file=flinear) print(message,file=sys.stderr) @@ -701,7 +705,7 @@ def FindAndLoadSurvex(survexblockroot): fcollate.write(";*include {}\n".format(survexfileroot.path)) flinear.write("{:2} {} *include {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path)) #---------------------------------------------------------------- - svx_scan.RecursiveScan(survexblockroot, survexfileroot, finroot, flinear, fcollate) + svx_scan.RecursiveScan(survexblockroot, survexfileroot.path, finroot, flinear, fcollate) #---------------------------------------------------------------- flinear.write("{:2} {} *edulcni {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path)) fcollate.write(";*edulcni {}\n".format(survexfileroot.path)) @@ -712,7 +716,7 @@ def FindAndLoadSurvex(survexblockroot): flinear.write(" - {:,} survex files in linear include list \n".format(len(svxfileslist))) flinear.close() fcollate.close() - svx_scan = None + svx_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.? print("\n - {:,} survex files in linear include list \n".format(len(svxfileslist)),file=sys.stderr) mem1 = models.get_process_memory() @@ -724,7 +728,7 @@ def FindAndLoadSurvex(survexblockroot): # entrance locations currently loaded after this by LoadPos(), but could better be done before ? # look in MapLocations() for how we find the entrances - print('\n - Loading All Survex Blocks (LinearRecursive)',file=sys.stderr) + print('\n - Loading All Survex Blocks (LinearLoad)',file=sys.stderr) svx_load = LoadingSurvex() svx_load.svxdirs[""] = survexfileroot.survexdirectory |