3 files changed, 80 insertions, 128 deletions
diff --git a/parsers/QMs.py b/parsers/QMs.py
index 025438e..d96b665 100644
--- a/parsers/QMs.py
+++ b/parsers/QMs.py
@@ -1,7 +1,7 @@
 import csv
 import os
 import re
-#from datetime import *
+from datetime import date
 
 from django.conf import settings
 
diff --git a/parsers/survex.py b/parsers/survex.py
index f236ec8..a290a83 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -2,6 +2,8 @@ import sys
 import os
 import re
 import time
+import resource
+
 from datetime import datetime, timedelta
 from subprocess import call, Popen, PIPE
 
@@ -29,14 +31,12 @@ ROOTBLOCK = "rootblock"
 
 
 def LoadSurvexLineLeg(survexblock, stardata, sline, comment, cave):
+    """This reads compass, clino and tape data but only keeps the tape lengths,
+    the rest is discarded after error-checking.
+    """
     global survexlegsalllength
     global survexlegsnumber
-    # The try catches here need replacing as they are relatively expensive
     ls = sline.lower().split()
-    #ssfrom = survexblock.MakeSurvexStation(ls[stardata["from"]]) 
-    #ssto = survexblock.MakeSurvexStation(ls[stardata["to"]])
-
-#    survexleg = models_survex.SurvexLeg(block=survexblock, stationfrom=ssfrom, stationto=ssto)
     survexleg = models_survex.SurvexLeg()
     # this next fails for two surface survey svx files which use / for decimal point 
     # e.g. '29/09' in the tape measurement, or use decimals but in brackets, e.g. (06.05)
@@ -97,11 +97,9 @@ def LoadSurvexLineLeg(survexblock, stardata, sline, comment, cave):
         if cave:
             survexleg.cave = cave
 
-        # only save proper legs
         # No need to save as we are measuring lengths only on parsing now.
         # delete the object so that django autosaving doesn't save it.
         survexleg = None
-        #survexleg.save()
 
     itape = stardata.get("tape")
     if itape:
@@ -111,7 +109,6 @@ def LoadSurvexLineLeg(survexblock, stardata, sline, comment, cave):
         except ValueError:
             print("! Length not added")
         # No need to save as we are measuring lengths only on parsing now.
-        #survexblock.save()
 
 
 def LoadSurvexEquate(survexblock, sline):
@@ -146,12 +143,12 @@ rx_starref = re.compile(r'(?i)^\s*\*ref[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*
 # $(?i)""", re.X) # the end  (do the whole thing case insensitively)
 
 rx_team    = re.compile(r"(?i)(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$")
-rx_team_member        = re.compile(r"(?i) and | / |, | & | \+ |^both$|^none$")
+rx_person        = re.compile(r"(?i) and | / |, | & | \+ |^both$|^none$")
 rx_qm      = re.compile(r'^\s*QM(\d)\s+?([a-dA-DxX])\s+([\w\-]+)\.(\d+)\s+(([\w\-]+)\.(\d+)|\-)\s+(.+)$')
 
 insp = ""
 callcount = 0
-def RecursiveLoad(survexblock, survexfile, fin, textlines):
+def RecursiveLoad(survexblock, survexfile, fin):
     """Follows the *include links in all the survex files from the root file 1623.svx
     and reads in the survex blocks, other data and the wallet references (survexscansfolder) as it
     goes. This part of the data import process is where the maximum memory is used and where it
@@ -165,7 +162,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
     global callcount
     global survexlegsnumber
 
-    print(insp+"  - Reading file: " + survexblock.survexfile.path + " <> " + survexfile.path)
+    print(insp+"  - MEM:{} Reading. parent:{}  <> {} ".format(get_process_memory(),survexblock.survexfile.path,survexfile.path))
     stamp = datetime.now()
     lineno = 0
     
@@ -199,12 +196,12 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
             if len(wallet)<2:
                 wallet = "0" + wallet
             refscan = "%s#%s%s" % (yr, letterx, wallet )
-            survexscansfolders = models_survex.SurvexScansFolder.objects.filter(walletname=refscan)
-            if survexscansfolders:
-                survexblock.survexscansfolder = survexscansfolders[0]
+            manyscansfolders = models_survex.ScansFolder.objects.filter(walletname=refscan)
+            if manyscansfolders:
+                survexblock.survexscansfolder = manyscansfolders[0]
                 survexblock.save()
             else:
-                message = ' ! Wallet ; ref {} - NOT found in survexscansfolders {}'.format(refscan, survexblock.survexfile.path)
+                message = ' ! Wallet ; ref {} - NOT found in manyscansfolders {}'.format(refscan, survexblock.survexfile.path)
                 print((insp+message))
                 models.DataIssue.objects.create(parser='survex', message=message)
 
@@ -265,12 +262,12 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
             assert (int(yr)>1960 and int(yr)<2039), "Wallet year out of bounds: %s" % yr
             assert (int(wallet)<100), "Wallet number more than 100: %s" % wallet
             refscan = "%s#%s%s" % (yr, letterx, wallet)
-            survexscansfolders = models_survex.SurvexScansFolder.objects.filter(walletname=refscan)
-            if survexscansfolders:
-                survexblock.survexscansfolder = survexscansfolders[0]
+            manyscansfolders = models_survex.ScansFolder.objects.filter(walletname=refscan)
+            if manyscansfolders:
+                survexblock.survexscansfolder = manyscansfolders[0]
                 survexblock.save()
             else:
-                message = ' ! Wallet *REF {} - NOT found in survexscansfolders {}'.format(refscan, survexblock.survexfile.path)
+                message = ' ! Wallet *REF {} - NOT found in manyscansfolders {}'.format(refscan, survexblock.survexfile.path)
                 print((insp+message))
                 models.DataIssue.objects.create(parser='survex', message=message)
             continue
@@ -291,17 +288,18 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
         cmd = cmd.lower()
         if re.match("include$(?i)", cmd):
             includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line)))
-            print((insp+'   - Include path found including - ' + includepath))
-            # Try to find the cave in the DB if not use the string as before
+            print((insp+'   - Include path found, including - ' + includepath))
+            # Try to find the cave in the DB; if not found, use the string as before
             path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
             if path_match:
                 pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
-                # print(insp+pos_cave)
+                print((insp+'    - Match in DB (i) for cave {}.'.format(pos_cave)))
                 cave = models_caves.getCaveByReference(pos_cave)
                 if cave:
                     survexfile.cave = cave
             else:
-                print((insp+'    - No match in DB (i) for %s, so loading..' % includepath))
+                print((insp+'    - NO Match in DB (i) for a cave for {}'.format(includepath)))
+
             includesurvexfile = models_survex.SurvexFile(path=includepath)
             includesurvexfile.save()
             includesurvexfile.SetDirectory()
@@ -309,10 +307,17 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
                 survexblock.save()
                 fininclude = includesurvexfile.OpenFile()
                 insp += "> "
-                RecursiveLoad(survexblock, includesurvexfile, fininclude, textlines)
+                RecursiveLoad(survexblock, includesurvexfile, fininclude)
+                #--------------------------------------------------------
+                fininclude.close()
                 insp = insp[2:]
+            else:
+                print((insp+'    ! ERROR *include file not found for %s' % includesurvexfile))
 
         elif re.match("begin$(?i)", cmd):
+            # On a *begin statement we start a new survexblock.
+            # There should not be any *include inside a begin-end block, so this is a simple
+            # load not a recursive load. But there may be many blocks in one file.
             if line:
                 newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line))
                 # Try to find the cave in the DB if not use the string as before
@@ -328,16 +333,16 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
 
                 previousnlegs = survexlegsnumber
                 name = line.lower()
-                print((insp+'   - Begin found for: ' + name))
-#                survexblockdown = models_survex.SurvexBlock(name=name, begin_char=fin.tell(), parent=survexblock, survexpath=survexblock.survexpath+"."+name, cave=survexfile.cave, survexfile=survexfile, totalleglength=0.0)
+                print(insp+'   - Begin found for:{}, creating new SurvexBlock '.format(name))
                 survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock, survexpath=survexblock.survexpath+"."+name, 
                         cave=survexfile.cave, survexfile=survexfile, legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
                 survexblockdown.save()
                 survexblock.save()
                 survexblock = survexblockdown
-                textlinesdown = [ ]
                 insp += "> "
-                RecursiveLoad(survexblockdown, survexfile, fin, textlinesdown)
+                RecursiveLoad(survexblockdown, survexfile, fin)
+                #--------------------------------------------------------
+                # do not close the file as there may be more blocks in this one
                 insp = insp[2:]
             else:
                 iblankbegins += 1
@@ -346,7 +351,6 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
             if iblankbegins:
                 iblankbegins -= 1
             else:
-                # .text not used, using it for number of legs per block
                 legsinblock = survexlegsnumber - previousnlegs
                 print(insp+"LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,survexlegsnumber))
                 survexblock.legsall = legsinblock
@@ -370,7 +374,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
             # print(insp+'   - Team found: ')
             mteammember = rx_team.match(line)
             if mteammember:
-                for tm in rx_team_member.split(mteammember.group(2)):
+                for tm in rx_person.split(mteammember.group(2)):
                     if tm:
                         personexpedition = survexblock.expedition and GetPersonExpeditionNameLookup(survexblock.expedition).get(tm.lower())
                         if (personexpedition, tm) not in teammembers:
@@ -431,6 +435,31 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
         timetaken = endstamp - stamp
         # print(insp+'   - Time to process: ' + str(timetaken))
 
+def get_process_memory():  # used to tag each "Reading" log line with a memory figure
+    usage=resource.getrusage(resource.RUSAGE_SELF)  # resource usage of this process itself, not its children
+    return usage[2]/1024.0  # index 2 is ru_maxrss (peak RSS, not current); presumably KiB on Linux so this is MiB there - units are platform-dependent, TODO confirm
+
+
+def FindAndLoadAllSurvex(survexblockroot, survexfileroot):
+    """Follows the *include links recursively to find files
+    """
+    print('  - redirecting stdout to loadsurvexblks.log...')  # last message printed before sys.stdout is swapped
+    stdout_orig = sys.stdout  # kept so the original stream can be put back at the end
+    # Redirect sys.stdout to the file
+    sys.stdout = open('loadsurvexblks.log', 'w')  # relative path, so the log lands in the current working directory; 'w' truncates any previous log
+
+    finroot = survexfileroot.OpenFile()  # file object for the root survex file, passed to RecursiveLoad as its 'fin' argument
+    RecursiveLoad(survexblockroot, survexfileroot, finroot)  # NOTE(review): if this raises, sys.stdout stays redirected and the log is never closed - consider try/finally
+    finroot.close()  # the root file is closed here; RecursiveLoad closes the *include files it opens itself
+    
+    # Close the logging file
+    sys.stdout.close()  # sys.stdout is still the log file at this point
+    print("+", file=sys.stderr)  # goes to stderr explicitly: sys.stdout is a closed file until restored below
+    sys.stderr.flush();
+    # Restore sys.stdout to our old saved file handler
+    sys.stdout = stdout_orig
+
+
 def LoadAllSurvexBlocks():
     global survexlegsalllength
     global survexlegsnumber
@@ -441,47 +470,28 @@ def LoadAllSurvexBlocks():
     models_survex.SurvexFile.objects.all().delete()
     models_survex.SurvexDirectory.objects.all().delete()
     models_survex.SurvexEquate.objects.all().delete()
-    #models_survex.SurvexLeg.objects.all().delete()
     models_survex.SurvexTitle.objects.all().delete()
     models_survex.SurvexPersonRole.objects.all().delete()
     models_survex.SurvexStation.objects.all().delete()
 
     print(" - Data flushed")
-    # Clear the data issues as we are reloading
     models.DataIssue.objects.filter(parser='survex').delete()
-    print(' - Loading All Survex Blocks...')
-    
-    print('  - redirecting stdout to loadsurvexblks.log...')
-    stdout_orig = sys.stdout
-    # Redirect sys.stdout to the file
-    sys.stdout = open('loadsurvexblks.log', 'w')
-
-    survexfile = models_survex.SurvexFile(path=settings.SURVEX_TOPNAME, cave=None)
-    survexfile.save()
-    survexfile.SetDirectory()
-
-    #Load all
-    # this is the first so id=1
-    survexblockroot = models_survex.SurvexBlock(name=ROOTBLOCK, survexpath="", cave=None, survexfile=survexfile, 
+ 
+    survexfileroot = models_survex.SurvexFile(path=settings.SURVEX_TOPNAME, cave=None)
+    survexfileroot.save()
+    survexfileroot.SetDirectory()
+    survexblockroot = models_survex.SurvexBlock(name=ROOTBLOCK, survexpath="", cave=None, survexfile=survexfileroot, 
             legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
+    # this is the first so id=1
     survexblockroot.save()
-    fin = survexfile.OpenFile()
-    textlines = [ ]
-    # The real work starts here
-    RecursiveLoad(survexblockroot, survexfile, fin, textlines)
-    fin.close()
+
+    print(' - Loading All Survex Blocks...')
+    FindAndLoadAllSurvex(survexblockroot, survexfileroot)
+
     survexblockroot.totalleglength = survexlegsalllength
     survexblockroot.legsall = survexlegsnumber
-    #survexblockroot.text = "".join(textlines) these are all blank
     survexblockroot.save()
     
-    # Close the file
-    sys.stdout.close()
-    print("+", file=sys.stderr)
-    sys.stderr.flush();
-    
-    # Restore sys.stdout to our old saved file handler
-    sys.stdout = stdout_orig
     print(" - total number of survex legs: {}m".format(survexlegsnumber))
     print(" - total leg lengths loaded: {}m".format(survexlegsalllength))
     print(' - Loaded All Survex Blocks.')
@@ -500,48 +510,6 @@ def LoadPos():
     topdata = settings.SURVEX_DATA + settings.SURVEX_TOPNAME
     print((' - Generating a list of Pos from %s.svx and then loading...' % (topdata)))
 
-    # TO DO - remove the cache file apparatus. Not needed. Only laser points and entrances loaded now.
-    
-    # Be careful with the cache file. 
-    # If LoadPos has been run before, 
-    # but without cave import being run before,
-    # then *everything* may be in the fresh  'not found' cache file. 
-    
-    # cachefile = settings.SURVEX_DATA + "posnotfound.cache"
-    # notfoundbefore = {}
-    # if os.path.isfile(cachefile):
-        # # this is not a good test. 1623.svx may never change but *included files may have done.
-        # # When the *include is unrolled, we will be able to get a proper timestamp to use
-        # # and can increase the timeout from 3 days to 30 days.
-        # updtsvx = os.path.getmtime(topdata + ".svx")
-        # updtcache = os.path.getmtime(cachefile)
-        # age = updtcache - updtsvx
-        # print(('   svx: %s    cache: %s    not-found cache is fresher by: %s' % (updtsvx, updtcache, str(timedelta(seconds=age) ))))
-        
-        # now = time.time()
-        # if now - updtcache > 3*24*60*60:
-            # print("   cache is more than 3 days old. Deleting.")
-            # os.remove(cachefile)
-        # elif age < 0 :
-            # print("   cache is stale. Deleting.")
-            # os.remove(cachefile)
-        # else:
-            # print("   cache is fresh. Reading...")
-            # try:
-                # with open(cachefile, "r") as f:
-                    # for line in f:
-                        # l = line.rstrip()
-                        # if l in notfoundbefore:
-                            # notfoundbefore[l] +=1 # should not be duplicates
-                            # print(" DUPLICATE ", line, notfoundbefore[l])
-                        # else:
-                            # notfoundbefore[l] =1
-            # except:
-                # print("   FAILURE READ opening cache file %s" % (cachefile))
-                # raise
-            
-    
-#    notfoundnow =[]
     found = 0
     skip = {}
     print("\n") # extra line because cavern overwrites the text buffer somehow
@@ -615,18 +583,5 @@ def LoadPos():
                         models.DataIssue.objects.create(parser='survex', message=message)
                         raise
 
-    #print(" - %s failed lookups of SurvexStation.objects. %s found. %s skipped." % (len(notfoundnow),found, len(skip)))
     print(" - {} SurvexStation entrances found.".format(found))
 
-    # if found > 10: # i.e. a previous cave import has been done
-        # try:
-            # with open(cachefile, "w") as f:
-                # c = len(notfoundnow)+len(skip)
-                # for i in notfoundnow:
-                    # pass #f.write("%s\n" % i)
-                # for j in skip:
-                    # pass #f.write("%s\n" % j) # NB skip not notfoundbefore
-                # print(('   Not-found cache file written: %s entries' % c))
-        # except:
-            # print("   FAILURE WRITE opening cache file %s" % (cachefile))
-            # raise
-\ No newline at end of file
diff --git a/parsers/surveys.py b/parsers/surveys.py
index baa8725..f3edebe 100644
--- a/parsers/surveys.py
+++ b/parsers/surveys.py
@@ -1,6 +1,3 @@
-from __future__ import (absolute_import, division,
-                        print_function, unicode_literals)
-
 import sys
 import os
 import types
@@ -17,7 +14,7 @@ from functools import reduce
 import settings
 #from troggle.core.models import *
 #from troggle.core.models_caves import *
-#from troggle.core.models_survex import *
+from troggle.core.models_survex import SurvexScanSingle, ScansFolder, TunnelFile
 
 def get_or_create_placeholder(year):
     """ All surveys must be related to a logbookentry. We don't have a way to
@@ -83,15 +80,15 @@ def LoadListScans():
     print(' - Loading Survey Scans')
 
     SurvexScanSingle.objects.all().delete()
-    SurvexScansFolder.objects.all().delete()
+    ScansFolder.objects.all().delete()
     print(' - deleting all scansFolder and scansSingle objects')
 
     # first do the smkhs (large kh survey scans) directory
-    survexscansfoldersmkhs = SurvexScansFolder(fpath=os.path.join(settings.SURVEY_SCANS, "../surveys/smkhs"), walletname="smkhs") 
+    manyscansfoldersmkhs = ScansFolder(fpath=os.path.join(settings.SURVEY_SCANS, "../surveys/smkhs"), walletname="smkhs") 
     print("smkhs", end=' ')
-    if os.path.isdir(survexscansfoldersmkhs.fpath):
-        survexscansfoldersmkhs.save()
-        LoadListScansFile(survexscansfoldersmkhs)
+    if os.path.isdir(manyscansfoldersmkhs.fpath):
+        manyscansfoldersmkhs.save()
+        LoadListScansFile(manyscansfoldersmkhs)
         
     
     # iterate into the surveyscans directory
@@ -106,13 +103,13 @@ def LoadListScans():
             for fy, ffy, fisdiry in GetListDir(ff):
                 if fisdiry:
                     assert fisdiry, ffy
-                    survexscansfolder = SurvexScansFolder(fpath=ffy, walletname=fy)
+                    survexscansfolder = ScansFolder(fpath=ffy, walletname=fy)
                     survexscansfolder.save()
                     LoadListScansFile(survexscansfolder)
         
         # do the 
         elif f != "thumbs":
-            survexscansfolder = SurvexScansFolder(fpath=ff, walletname=f)
+            survexscansfolder = ScansFolder(fpath=ff, walletname=f)
             survexscansfolder.save()
             LoadListScansFile(survexscansfolder)
             
@@ -121,7 +118,7 @@ def FindTunnelScan(tunnelfile, path):
     scansfolder, scansfile = None, None
     mscansdir = re.search(rb"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg))$", path)
     if mscansdir:
-        scansfolderl = SurvexScansFolder.objects.filter(walletname=mscansdir.group(1))
+        scansfolderl = ScansFolder.objects.filter(walletname=mscansdir.group(1))
         if len(scansfolderl):
             assert len(scansfolderl) == 1
             scansfolder = scansfolderl[0]
@@ -136,7 +133,7 @@ def FindTunnelScan(tunnelfile, path):
                 scansfile = scansfilel[0]
             
         if scansfolder:
-            tunnelfile.survexscansfolders.add(scansfolder)
+            tunnelfile.manyscansfolders.add(scansfolder)
         if scansfile:
             tunnelfile.survexscans.add(scansfile)