author     Philip Sargent <philip@Muscogee.localdomain>   2020-05-14 17:21:34 +0100
committer  Philip Sargent <philip@Muscogee.localdomain>   2020-05-14 17:21:34 +0100
commit     ccc5813b3f7e2e07d2792e8e7aabb371005b4a62 (patch)
tree       8a4ddbf824a7d8b696a0e50bb47acd2a41ab7479
parent     314d0e8b710703706d41fbc4d2567445214509f1 (diff)
download   troggle-ccc5813b3f7e2e07d2792e8e7aabb371005b4a62.tar.gz
           troggle-ccc5813b3f7e2e07d2792e8e7aabb371005b4a62.tar.bz2
           troggle-ccc5813b3f7e2e07d2792e8e7aabb371005b4a62.zip
indent recursion and update comments
-rw-r--r--   databaseReset.py                     34
-rw-r--r--   parsers/survex.py                   136
-rw-r--r--   templates/survexscansfolders.html     8
3 files changed, 107 insertions, 71 deletions
diff --git a/databaseReset.py b/databaseReset.py
index 2400048..43d5e04 100644
--- a/databaseReset.py
+++ b/databaseReset.py
@@ -108,6 +108,7 @@ def import_tunnelfiles():
parsers.surveys.LoadTunnelFiles()
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# These functions moved to a different file - not used currently.
#import logbooksdump
#def import_auto_logbooks():
#def dumplogbooks():
@@ -172,7 +173,7 @@ class JobQueue():
return True
def runqonce(self):
- """Run all the jobs in the queue provided once
+ """Run all the jobs in the queue provided - once
"""
print "** Running job ", self.runlabel
@@ -199,6 +200,12 @@ class JobQueue():
def run(self):
+ """First runs all the jobs in the queue against a scratch in-memory db
+ then re-runs the import against the db specified in settings.py
+ Default behaviour is to skip the in-memory phase.
+ When MySQL is the db the in-memory phase crashes as MySQL does not properly
+ relinquish some kind of db connection (not fixed yet)
+ """
self.loadprofiles()
# save db settings for later
dbengine = settings.DATABASES['default']['ENGINE']
@@ -214,15 +221,15 @@ class JobQueue():
else:
skipmem = True
+ print "-- ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE']
+ #print "-- DATABASES.default", settings.DATABASES['default']
+
if dbname ==":memory:":
# just run, and save the sql file
- print "-- ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE']
- print "-- DATABASES.default", settings.DATABASES['default']
self.runqonce()
- self.memdumpsql()
+ self.memdumpsql() # saves the contents of the scratch db, which could be imported later
self.saveprofiles()
elif skipmem:
- print "-- DATABASES.default", settings.DATABASES['default']
self.runqonce()
self.saveprofiles()
else:
@@ -248,9 +255,9 @@ class JobQueue():
print "-- DATABASES.default", settings.DATABASES['default']
# but because the user may be expecting to add this to a db with lots of tables already there,
- # the jobque may not start from scratch so we need to initialise the db properly first
+ # the jobqueue may not start from scratch so we need to initialise the db properly first
# because we are using an empty :memory: database
- # But initiating twice crashes, so be sure to do it once only.
+ # But initialising twice crashes it, so be sure to do it once only.
# Damn. syncdb() is still calling MySQL somehow **conn_params not sqlite3. So crashes on expo server.
@@ -259,9 +266,9 @@ class JobQueue():
if ("dirsredirect",dirsredirect) not in self.queue:
dirsredirect()
if ("caves",import_caves) not in self.queue:
- import_caves() # sometime extract the initialising code from this and put in reinit
+ import_caves() # sometime extract the initialising code from this and put in reinit...
if ("people",import_people) not in self.queue:
- import_people() # sometime extract the initialising code from this and put in reinit
+ import_people() # sometime extract the initialising code from this and put in reinit...
django.db.close_old_connections() # maybe not needed here
@@ -284,7 +291,6 @@ class JobQueue():
django.db.close_old_connections() # magic rune. works. found by looking in django.db__init__.py
#django.setup() # should this be needed?
-
self.runqonce() # crashes because it thinks it has no migrations to apply, when it does.
self.saveprofiles()
@@ -292,7 +298,8 @@ class JobQueue():
return True
def showprofile(self):
- """Prints out the time it took to run the jobqueue"""
+ """Prints out the time it took to run the jobqueue
+ """
for k in self.results_order:
if k =="dirsredirect":
break
@@ -306,7 +313,6 @@ class JobQueue():
print '%10s (s)' % k,
percen=0
r = self.results[k]
- #print "min=",min
for i in range(len(r)):
if k == "runlabel":
@@ -432,9 +438,9 @@ if __name__ == "__main__":
#parse_descriptions() # no longer present
# elif "writeCaves" in sys.argv:
# writeCaves() # no longer present
- elif "autologbooks" in sys.argv:
+ elif "autologbooks" in sys.argv: # untested in 2020
import_auto_logbooks()
- elif "dumplogbooks" in sys.argv:
+ elif "dumplogbooks" in sys.argv: # untested in 2020
dumplogbooks()
else:
usage()
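
The run() docstring added above describes a two-phase import: the whole job queue is first run against a scratch in-memory sqlite database, then re-run against the database configured in settings.py. The following is only a hedged sketch of that flow, assuming the Django settings layout used in databaseReset.py and reusing the queue's own runqonce() method; the restore-and-rerun details are illustrative, not the exact troggle code.

from django.conf import settings
from django.db import close_old_connections

def run_twice(jobqueue):
    # Phase 1: point Django at a scratch in-memory sqlite db and run every job once.
    saved = dict(settings.DATABASES['default'])
    settings.DATABASES['default']['ENGINE'] = 'django.db.backends.sqlite3'
    settings.DATABASES['default']['NAME'] = ':memory:'
    close_old_connections()
    jobqueue.runqonce()

    # Phase 2: restore the real db from settings.py and run the import again.
    settings.DATABASES['default'].update(saved)
    close_old_connections()
    jobqueue.runqonce()
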
diff --git a/parsers/survex.py b/parsers/survex.py
index 42a8a00..e9421c5 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -14,6 +14,10 @@ import time
from datetime import datetime, timedelta
import sys
+"""A 'survex block' is a *begin...*end set of cave data.
+A 'survexscansfolder' is what we today call a "survey scans folder" or a "wallet".
+"""
+
line_leg_regex = re.compile(r"[\d\-+.]+$")
def LoadSurvexLineLeg(survexblock, stardata, sline, comment, cave):
@@ -99,19 +103,37 @@ regex_comment = re.compile(r"([^;]*?)\s*(?:;\s*(.*))?\n?$")
regex_ref = re.compile(r'.*?ref.*?(\d+)\s*#\s*(X)?\s*(\d+)')
regex_star = re.compile(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$')
# years from 1960 to 2039
-regex_starref = re.compile(r'^\s*\*ref[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$(?i)')
+regex_starref = re.compile(r'^\s*\*ref[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$(?i)')
+# regex_starref = re.compile("""?x # VERBOSE mode - can't get this to work
+# ^\s*\*ref # look for *ref at start of line
+# [\s.:]* # some spaces, stops or colons
+# ((?:19[6789]\d)|(?:20[0123]\d)) # a date from 1960 to 2039 - captured as one field
+# \s*# # spaces then hash separator
+# ?\s*(X) # optional X - captured
+# ?\s*(.*?\d+.*?) # maybe a space, then at least one digit in the string - captured
+# $(?i)""", re.X) # the end (do the whole thing case insensitively)
+
+
regex_team = re.compile(r"(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$(?i)")
regex_team_member = re.compile(r" and | / |, | & | \+ |^both$|^none$(?i)")
regex_qm = re.compile(r'^\s*QM(\d)\s+?([a-dA-DxX])\s+([\w\-]+)\.(\d+)\s+(([\w\-]+)\.(\d+)|\-)\s+(.+)$')
+insp = ""
+
def RecursiveLoad(survexblock, survexfile, fin, textlines):
+ """Follows the *include links in all the survex files from the root file 1623.svx
+ and reads in the survex blocks, other data and the wallet references (survexscansfolder) as it
+ goes. This part of the data import process is where the maximum memory is used and where it
+ crashes on memory-constrained machines.
+ """
iblankbegins = 0
text = [ ]
stardata = stardatadefault
teammembers = [ ]
+ global insp
# uncomment to print out all files during parsing
- print(" - Reading file: " + survexblock.survexfile.path)
+ print(insp+" - Reading file: " + survexblock.survexfile.path + " <> " + survexfile.path)
stamp = datetime.now()
lineno = 0
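
The commented-out VERBOSE-mode regex earlier in this hunk is marked "can't get this to work". As a sketch (not part of the commit): the inline ?x and (?i) flags can be dropped in favour of passing re.X | re.I to re.compile(), and the literal '#' separator must be escaped so VERBOSE mode does not treat it as a comment. This version compiles and captures the same three groups as the one-line regex_starref above.

import re

regex_starref_verbose = re.compile(r"""
    ^\s*\*ref                         # *ref at the start of the line
    [\s.:]*                           # some spaces, stops or colons
    ((?:19[6789]\d)|(?:20[0123]\d))   # a year from 1960 to 2039, captured as one field
    \s*\#?                            # optional hash separator (escaped for VERBOSE mode)
    \s*(X)?                           # optional X, captured
    \s*(.*?\d+.*?)$                   # at least one digit in the rest of the line, captured
    """, re.X | re.I)
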
@@ -119,28 +141,28 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path)
if path_match:
pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
- # print('Match')
- # print(pos_cave)
+ # print(insp+'Match')
+ # print(insp+pos_cave)
cave = models.getCaveByReference(pos_cave)
if cave:
survexfile.cave = cave
svxlines = ''
svxlines = fin.read().splitlines()
- # print('Cave - preloop ' + str(survexfile.cave))
- # print(survexblock)
+ # print(insp+'Cave - preloop ' + str(survexfile.cave))
+ # print(insp+survexblock)
for svxline in svxlines:
- # print(survexblock)
+ # print(insp+survexblock)
- # print(svxline)
+ # print(insp+svxline)
# if not svxline:
- # print(' - Not survex')
+ # print(insp+' - Not survex')
# return
# textlines.append(svxline)
lineno += 1
- # print(' - Line: %d' % lineno)
+ # print(insp+' - Line: %d' % lineno)
# break the line at the comment
sline, comment = regex_comment.match(svxline.strip()).groups()
@@ -155,15 +177,15 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
if len(wallet)<2:
wallet = "0" + wallet
refscan = "%s#%s%s" % (yr, letterx, wallet )
- #print(' - Wallet ;ref - %s - looking for survexscansfolder' % refscan)
+ #print(insp+' - Wallet ;ref - %s - looking for survexscansfolder' % refscan)
survexscansfolders = models.SurvexScansFolder.objects.filter(walletname=refscan)
if survexscansfolders:
survexblock.survexscansfolder = survexscansfolders[0]
#survexblock.refscandir = "%s/%s%%23%s" % (mref.group(1), mref.group(1), mref.group(2))
survexblock.save()
- # print(' - Wallet ; ref - %s - found in survexscansfolders' % refscan)
+ # print(insp+' - Wallet ; ref - %s - found in survexscansfolders' % refscan)
else:
- print(' - Wallet ; ref - %s - NOT found in survexscansfolders %s-%s-%s' % (refscan,yr,letterx,wallet))
+ print(insp+' - Wallet ; ref - %s - NOT found in survexscansfolders %s-%s-%s' % (refscan,yr,letterx,wallet))
# This whole section should be moved if we can have *QM become a proper survex command
# Spec of QM in SVX files, currently commented out need to add to survex
@@ -173,7 +195,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
# ;QM1 a hobnob_hallway_2.42 - junction of keyhole passage
qmline = comment and regex_qm.match(comment)
if qmline:
- # print(qmline.groups())
+ # print(insp+qmline.groups())
#(u'1', u'B', u'miraclemaze', u'1.17', u'-', None, u'\tcontinuation of rift')
qm_no = qmline.group(1)
qm_grade = qmline.group(2)
@@ -183,34 +205,34 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
qm_resolve_station = qmline.group(7)
qm_notes = qmline.group(8)
- # print('Cave - %s' % survexfile.cave)
- # print('QM no %d' % int(qm_no))
- # print('QM grade %s' % qm_grade)
- # print('QM section %s' % qm_from_section)
- # print('QM station %s' % qm_from_station)
- # print('QM res section %s' % qm_resolve_section)
- # print('QM res station %s' % qm_resolve_station)
- # print('QM notes %s' % qm_notes)
+ # print(insp+'Cave - %s' % survexfile.cave)
+ # print(insp+'QM no %d' % int(qm_no))
+ # print(insp+'QM grade %s' % qm_grade)
+ # print(insp+'QM section %s' % qm_from_section)
+ # print(insp+'QM station %s' % qm_from_station)
+ # print(insp+'QM res section %s' % qm_resolve_section)
+ # print(insp+'QM res station %s' % qm_resolve_station)
+ # print(insp+'QM notes %s' % qm_notes)
# If the QM isn't resolved (has a resolving station) then load it
if not qm_resolve_section or qm_resolve_section is not '-' or qm_resolve_section is not 'None':
from_section = models.SurvexBlock.objects.filter(name=qm_from_section)
# If we can find a section (survex note chunck, named)
if len(from_section) > 0:
- # print(from_section[0])
+ # print(insp+from_section[0])
from_station = models.SurvexStation.objects.filter(block=from_section[0], name=qm_from_station)
# If we can find a from station then we have the nearest station and can import it
if len(from_station) > 0:
- # print(from_station[0])
+ # print(insp+from_station[0])
qm = models.QM.objects.create(number=qm_no,
nearest_station=from_station[0],
grade=qm_grade.upper(),
location_description=qm_notes)
else:
- # print(' - QM found but resolved')
+ # print(insp+' - QM found but resolved')
pass
- #print('Cave -sline ' + str(cave))
+ #print(insp+'Cave -sline ' + str(cave))
if not sline:
continue
@@ -231,24 +253,24 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
if survexscansfolders:
survexblock.survexscansfolder = survexscansfolders[0]
survexblock.save()
- # print(' - Wallet *REF - %s - found in survexscansfolders' % refscan)
+ # print(insp+' - Wallet *REF - %s - found in survexscansfolders' % refscan)
else:
- print(' - Wallet *REF - %s - NOT found in survexscansfolders %s-%s-%s' % (refscan,yr,letterx,wallet))
+ print(insp+' - Wallet *REF - %s - NOT found in survexscansfolders %s-%s-%s' % (refscan,yr,letterx,wallet))
continue
# detect the star command
mstar = regex_star.match(sline)
if not mstar:
if "from" in stardata:
- # print('Cave ' + str(survexfile.cave))
- # print(survexblock)
+ # print(insp+'Cave ' + str(survexfile.cave))
+ # print(insp+survexblock)
LoadSurvexLineLeg(survexblock, stardata, sline, comment, survexfile.cave)
- # print(' - From: ')
- # print(stardata)
+ # print(insp+' - From: ')
+ # print(insp+stardata)
pass
elif stardata["type"] == "passage":
LoadSurvexLinePassage(survexblock, stardata, sline, comment)
- # print(' - Passage: ')
+ # print(insp+' - Passage: ')
#Missing "station" in stardata.
continue
@@ -257,24 +279,26 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
cmd = cmd.lower()
if re.match("include$(?i)", cmd):
includepath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line))
- print(' - Include path found including - ' + includepath)
+ print(insp+' - Include path found including - ' + includepath)
# Try to find the cave in the DB if not use the string as before
path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
if path_match:
pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
- # print(pos_cave)
+ # print(insp+pos_cave)
cave = models.getCaveByReference(pos_cave)
if cave:
survexfile.cave = cave
else:
- print(' - No match in DB (i) for %s, so loading..' % includepath)
+ print(insp+' - No match in DB (i) for %s, so loading..' % includepath)
includesurvexfile = models.SurvexFile(path=includepath)
includesurvexfile.save()
includesurvexfile.SetDirectory()
if includesurvexfile.exists():
survexblock.save()
fininclude = includesurvexfile.OpenFile()
+ insp += "> "
RecursiveLoad(survexblock, includesurvexfile, fininclude, textlines)
+ insp = insp[2:]
elif re.match("begin$(?i)", cmd):
if line:
@@ -283,23 +307,25 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath)
if path_match:
pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
- # print(pos_cave)
+ # print(insp+pos_cave)
cave = models.getCaveByReference(pos_cave)
if cave:
survexfile.cave = cave
else:
- print(' - No match (b) for %s' % newsvxpath)
+ print(insp+' - No match (b) for %s' % newsvxpath)
name = line.lower()
- print(' - Begin found for: ' + name)
- # print('Block cave: ' + str(survexfile.cave))
+ print(insp+' - Begin found for: ' + name)
+ # print(insp+'Block cave: ' + str(survexfile.cave))
survexblockdown = models.SurvexBlock(name=name, begin_char=fin.tell(), parent=survexblock, survexpath=survexblock.survexpath+"."+name, cave=survexfile.cave, survexfile=survexfile, totalleglength=0.0)
survexblockdown.save()
survexblock.save()
survexblock = survexblockdown
- # print(survexblockdown)
+ # print(insp+survexblockdown)
textlinesdown = [ ]
+ insp += "> "
RecursiveLoad(survexblockdown, survexfile, fin, textlinesdown)
+ insp = insp[2:]
else:
iblankbegins += 1
@@ -309,15 +335,15 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
else:
survexblock.text = "".join(textlines)
survexblock.save()
- # print(' - End found: ')
+ # print(insp+' - End found: ')
endstamp = datetime.now()
timetaken = endstamp - stamp
- # print(' - Time to process: ' + str(timetaken))
+ # print(insp+' - Time to process: ' + str(timetaken))
return
elif re.match("date$(?i)", cmd):
if len(line) == 10:
- #print(' - Date found: ' + line)
+ #print(insp+' - Date found: ' + line)
survexblock.date = make_aware(datetime.strptime(re.sub(r"\.", "-", line), '%Y-%m-%d'), get_current_timezone())
expeditions = models.Expedition.objects.filter(year=line[:4])
if expeditions:
@@ -328,7 +354,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
elif re.match("team$(?i)", cmd):
pass
- # print(' - Team found: ')
+ # print(insp+' - Team found: ')
mteammember = regex_team.match(line)
if mteammember:
for tm in regex_team_member.split(mteammember.group(2)):
@@ -343,7 +369,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
personrole.save()
elif cmd == "title":
- #print(' - Title found: ')
+ #print(insp+' - Title found: ')
survextitle = models.SurvexTitle(survexblock=survexblock, title=line.strip('"'), cave=survexfile.cave)
survextitle.save()
pass
@@ -353,11 +379,11 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
pass
elif cmd == "data":
- #print(' - Data found: ')
+ #print(insp+' - Data found: ')
ls = line.lower().split()
stardata = { "type":ls[0] }
- #print(' - Star data: ', stardata)
- #print(ls)
+ #print(insp+' - Star data: ', stardata)
+ #print(insp+ls)
for i in range(0, len(ls)):
stardata[stardataparamconvert.get(ls[i], ls[i])] = i - 1
if ls[0] in ["normal", "cartesian", "nosurvey"]:
@@ -368,21 +394,21 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
assert ls[0] == "passage", line
elif cmd == "equate":
- #print(' - Equate found: ')
+ #print(insp+' - Equate found: ')
LoadSurvexEquate(survexblock, line)
elif cmd == "fix":
- #print(' - Fix found: ')
+ #print(insp+' - Fix found: ')
survexblock.MakeSurvexStation(line.split()[0])
else:
- #print(' - Stuff')
+ #print(insp+' - Stuff')
if cmd not in ["sd", "include", "units", "entrance", "data", "flags", "title", "export", "instrument",
"calibrate", "set", "infer", "alias", "cs", "declination", "case"]:
- print("Unrecognised command in line:", cmd, line, survexblock, survexblock.survexfile.path)
+ print(insp+"Unrecognised command in line:", cmd, line, survexblock, survexblock.survexfile.path)
endstamp = datetime.now()
timetaken = endstamp - stamp
- # print(' - Time to process: ' + str(timetaken))
+ # print(insp+' - Time to process: ' + str(timetaken))
def LoadAllSurvexBlocks():
@@ -448,7 +474,7 @@ def LoadPos():
notfoundbefore = {}
if os.path.isfile(cachefile):
# this is not a good test. 1623.svx may never change but *included files may have done.
- # When the *include is unrolled, we will have a proper timestamp to use
+ # When the *include is unrolled, we will be able to get a proper timestamp to use
# and can increase the timeout from 3 days to 30 days.
updtsvx = os.path.getmtime(topdata + ".svx")
updtcache = os.path.getmtime(cachefile)
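
The main change in parsers/survex.py is the indentation of the recursion trace: the module-level prefix string insp grows by "> " before each recursive call into an included file or a *begin block, and is trimmed back on return, so every print shows the current nesting depth. A minimal standalone sketch of the technique follows; the file names and tree structure are made up for illustration.

insp = ""

def recursive_load(path, includes):
    """Print one line per file, indented by the current *include nesting depth."""
    global insp
    print(insp + " - Reading file: " + path)
    for child_path, child_includes in includes.items():
        insp += "> "                      # go one level deeper
        recursive_load(child_path, child_includes)
        insp = insp[2:]                   # back out one level on return

recursive_load("1623.svx", {
    "caves-1623/204/204.svx": {"caves-1623/204/sloppy.svx": {}},
    "caves-1623/258/258.svx": {},
})
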
diff --git a/templates/survexscansfolders.html b/templates/survexscansfolders.html
index 6250897..05c65ba 100644
--- a/templates/survexscansfolders.html
+++ b/templates/survexscansfolders.html
@@ -2,11 +2,15 @@
{% load wiki_markup %}
{% load survex_markup %}
-{% block title %}All Survey scans folders{% endblock %}
+{% block title %}All Survey scans folders (wallets){% endblock %}
{% block content %}
-<h3>All Survey scans folders</h3>
+<h3>All Survey scans folders (wallets)</h3>
+<p>Each wallet contains the scanned original in-cave survey notes and sketches of
+plans and elevations. It also contains scans of the centre-line survex output on which
+hand-drawn passage sections are drawn. These hand-drawn passages will eventually be
+traced to produce Tunnel or Therion drawings and, ultimately, the final complete cave survey.</p>
<table>
<tr><th>Scans folder</th><th>Files</th><th>Survex blocks</th></tr>
{% for survexscansfolder in survexscansfolders %}