4 files changed, 128 insertions, 84 deletions
diff --git a/databaseReset.py b/databaseReset.py
index a4687cd..dadb2dc 100644
--- a/databaseReset.py
+++ b/databaseReset.py
@@ -18,7 +18,10 @@ expouser=settings.EXPOUSER
 expouserpass=settings.EXPOUSERPASS
 expouseremail=settings.EXPOUSER_EMAIL
 
-def reload_db():
+def reinit_db():
+    """Rebuild database from scratch. Deletes the file first if sqlite is used,
+    otherwise it drops the database and creates it.
+    """
     if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.sqlite3':
         try:
             os.remove(databasename)
@@ -30,26 +33,27 @@ def reload_db():
         cursor.execute("CREATE DATABASE %s" % databasename)
         cursor.execute("ALTER DATABASE %s CHARACTER SET=utf8" % databasename)
         cursor.execute("USE %s" % databasename)
-    management.call_command('syncdb', interactive=False)
-    user = User.objects.create_user(expouser, expouseremail, expouserpass)
-    user.is_staff = True
-    user.is_superuser = True
-    user.save()
+    syncuser()
 
 def syncuser():
-    """Sync user - needed after reload"""
+    """Sync user - needed after reload
+    """
+    print("Synchronizing user")
     management.call_command('syncdb', interactive=False)
     user = User.objects.create_user(expouser, expouseremail, expouserpass)
     user.is_staff = True
     user.is_superuser = True
     user.save()
 
-
-def make_dirs():
-    """Make directories that troggle requires"""
+def dirsredirect():
+    """Make the directories that troggle requires and set up page redirects
+    """
     #should also deal with permissions here.
     if not os.path.isdir(settings.PHOTOS_ROOT):
         os.mkdir(settings.PHOTOS_ROOT)
+    for oldURL, newURL in [("indxal.htm", reverse("caveindex"))]:
+        f = troggle.flatpages.models.Redirect(originalURL = oldURL, newURL = newURL)
+        f.save()
 
 def import_caves():
     import parsers.caves
@@ -58,38 +62,49 @@ def import_caves():
 
 def import_people():
     import parsers.people
+    print("Importing People (folk.csv)")
     parsers.people.LoadPersonsExpos()
 
 def import_logbooks():
     import parsers.logbooks
+    print("Importing Logbooks")
     parsers.logbooks.LoadLogbooks()
 
+def import_QMs():
+    print("Importing QMs (old caves)")
+    import parsers.QMs
+    # import process itself runs on qm.csv in only 3 caves, not 264!
+    
 def import_survex():
     import parsers.survex
+    print("Importing Survex Blocks")
     parsers.survex.LoadAllSurvexBlocks()
+    print("Importing Survex Positions")
+    parsers.survex.LoadPos()
+
+def import_survexpos(): 
+    import parsers.survex
+    print("Importing Survex Positions")
     parsers.survex.LoadPos()
 
-def import_QMs():
-    import parsers.QMs
-    # import process itself runs on qm.csv in only 3 caves, not 264!
-    
 def import_surveys():
+    """This appears to store data in unused objects. The code is kept
+    for future re-working to manage progress against notes, plans and elevs.
+    """
     import parsers.surveys
+    print("Importing surveys")
     parsers.surveys.parseSurveys(logfile=settings.LOGFILE)
 
 def import_surveyscans():
     import parsers.surveys
+    print("Importing Survey Scans")
     parsers.surveys.LoadListScans()
 
 def import_tunnelfiles():
     import parsers.surveys
+    print("Importing Tunnel files")
     parsers.surveys.LoadTunnelFiles()
 
-def pageredirects():
-    for oldURL, newURL in [("indxal.htm", reverse("caveindex"))]:
-        f = troggle.flatpages.models.Redirect(originalURL = oldURL, newURL = newURL)
-        f.save()
-
 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
 def import_auto_logbooks():
     import parsers.logbooks
@@ -154,9 +169,9 @@ class JobQueue():
         self.queue = [] # tuples of (jobname, jobfunction)
         self.results = {}
         self.results_order=[
-            "date","runlabel","reload", "caves", "people",
+            "date","runlabel","reinit", "caves", "people",
             "logbooks", "scans", "QMs", "survex",
-            "tunnel", "surveys", "test", "makedirs", "redirect", "syncuser" ]
+            "tunnel", "surveys", "test", "dirsredirect", "syncuser", "survexpos" ]
         for k in self.results_order:
             self.results[k]=[]
         self.tfile = "import_profile.json"
@@ -191,12 +206,12 @@ class JobQueue():
         self.results["runlabel"].append(self.runlabel)
 
         for i in self.queue:
-           start = time.time()
-           i[1]()    #  looks ugly but invokes function passed in the second item in the tuple
-           duration = time.time()-start
-           print "\n*- Ended \"",  i[0], "\"  %.1f seconds" % duration
-           self.results[i[0]].append(duration)
-
+            start = time.time()
+            i[1]()    #  looks ugly but invokes function passed in the second item in the tuple
+            duration = time.time()-start
+            print "\n*- Ended \"",  i[0], "\"  %.1f seconds" % duration
+            self.results[i[0]].append(duration)
+               
         with open(self.tfile, 'w') as f:
             json.dump(self.results, f)     
 
@@ -207,7 +222,7 @@ class JobQueue():
         # currently uses django db whatever it was. CHANGE this to explicitly use
         # a new sqlite3 db and then import the sql dump of that into the troggle db
         # instead of loading directly into the troggle sqlite db.
-        # in-menmor ":memory:" sqlite is ~ 7x faster and all of troggle can be
+        # in-memory ":memory:" sqlite is ~ 7x faster and all of troggle can be
         # loaded in 6 minutes that way
         djconn = django.db.connection
         from dump import _iterdump
@@ -221,52 +236,76 @@ class JobQueue():
     def showprofile(self):
         """Prints out the time it took to run the jobqueue"""
         for k in self.results_order:
-            percen=0
-            lst = self.results[k]  
-            if k == "runlabel": 
-                r =   lst[len(lst)-1]
-                print '%15s %s' % (k,r)
+            if k =="dirsredirect":
+                continue    # skip this row only; later keys (e.g. survexpos) must still print
+            elif k =="syncuser":
+                continue
+            elif k =="test":
+                continue
             elif k =="date":
-                # Calculate dates as days before present to one decimal place
-                r =   lst[len(lst)-1]
-                if len(lst)>2:
-                    days = (lst[len(lst)-2]-r)/(24*60*60)
-                    print '%15s %8.1f days ago' % (k,days)
-            elif len(lst)>2:        
-                e = len(lst)-1
-                percen = 100* (lst[e] - lst[e-1])/lst[e-1]
-                if abs(percen) >0.1:
-                    print '%15s %8.1f%%' % (k,  percen)
-                else:
-                    print '%15s ' % (k)
+                print " days ago     ",
+            else:
+                print '%9s (s)' % k,
+            percen=0
+            r = self.results[k]  
+            #print "min=",min
+            
+            for i in range(len(r)):
+                if k == "runlabel": 
+                    if r[i]:
+                        rp =   r[i]
+                    else:
+                        rp = "      - "
+                    print '%8s' % rp,
+                elif k =="date":
+                    # Calculate dates as days before present
+                    if r[i]:
+                        if i == len(r)-1:
+                            print "    this",
+                        else:
+                            # prints one place to the left of where you expect
+                            days = (r[i]-r[len(r)-1])/(24*60*60)
+                            print '%8.2f' % days,
+                elif r[i]: 
+                    print '%8.1f' % r[i],
+                    if i == len(r)-1 and i > 0 and r[i-1]:    # i > 0: with one run r[i-1] would wrap to r[-1]
+                        percen = 100* (r[i] - r[i-1])/r[i-1]
+                        if abs(percen) >0.1:
+                            print '%8.1f%%' % percen,
+                        else:
+                            print "      -  ",
+            print ""
         return True
 
 
 def usage():
     print("""Usage is 'python databaseReset.py <command> [runlabel]'
              where command is:
-             reset     - this is normal usage, clear database and reread everything from files - time-consuming
+             reset     - normal usage: clear database and reread everything from files - time-consuming
              caves     - read in the caves 
-             logbooks  - read in just the logbooks
+             logbooks  - read in the logbooks
              people    - read in the people from folk.csv
-             QMs       - read in the QM csv files
-             reload_db - clear database (delete everything) and make empty tables
-             scans     - NOT the scanned surveynotes ?!
+             QMs       - read in the QM csv files (older caves only)
+             reinit    - clear database (delete everything) and make empty tables. Import nothing.
+             scans     - the survey scans in all the wallets
              survex    - read in the survex files - all the survex blocks
-             surveys   - read in the scanned surveynotes
-             tunnel    - read in the Tunnel files - which scans the surveyscans too
-
              survexpos - just the Pos out of the survex files (not part of reset)
 
+             tunnel    - read in the Tunnel files - which scans the survey scans too
+
              resetend     - (archaic?)
              writecaves   - *disabled* (archaic?)
              autologbooks - read in autologbooks (what are these?)
              dumplogbooks - write out autologbooks (not working?)
-             syncuser     - needed after reloading database rom SQL backup
+             syncuser     - needed after reloading database from SQL backup
+             surveys      - read in scans by expo, must run after "people". Not used.
              test         - testing...
 
              and [runlabel] is an optional string identifying this run of the script
              in the stored profiling data 'import-profile.json'
+             
+             caves and logbooks must be run on an empty db before the others as they
+             set up db tables used by the others.
              """)
 
 if __name__ == "__main__":
@@ -275,31 +314,35 @@ if __name__ == "__main__":
     import django
     django.setup()
 
-    runlabel = sys.argv[len(sys.argv)-1]
+    if len(sys.argv)>2:
+        runlabel = sys.argv[len(sys.argv)-1]
+    else: 
+        runlabel=None
+        
     jq = JobQueue(runlabel)
-
-    if "test" in sys.argv:
-        jq.enq("reload",reload_db)
-        jq.enq("makedirs",make_dirs)
+    
+    if len(sys.argv)==1:
+        usage()
+        exit()
+    elif "test" in sys.argv:
+        jq.enq("reinit",reinit_db)
+        jq.enq("dirsredirect",dirsredirect)
         jq.enq("caves",import_caves)
-        jq.enq("survex",import_survex)
-        jq.enq("surveys",import_surveys)
-
+        #jq.enq("people",import_people)
+        #jq.enq("logbooks",import_logbooks)
     elif "caves" in sys.argv:
         jq.enq("caves",import_caves)
     elif "logbooks" in sys.argv:
-        # management.call_command('syncdb', interactive=False)  # this sets the path so that import settings works in import_survex
         jq.enq("logbooks",import_logbooks)
     elif "people" in sys.argv:
-        jq.enq("logbooks",import_people)
+        jq.enq("people",import_people)
     elif "QMs" in sys.argv:
         jq.enq("QMs",import_QMs)
     elif "reload_db" in sys.argv:
         jq.enq("reload",reload_db)
     elif "reset" in sys.argv:
-        jq.enq("reload",reload_db)
-        jq.enq("makedirs",make_dirs)
-        jq.enq("redirect",pageredirects)
+        jq.enq("reinit",reinit_db)
+        jq.enq("dirsredirect",dirsredirect)
         jq.enq("caves",import_caves)
         jq.enq("people",import_people)
         jq.enq("scans",import_surveyscans)
@@ -307,16 +350,12 @@ if __name__ == "__main__":
         jq.enq("QMs",import_QMs)
         jq.enq("survex",import_survex)
         jq.enq("tunnel",import_tunnelfiles)
-        jq.enq("surveys",import_surveys)
     elif "scans" in sys.argv:
         jq.enq("scans",import_surveyscans)
     elif "survex" in sys.argv:
-        # management.call_command('syncdb', interactive=False)  # this sets the path so that import settings works in import_survex
         jq.enq("survex",import_survex)
     elif "survexpos" in sys.argv:
-        # management.call_command('syncdb', interactive=False)  # this sets the path so that import settings works in import_survex
-        import parsers.survex
-        jq.enq("survexpos",parsers.survex.LoadPos)
+        jq.enq("survexpos",import_survexpos)
     elif "surveys" in sys.argv:
         jq.enq("surveys",import_surveys)
     elif "tunnel" in sys.argv:
@@ -336,8 +375,9 @@ if __name__ == "__main__":
     elif "dumplogbooks" in sys.argv:
         dumplogbooks()
     else:
-        print("%s not recognised" % sys.argv)
         usage()
+        print("%s not recognised as a command." % sys.argv[1])
+        exit()
 
     jq.run()
     jq.showprofile()
diff --git a/parsers/caves.py b/parsers/caves.py
index 606007f..4f65675 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -152,7 +152,7 @@ def readcave(filename):
                               slug = slug,
                               primary = primary)
                 except:
-                    message = "Can't find text (slug): %s, skipping %s" % (slug, context)
+                    message = " ! Can't find text (slug): %s, skipping %s" % (slug, context)
                     models.DataIssue.objects.create(parser='caves', message=message)
                     print(message)
                     
@@ -164,7 +164,7 @@ def readcave(filename):
                     entrance = models.Entrance.objects.get(entranceslug__slug = slug)
                     ce = models.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance)
                 except:
-                    message = "Entrance text (slug) %s missing %s" % (slug, context)
+                    message = " ! Entrance text (slug) %s missing %s" % (slug, context)
                     models.DataIssue.objects.create(parser='caves', message=message)
                     print(message)
                 
@@ -172,14 +172,14 @@ def readcave(filename):
 def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""):
     items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
     if len(items) < minItems and printwarnings:
-        message = "%(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),
+        message = " ! %(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),
                                                                            "itemname": itemname,
                                                                            "min": minItems} + context
         models.DataIssue.objects.create(parser='caves', message=message)
         print(message)
         
     if maxItems is not None and len(items) > maxItems and printwarnings:
-        message = "%(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items),
+        message = " ! %(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items),
                                                                                "itemname": itemname,
                                                                                "max": maxItems} + context
         models.DataIssue.objects.create(parser='caves', message=message)
diff --git a/parsers/people.py b/parsers/people.py
index f7e2f50..32ab2c5 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -58,7 +58,7 @@ def LoadPersonsExpos():
     header = dict(zip(headers, range(len(headers))))
     
     # make expeditions
-    print("Loading expeditions")
+    print(" - Loading expeditions")
     years = headers[5:]
     
     for year in years:
@@ -68,7 +68,7 @@ def LoadPersonsExpos():
         save_carefully(models.Expedition, lookupAttribs, nonLookupAttribs)
 
     # make persons
-    print("Loading personexpeditions")
+    print(" - Loading personexpeditions")
 
     for personline in personreader:
         name = personline[header["Name"]]
diff --git a/parsers/surveys.py b/parsers/surveys.py
index 9bd063d..450725c 100644
--- a/parsers/surveys.py
+++ b/parsers/surveys.py
@@ -150,17 +150,19 @@ def parseSurveyScans(expedition, logfile=None):
                 scanObj.save()
     except (IOError, OSError):
         yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
-        print("No folder found for " + expedition.year + " at:- " + yearPath)
+        print(" ! No folder found for " + expedition.year + " at:- " + yearPath)
 
 # dead
 def parseSurveys(logfile=None):
     try:
         readSurveysFromCSV()
     except (IOError, OSError):
-        print("Survey CSV not found..")
+        print(" ! Survey CSV not found..")
         pass
     
+    print " - Loading scans by expedition year"
     for expedition in Expedition.objects.filter(year__gte=2000):   #expos since 2000, because paths and filenames were nonstandard before then
+        print "%s" % expedition,
         parseSurveyScans(expedition)
 
 # dead
@@ -190,7 +192,7 @@ def GetListDir(sdir):
 def LoadListScansFile(survexscansfolder):
     gld = [ ]
     
-    # flatten out any directories in these book files
+    # flatten out any directories in these wallet folders - should not be any
     for (fyf, ffyf, fisdiryf) in GetListDir(survexscansfolder.fpath):
         if fisdiryf:
             gld.extend(GetListDir(ffyf))
@@ -199,7 +201,7 @@ def LoadListScansFile(survexscansfolder):
     
     for (fyf, ffyf, fisdiryf) in gld:
         #assert not fisdiryf, ffyf
-        if re.search(r"\.(?:png|jpg|jpeg)(?i)$", fyf):
+        if re.search(r"(?i)\.(?:png|jpg|jpeg|pdf|svg)$", fyf):  # global inline flag must lead the pattern on Python 3
             survexscansingle = SurvexScanSingle(ffile=ffyf, name=fyf, survexscansfolder=survexscansfolder)
             survexscansingle.save()
 
@@ -208,7 +210,7 @@ def LoadListScansFile(survexscansfolder):
 # and builds up the models we can access later
 def LoadListScans():
 
-    print('Loading Survey Scans...')
+    print(' - Loading Survey Scans... (deleting all objects first)')
 
     SurvexScanSingle.objects.all().delete()
     SurvexScansFolder.objects.all().delete()
@@ -221,12 +223,14 @@ def LoadListScans():
         
     
     # iterate into the surveyscans directory
+    print ' - ',
     for f, ff, fisdir in GetListDir(os.path.join(settings.SURVEY_SCANS, "surveyscans")):
         if not fisdir:
             continue
         
         # do the year folders
         if re.match(r"\d\d\d\d$", f):
+            print "%s" % f,
             for fy, ffy, fisdiry in GetListDir(ff):
                 if fisdiry:
                     assert fisdiry, ffy