 databaseReset.py    | 15
 parsers/logbooks.py | 99
 2 files changed, 84 insertions(+), 30 deletions(-)
diff --git a/databaseReset.py b/databaseReset.py
index 1eed075..c27ee7c 100644
--- a/databaseReset.py
+++ b/databaseReset.py
@@ -127,7 +127,7 @@ def import_auto_logbooks():
print(os.path.join(root, filename))
parsers.logbooks.parseAutoLogBookEntry(os.path.join(root, filename))
-#Temporary function until definative source of data transfered.
+#Temporary function until definitive source of data is transferred.
from django.template.defaultfilters import slugify
from django.template import Context, loader
def dumplogbooks():
@@ -177,16 +177,16 @@ def usage():
caves - read in the caves
folklog - read in the people (folk) and then the logbooks
logbooks - read in just the logbooks
- autologbooks - read in autologbooks
+ autologbooks - read in autologbooks (what are these?)
dumplogbooks - write out autologbooks (not working?)
people - read in the people from folk.csv
QMs - read in the QM files
resetend
scans - NOT the scanned surveynotes ?!
- survex - read in the survex files
- survexpos
+ survex - read in the survex files - all the survex blocks
+ survexpos - just the Pos out of the survex files
surveys - read in the scanned surveynotes
- tunnel - read in the Tunnel files
+ tunnel - read in the Tunnel files - which scans the surveyscans too
""")
if __name__ == "__main__":
@@ -214,10 +214,7 @@ if __name__ == "__main__":
elif "resetend" in sys.argv:
#import_logbooks()
import_QMs()
- try:
- import_tunnelfiles()
- except:
- print("Tunnel files parser broken.")
+ import_tunnelfiles()
import_surveys()
import_descriptions()
parse_descriptions()
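
Note: the hunk above deliberately drops the blanket try/except around import_tunnelfiles(), so a broken Tunnel parser now fails loudly during resetend instead of being reduced to a one-line message. If a batch reset ever needs to keep going past one broken importer, a narrower pattern that still preserves the real traceback might look like the following sketch (run_step is a hypothetical helper, not part of this commit):

    # Hypothetical sketch, not in this commit: report the failure with its
    # full traceback and continue, rather than hiding it behind a bare except.
    import traceback

    def run_step(step):
        try:
            step()
        except Exception:
            print("Step " + step.__name__ + " failed:")
            traceback.print_exc()

    # usage: run_step(import_tunnelfiles)  # import_tunnelfiles from databaseReset.py
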
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 9dfa31b..9f47d4f 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -12,8 +12,9 @@ from django.utils.timezone import make_aware
import csv
import re
-import datetime
+import datetime, time
import os
+import pickle
from utils import save_carefully
@@ -78,10 +79,17 @@ def GetTripCave(place): #need to be fuzzier about matching here. Already a very
print("No cave found for place " , place)
return
-
+logentries = [] # the entire logbook is a single object: a list of entries
noncaveplaces = [ "Journey", "Loser Plateau" ]
+
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki"):
""" saves a logbook entry and related persontrips """
+ global logentries
+
+ entrytuple = (date, place, title, text,
+ trippeople, expedition, logtime_underground, entry_type)
+ logentries.append(entrytuple)
+
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground)
if not author:
print(" - Skipping logentry: " + title + " - no author for entry")
@@ -100,12 +108,14 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
lookupAttribs={'date':date, 'title':title}
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave':cave, 'slug':slugify(title)[:50], 'entry_type':entry_type}
lbo, created=save_carefully(models.LogbookEntry, lookupAttribs, nonLookupAttribs)
+ #logentries.append(models.LogbookEntry)
+
for tripperson, time_underground in trippersons:
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
- #print nonLookupAttribs
save_carefully(models.PersonTrip, lookupAttribs, nonLookupAttribs)
+ #logentries.append(models.PersonTrip)
def ParseDate(tripdate, year):
@@ -189,7 +199,7 @@ def Parseloghtmltxt(year, expedition, txt):
trippeople=trippeople, expedition=expedition, logtime_underground=0,
entry_type="html")
if logbook_entry_count == 0:
- print(" - No trip entrys found in logbook, check the syntax matches htmltxt format")
+ print(" - No trip entries found in logbook, check the syntax matches htmltxt format")
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
@@ -293,40 +303,91 @@ def SetDatesFromLogbookEntries(expedition):
def LoadLogbookForExpedition(expedition):
""" Parses all logbook entries for one expedition """
-
+
+ global logentries
+
expowebbase = os.path.join(settings.EXPOWEB, "years")
yearlinks = settings.LOGBOOK_PARSER_SETTINGS
logbook_parseable = False
-
+ logbook_cached = False
+
if expedition.year in yearlinks:
+ # print " - Valid logbook year: ", expedition.year
year_settings = yearlinks[expedition.year]
- file_in = open(os.path.join(expowebbase, year_settings[0]))
- txt = file_in.read().decode("latin1")
- file_in.close()
- parsefunc = year_settings[1]
- logbook_parseable = True
- print(" - Parsing logbook: " + year_settings[0] + "\n - Using parser: " + year_settings[1])
- else:
try:
- file_in = open(os.path.join(expowebbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE))
+ bad_cache = False
+ cache_filename = os.path.join(expowebbase, year_settings[0])+".cache"
+ now = time.time()
+ cache_t = os.path.getmtime(cache_filename)
+ file_t = os.path.getmtime(os.path.join(expowebbase, year_settings[0]))
+ if file_t - cache_t > 2: # source is at least 2 secs newer than the cache
+ #print(" - Cache is stale.")
+ bad_cache = True
+ if now - cache_t > 30*24*60*60:
+ #print(" - Cache is more than 30 days old.")
+ bad_cache = True
+ if bad_cache:
+ print(" - Cache is either stale or more than 30 days old. Deleting it.")
+ os.remove(cache_filename)
+ logentries = []
+ raise IOError("cache out of date") # caught below; forces a fresh parse
+ print(" - Reading cache: " + cache_filename )
+ try:
+ with open(cache_filename, "rb") as f:
+ logentries = pickle.load(f)
+ print " - Loaded ", len(logentries), " objects"
+ logbook_cached = True
+ except:
+ print " - Failed to load corrupt cache. Deleting it.\n"
+ os.remove(cache_filename)
+ logentries=[]
+ except:
+ print(" - Opening logbook: ")
+ file_in = open(os.path.join(expowebbase, year_settings[0]))
txt = file_in.read().decode("latin1")
file_in.close()
+ parsefunc = year_settings[1]
logbook_parseable = True
- print("No set parser found using default")
- parsefunc = settings.DEFAULT_LOGBOOK_PARSER
- except (IOError):
- logbook_parseable = False
- print("Couldn't open default logbook file and nothing in settings for expo " + expedition.year)
+ print(" - Parsing logbook: " + year_settings[0] + "\n - Using parser: " + year_settings[1])
+ else:
+ try:
+ file_in = open(os.path.join(expowebbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE))
+ txt = file_in.read().decode("latin1")
+ file_in.close()
+ logbook_parseable = True
+ print("No set parser found using default")
+ parsefunc = settings.DEFAULT_LOGBOOK_PARSER
+ except (IOError):
+ logbook_parseable = False
+ print("Couldn't open default logbook file and nothing in settings for expo " + expedition.year)
if logbook_parseable:
parser = globals()[parsefunc]
parser(expedition.year, expedition, txt)
SetDatesFromLogbookEntries(expedition)
+ # the parser has also appended every entry to the global logentries[]
+ if expedition.year in yearlinks: # cache_filename is only set for these years
+ print(" - Storing " + str(len(logentries)) + " log entries")
+ with open(cache_filename, "wb") as f:
+ pickle.dump(logentries, f, 2)
+ logentries = [] # flush for next year
+
+ if logbook_cached:
+ # replay the pickled entries; EnterLogIntoDbase re-appends each one
+ # to the global logentries, so work from a private copy of the list
+ cached_entries = logentries
+ logentries = []
+ for i, entrytuple in enumerate(cached_entries):
+ date, place, title, text, trippeople, expedition, logtime_underground, \
+ entry_type = entrytuple
+ #print(" - - obj ", i, date, title)
+ EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, \
+ entry_type)
+ logentries = [] # flush for next year
#return "TOLOAD: " + year + " " + str(expedition.personexpedition_set.all()[1].logbookentry_set.count()) + " " + str(models.PersonTrip.objects.filter(personexpedition__expedition=expedition).count())
-
def LoadLogbooks():
""" This is the master function for parsing all logbooks into the Troggle database. """
@@ -372,7 +429,7 @@ def parseAutoLogBookEntry(filename):
except models.Expedition.DoesNotExist:
errors.append("Expedition not in database")
else:
- errors.append("Expediton Year could not be parsed")
+ errors.append("Expedition Year could not be parsed")
titleMatch = titleRegex.search(contents)
if titleMatch:
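
The heart of the parsers/logbooks.py change is an mtime-checked pickle cache: each call to EnterLogIntoDbase first records its arguments as a plain tuple in the global logentries list, the list is pickled next to the logbook file as <logbook>.cache, and on the next run the tuples are loaded and replayed through EnterLogIntoDbase instead of re-parsing the logbook, unless the logbook has been edited since the cache was written or the cache is more than 30 days old. A condensed, self-contained sketch of that pattern (load_with_cache and parse_file are illustrative names, not from the diff):

    # A minimal sketch of the cache logic in LoadLogbookForExpedition;
    # the function and parameter names here are hypothetical.
    import os
    import time
    import pickle

    CACHE_MAX_AGE = 30 * 24 * 60 * 60   # 30 days, as in the diff

    def load_with_cache(source_path, parse_file):
        """Return parsed entries for source_path, reusing source_path + '.cache'
        when it is newer than the source and not too old."""
        cache_path = source_path + ".cache"
        try:
            cache_t = os.path.getmtime(cache_path)           # OSError if no cache yet
            if os.path.getmtime(source_path) - cache_t > 2:  # source edited after caching
                raise IOError("cache is stale")
            if time.time() - cache_t > CACHE_MAX_AGE:
                raise IOError("cache is too old")
            with open(cache_path, "rb") as f:
                return pickle.load(f)                        # raises if cache is corrupt
        except Exception:
            if os.path.exists(cache_path):
                os.remove(cache_path)
            entries = parse_file(source_path)                # fall back to a full parse
            with open(cache_path, "wb") as f:
                pickle.dump(entries, f, 2)                   # protocol 2, as in the diff
            return entries

Because the cache holds the argument tuples rather than saved model objects, a cache hit and a fresh parse drive the database through exactly the same code path in EnterLogIntoDbase.

EnterLogIntoDbase itself writes through utils.save_carefully, which is not shown in this diff; judging from its call sites it behaves like Django's get_or_create with a separate set of non-identity fields. A plausible reading, purely for orientation (the body below is an assumption, not the actual utils code):

    # Assumed shape of utils.save_carefully, inferred from its call sites.
    def save_carefully(objectType, lookupAttribs, nonLookupAttribs):
        """Fetch or create a row identified by lookupAttribs, then (re)apply
        nonLookupAttribs; returns (instance, created) like get_or_create."""
        instance, created = objectType.objects.get_or_create(
            defaults=nonLookupAttribs, **lookupAttribs)
        if not created:
            for k, v in nonLookupAttribs.items():
                setattr(instance, k, v)
            instance.save()
        return instance, created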