 databaseReset.py    | 15
 parsers/logbooks.py | 99
 2 files changed, 84 insertions(+), 30 deletions(-)
diff --git a/databaseReset.py b/databaseReset.py
index 1eed075..c27ee7c 100644
--- a/databaseReset.py
+++ b/databaseReset.py
@@ -127,7 +127,7 @@ def import_auto_logbooks():
print(os.path.join(root, filename))
parsers.logbooks.parseAutoLogBookEntry(os.path.join(root, filename))
-#Temporary function until definative source of data transfered.
+#Temporary function until definitive source of data is transferred.
from django.template.defaultfilters import slugify
from django.template import Context, loader
def dumplogbooks():
@@ -177,16 +177,16 @@ def usage():
caves - read in the caves
folklog - read in the people (folk) and then the logbooks
logbooks - read in just the logbooks
- autologbooks - read in autologbooks
+ autologbooks - read in autologbooks (what are these?)
dumplogbooks - write out autologbooks (not working?)
people - read in the people from folk.csv
QMs - read in the QM files
resetend
scans - NOT the scanned surveynotes ?!
- survex - read in the survex files
- survexpos
+ survex - read in the survex files - all the survex blocks
+ survexpos - just the Pos out of the survex files
surveys - read in the scanned surveynotes
- tunnel - read in the Tunnel files
+ tunnel - read in the Tunnel files - which scans the surveyscans too
""")
if __name__ == "__main__":
@@ -214,10 +214,7 @@ if __name__ == "__main__":
elif "resetend" in sys.argv:
#import_logbooks()
import_QMs()
- try:
- import_tunnelfiles()
- except:
- print("Tunnel files parser broken.")
+ import_tunnelfiles()
import_surveys()
import_descriptions()
parse_descriptions()
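
Note: the hunk above deliberately drops the blanket try/except around import_tunnelfiles(), so a broken Tunnel parser now fails loudly during resetend instead of being reduced to a one-line message. If a batch reset ever needs to keep going past one broken importer, a narrower pattern that still preserves the real traceback might look like the following sketch (run_step is a hypothetical helper, not part of this commit):

    # Hypothetical sketch, not in this commit: report the failure with its
    # full traceback and continue, rather than hiding it behind a bare except.
    import traceback

    def run_step(step):
        try:
            step()
        except Exception:
            print("Step " + step.__name__ + " failed:")
            traceback.print_exc()

    # usage: run_step(import_tunnelfiles)  # import_tunnelfiles from databaseReset.py
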
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 9dfa31b..9f47d4f 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -12,8 +12,9 @@ from django.utils.timezone import make_aware
import csv
import re
-import datetime
+import datetime, time
import os
+import pickle
from utils import save_carefully
@@ -78,10 +79,17 @@ def GetTripCave(place): #need to be fuzzier about matching here. Already a very
print("No cave found for place " , place)
return
-
+logentries = [] # the entire logbook is a single object: a list of entries
noncaveplaces = [ "Journey", "Loser Plateau" ]
+
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki"):
""" saves a logbook entry and related persontrips """
+ global logentries
+
+ entrytuple = (date, place, title, text,
+ trippeople, expedition, logtime_underground, entry_type)
+ logentries.append(entrytuple)
+
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground)
if not author:
print(" - Skipping logentry: " + title + " - no author for entry")
@@ -100,12 +108,14 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
lookupAttribs={'date':date, 'title':title}
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave':cave, 'slug':slugify(title)[:50], 'entry_type':entry_type}
lbo, created=save_carefully(models.LogbookEntry, lookupAttribs, nonLookupAttribs)
+ #logentries.append(models.LogbookEntry)
+
for tripperson, time_underground in trippersons:
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
- #print nonLookupAttribs
save_carefully(models.PersonTrip, lookupAttribs, nonLookupAttribs)
+ #logentries.append(models.PersonTrip)
def ParseDate(tripdate, year):
@@ -189,7 +199,7 @@ def Parseloghtmltxt(year, expedition, txt):
trippeople=trippeople, expedition=expedition, logtime_underground=0,
entry_type="html")
if logbook_entry_count == 0:
- print(" - No trip entrys found in logbook, check the syntax matches htmltxt format")
+ print(" - No trip entries found in logbook, check the syntax matches htmltxt format")
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
@@ -293,40 +303,91 @@ def SetDatesFromLogbookEntries(expedition):
def LoadLogbookForExpedition(expedition):
""" Parses all logbook entries for one expedition """
-
+
+ global logentries
+
expowebbase = os.path.join(settings.EXPOWEB, "years")
yearlinks = settings.LOGBOOK_PARSER_SETTINGS
logbook_parseable = False
-
+ logbook_cached = False
+
if expedition.year in yearlinks:
+ # print " - Valid logbook year: ", expedition.year
year_settings = yearlinks[expedition.year]
- file_in = open(os.path.join(expowebbase, year_settings[0]))
- txt = file_in.read().decode("latin1")
- file_in.close()
- parsefunc = year_settings[1]
- logbook_parseable = True
- print(" - Parsing logbook: " + year_settings[0] + "\n - Using parser: " + year_settings[1])
- else:
try:
- file_in = open(os.path.join(expowebbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE))
+ bad_cache = False
+ cache_filename = os.path.join(expowebbase, year_settings[0])+".cache"
+ now = time.time()
+ cache_t = os.path.getmtime(cache_filename)
+ file_t = os.path.getmtime(os.path.join(expowebbase, year_settings[0]))
+ if file_t - cache_t > 2: # source is at least 2 secs newer than the cache
+ #print(" - Cache is stale.")
+ bad_cache = True
+ if now - cache_t > 30*24*60*60:
+ #print(" - Cache is more than 30 days old.")
+ bad_cache = True
+ if bad_cache:
+ print(" - Cache is either stale or more than 30 days old. Deleting it.")
+ os.remove(cache_filename)
+ logentries = []
+ raise IOError("cache out of date") # caught below; forces a fresh parse
+ print(" - Reading cache: " + cache_filename )
+ try:
+ with open(cache_filename, "rb") as f:
+ logentries = pickle.load(f)
+ print " - Loaded ", len(logentries), " objects"
+ logbook_cached = True
+ except:
+ print " - Failed to load corrupt cache. Deleting it.\n"
+ os.remove(cache_filename)
+ logentries=[]
+ except:
+ print(" - Opening logbook: ")
+ file_in = open(os.path.join(expowebbase, year_settings[0]))
txt = file_in.read().decode("latin1")
file_in.close()
+ parsefunc = year_settings[1]
logbook_parseable = True
- print("No set parser found using default")
- parsefunc = settings.DEFAULT_LOGBOOK_PARSER
- except (IOError):
- logbook_parseable = False
- print("Couldn't open default logbook file and nothing in settings for expo " + expedition.year)
+ print(" - Parsing logbook: " + year_settings[0] + "\n - Using parser: " + year_settings[1])
+ else:
+ try:
+ file_in = open(os.path.join(expowebbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE))
+ txt = file_in.read().decode("latin1")
+ file_in.close()
+ logbook_parseable = True
+ print("No set parser found using default")
+ parsefunc = settings.DEFAULT_LOGBOOK_PARSER
+ except (IOError):
+ logbook_parseable = False
+ print("Couldn't open default logbook file and nothing in settings for expo " + expedition.year)
if logbook_parseable:
parser = globals()[parsefunc]
parser(expedition.year, expedition, txt)
SetDatesFromLogbookEntries(expedition)
+ # the parser has also appended every entry to the global logentries[]
+ if expedition.year in yearlinks: # cache_filename is only set for these years
+ print(" - Storing " + str(len(logentries)) + " log entries")
+ with open(cache_filename, "wb") as f:
+ pickle.dump(logentries, f, 2)
+ logentries = [] # flush for next year
+
+ if logbook_cached:
+ # replay the pickled entries; EnterLogIntoDbase re-appends each one
+ # to the global logentries, so work from a private copy of the list
+ cached_entries = logentries
+ logentries = []
+ for i, entrytuple in enumerate(cached_entries):
+ date, place, title, text, trippeople, expedition, logtime_underground, \
+ entry_type = entrytuple
+ #print(" - - obj ", i, date, title)
+ EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, \
+ entry_type)
+ logentries = [] # flush for next year
#return "TOLOAD: " + year + " " + str(expedition.personexpedition_set.all()[1].logbookentry_set.count()) + " " + str(models.PersonTrip.objects.filter(personexpedition__expedition=expedition).count())
-
def LoadLogbooks():
""" This is the master function for parsing all logbooks into the Troggle database. """
@@ -372,7 +429,7 @@ def parseAutoLogBookEntry(filename):
except models.Expedition.DoesNotExist:
errors.append("Expedition not in database")
else:
- errors.append("Expediton Year could not be parsed")
+ errors.append("Expedition Year could not be parsed")
titleMatch = titleRegex.search(contents)
if titleMatch:
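
The heart of the parsers/logbooks.py change is an mtime-checked pickle cache: each call to EnterLogIntoDbase first records its arguments as a plain tuple in the global logentries list, the list is pickled next to the logbook file as <logbook>.cache, and on the next run the tuples are loaded and replayed through EnterLogIntoDbase instead of re-parsing the logbook, unless the logbook has been edited since the cache was written or the cache is more than 30 days old. A condensed, self-contained sketch of that pattern (load_with_cache and parse_file are illustrative names, not from the diff):

    # A minimal sketch of the cache logic in LoadLogbookForExpedition;
    # the function and parameter names here are hypothetical.
    import os
    import time
    import pickle

    CACHE_MAX_AGE = 30 * 24 * 60 * 60   # 30 days, as in the diff

    def load_with_cache(source_path, parse_file):
        """Return parsed entries for source_path, reusing source_path + '.cache'
        when it is newer than the source and not too old."""
        cache_path = source_path + ".cache"
        try:
            cache_t = os.path.getmtime(cache_path)           # OSError if no cache yet
            if os.path.getmtime(source_path) - cache_t > 2:  # source edited after caching
                raise IOError("cache is stale")
            if time.time() - cache_t > CACHE_MAX_AGE:
                raise IOError("cache is too old")
            with open(cache_path, "rb") as f:
                return pickle.load(f)                        # raises if cache is corrupt
        except Exception:
            if os.path.exists(cache_path):
                os.remove(cache_path)
            entries = parse_file(source_path)                # fall back to a full parse
            with open(cache_path, "wb") as f:
                pickle.dump(entries, f, 2)                   # protocol 2, as in the diff
            return entries

Because the cache holds the argument tuples rather than saved model objects, a cache hit and a fresh parse drive the database through exactly the same code path in EnterLogIntoDbase.

EnterLogIntoDbase itself writes through utils.save_carefully, which is not shown in this diff; judging from its call sites it behaves like Django's get_or_create with a separate set of non-identity fields. A plausible reading, purely for orientation (the body below is an assumption, not the actual utils code):

    # Assumed shape of utils.save_carefully, inferred from its call sites.
    def save_carefully(objectType, lookupAttribs, nonLookupAttribs):
        """Fetch or create a row identified by lookupAttribs, then (re)apply
        nonLookupAttribs; returns (instance, created) like get_or_create."""
        instance, created = objectType.objects.get_or_create(
            defaults=nonLookupAttribs, **lookupAttribs)
        if not created:
            for k, v in nonLookupAttribs.items():
                setattr(instance, k, v)
            instance.save()
        return instance, created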