summary | refs | log | tree | commit | diff | stats
path: root/parsers/logbooks.py
diff options
context:
space:
mode:
author: Philip Sargent <philip.sargent@klebos.com> 2022-11-21 16:41:52 +0000
committer: Philip Sargent <philip.sargent@klebos.com> 2022-11-21 16:41:52 +0000
commit: a795707552026b66072ff75abfa5ddc77a2cac97 (patch)
tree: 4dc622813d2f2e9d110676f994efd25c06e04e43 /parsers/logbooks.py
parent: bcb61f9cd93030aa30bcc021e6051a2fd1202410 (diff)
download: troggle-a795707552026b66072ff75abfa5ddc77a2cac97.tar.gz
troggle-a795707552026b66072ff75abfa5ddc77a2cac97.tar.bz2
troggle-a795707552026b66072ff75abfa5ddc77a2cac97.zip
cache tidy and move settings into parser
Diffstat (limited to 'parsers/logbooks.py')
-rw-r--r-- parsers/logbooks.py 164
1 files changed, 59 insertions, 105 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 018c051..a1df040 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -45,12 +45,43 @@ todo='''
- this is a slow and uncertain function: cave = getCaveByReference(caveRef)
'''
-
-logentries = [] # the entire logbook for one year is a single object: a list of entries
-noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau',
- 'base camp', 'basecamp', 'top camp', 'topcamp' ]
-logdataissues = TROG['issues']['logdataissues']
-trips ={}
+MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
+DEFAULT_LOGBOOK_PARSER = "Parseloghtmltxt"
+DEFAULT_LOGBOOK_FILE = "logbook.html"
+# All years since 2010 use the default value for Logbook parser
+# but several don't work, and are skipped by the parsing code, e.g. 1983
+LOGBOOK_PARSER_SETTINGS = {
+ "2010": ("logbook.html", "Parseloghtmltxt"),
+ "2009": ("2009logbook.txt", "Parselogwikitxt"),
+ "2008": ("2008logbook.txt", "Parselogwikitxt"),
+ "2007": ("logbook.html", "Parseloghtmltxt"),
+ "2006": ("logbook.html", "Parseloghtmltxt"),
+# "2006": ("logbook/logbook_06.txt", "Parselogwikitxt"),
+ "2006": ("logbook.html", "Parseloghtmltxt"),
+ "2005": ("logbook.html", "Parseloghtmltxt"),
+ "2004": ("logbook.html", "Parseloghtmltxt"),
+ "2003": ("logbook.html", "Parseloghtml03"),
+ "2002": ("logbook.html", "Parseloghtmltxt"),
+ "2001": ("log.htm", "Parseloghtml01"),
+ "2000": ("log.htm", "Parseloghtml01"),
+ "1999": ("log.htm", "Parseloghtml01"),
+ "1998": ("log.htm", "Parseloghtml01"),
+ "1997": ("log.htm", "Parseloghtml01"),
+ "1996": ("log.htm", "Parseloghtml01"),
+ "1995": ("log.htm", "Parseloghtml01"),
+ "1994": ("log.htm", "Parseloghtml01"),
+ "1993": ("log.htm", "Parseloghtml01"),
+ "1992": ("log.htm", "Parseloghtml01"),
+ "1991": ("log.htm", "Parseloghtml01"),
+ "1990": ("log.htm", "Parseloghtml01"),
+ "1989": ("log.htm", "Parseloghtml01"), #crashes MySQL
+ "1988": ("log.htm", "Parseloghtml01"), #crashes MySQL
+ "1987": ("log.htm", "Parseloghtml01"), #crashes MySQL
+ "1985": ("log.htm", "Parseloghtml01"),
+ "1984": ("log.htm", "Parseloghtml01"),
+ "1983": ("log.htm", "Parseloghtml01"),
+ "1982": ("log.htm", "Parseloghtml01"),
+ }
entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
@@ -60,6 +91,11 @@ entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015":
"1985": 24,"1984": 32,"1983": 52,"1982": 42,}
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing.
+logentries = [] # the entire logbook for one year is a single object: a list of entries
+noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plateau',
+ 'base camp', 'basecamp', 'top camp', 'topcamp' ]
+logdataissues = TROG['issues']['logdataissues']
+trips ={}
#
# the logbook loading section
@@ -169,7 +205,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
# this creates the PersonTrip instance.
- save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs) # PersonTrip also saved in SetDatesFromLogbookEntries
+ save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs)
def ParseDate(tripdate, year):
""" Interprets dates in the expo logbooks and returns a correct datetime.date object """
@@ -465,34 +501,6 @@ def Parseloghtml03(year, expedition, txt):
trippeople, expedition, tu, "html03", tid)
logentries.append(entrytuple)
-def SetDatesFromLogbookEntries(expedition):
- """Sets the next and previous entry for a persontrip by setting
- persontrip_prev
- persontrip_next
- for each persontrip instance.
-
- This is ONLY needed when a logbook entry is displayed. So could be called lazily
- only when one of these entries is requested.
-
- It does NOT do what the docstring says here:
- Sets the date_from and date_to field for an expedition based on persontrips.
- Then sets the expedition date_from and date_to based on the personexpeditions.
- """
- # Probably a faster way to do this. This uses a lot of db queries, but we have all this
- # in memory..
- for personexpedition in expedition.personexpedition_set.all():
- persontrips = personexpedition.persontrip_set.order_by('logbook_entry__date')
- # sequencing is difficult to do
- lprevpersontrip = None
- for persontrip in persontrips:
- persontrip.persontrip_prev = lprevpersontrip
- if lprevpersontrip:
- lprevpersontrip.persontrip_next = persontrip
- lprevpersontrip.save()
- persontrip.persontrip_next = None
- lprevpersontrip = persontrip
- #persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
-
def LoadLogbookForExpedition(expedition):
""" Parses all logbook entries for one expedition
@@ -505,7 +513,7 @@ def LoadLogbookForExpedition(expedition):
logbook_parseable = False
logbook_cached = False
- yearlinks = settings.LOGBOOK_PARSER_SETTINGS
+ yearlinks = LOGBOOK_PARSER_SETTINGS
expologbase = os.path.join(settings.EXPOWEB, "years")
logentries=[]
@@ -555,85 +563,33 @@ def LoadLogbookForExpedition(expedition):
expedition.logbookfile = yearlinks[year][0]
parsefunc = yearlinks[year][1]
else:
- logbookpath = Path(expologbase) / year / settings.DEFAULT_LOGBOOK_FILE
- expedition.logbookfile = settings.DEFAULT_LOGBOOK_FILE
- parsefunc = settings.DEFAULT_LOGBOOK_PARSER
+ logbookpath = Path(expologbase) / year / DEFAULT_LOGBOOK_FILE
+ expedition.logbookfile = DEFAULT_LOGBOOK_FILE
+ parsefunc = DEFAULT_LOGBOOK_PARSER
cache_filename = Path(str(logbookpath) + ".cache")
if not cache_filename.is_file():
print(" - Cache file does not exist \"" + str(cache_filename) +"\"")
expedition.save()
logbook_cached = False
- if False: # enable cache system
- now = time.time()
- bad_cache = False # temporarily disable reading the cache - buggy
- try:
- cache_t = os.path.getmtime(cache_filename)
- if os.path.getmtime(logbookpath) - cache_t > 2: # at least 2 secs later
- print(" - ! Cache is older than the logbook file")
- bad_cache= True
- if now - cache_t > 30*24*60*60:
- print(" - ! Cache is > 30 days old")
- bad_cache= True
- if bad_cache:
- print(" - so cache is either stale or more than 30 days old. Deleting it.")
- os.remove(cache_filename)
- logentries=[]
- print(" - Deleted stale or corrupt cache file")
- raise
- try:
- # print(" - Reading cache: " + str(cache_filename), end='')
- with open(cache_filename, "rb") as f:
- year, lbsize, n, logentries = pickle.load(f)
- if validcache(year, n, lbsize):
- print(f" -- {year} : Loaded {len(logentries)} log entries")
- logbook_cached = True
- else:
- print(" !- {year} : Cache failed validity checks")
- raise
- except:
- print(" ! Failed to load corrupt cache (or I was told to ignore it). Deleting it.")
- os.remove(cache_filename)
- logentries=[]
- raise
- except :
- print(" - Cache old or de-pickle failure \"" + str(cache_filename) +"\"")
- try:
- file_in = open(logbookpath,'rb')
- txt = file_in.read().decode("utf-8")
- file_in.close()
- logbook_parseable = True
- except (IOError):
- logbook_parseable = False
- print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
- else:
- try:
- file_in = open(logbookpath,'rb')
- txt = file_in.read().decode("utf-8")
- file_in.close()
- logbook_parseable = True
- except (IOError):
- logbook_parseable = False
- print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
- except:
- logbook_parseable = False
- print(" ! Very Bad Error opening " + logbookpath)
+
+ try:
+ file_in = open(logbookpath,'rb')
+ txt = file_in.read().decode("utf-8")
+ file_in.close()
+ logbook_parseable = True
+ except (IOError):
+ logbook_parseable = False
+ print(" ! Couldn't open logbook as UTF-8 " + logbookpath)
+ except:
+ logbook_parseable = False
+ print(" ! Very Bad Error opening " + logbookpath)
if logbook_parseable:
parser = globals()[parsefunc]
print(f' - {year} parsing with {parsefunc}')
parser(year, expedition, txt) # this launches the right parser for this year
- # SetDatesFromLogbookEntries(expedition)
- # if len(logentries) >0:
- # print(" - Cacheing " , len(logentries), " log entries")
- # lbsize = logbookpath.stat().st_size
- # with open(cache_filename, "wb") as fc: # we much check that permission are g+w ! or expo can't delete the cache
- # logbk=(expedition,lbsize,len(logentries),logentries)
- # pickle.dump(logbk, fc, protocol=4)
- # else:
- # print(" ! NO TRIP entries found in logbook, check the syntax.")
-
i=0
for entrytuple in logentries:
# date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, tripid1 = entrytuple
@@ -644,7 +600,6 @@ def LoadLogbookForExpedition(expedition):
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
tripid1)
i +=1
- # SetDatesFromLogbookEntries(expedition)
if len(logentries) == expect:
# print(f"OK {year} {len(logentries):5d} is {expect}\n")
@@ -656,7 +611,6 @@ def LoadLogbookForExpedition(expedition):
def LoadLogbooks():
""" This is the master function for parsing all logbooks into the Troggle database.
- Parser settings appropriate for each year are set in settings.py LOGBOOK_PARSER_SETTINGS.
This should be rewritten to use coroutines to load all logbooks from disc in parallel,
but must be serialised to write to database as sqlite is single-user.
"""