summaryrefslogtreecommitdiffstats
path: root/parsers/logbooks.py
diff options
context:
space:
mode:
Diffstat (limited to 'parsers/logbooks.py')
-rw-r--r--parsers/logbooks.py117
1 files changed, 66 insertions, 51 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index e91c69e..0a3443a 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -31,6 +31,8 @@ todo='''
- refactor everything with some urgency, esp. LoadLogbookForExpedition()
+-- far too many uses of Django field dereferencing to get values, which is SLOW
+
- Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser,
or it is broken/incomplete and need hand-editing.
@@ -61,6 +63,14 @@ noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plate
logdataissues = TROG['issues']['logdataissues']
trips ={}
+entries = { "2019": 20, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
+ "2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
+ "2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
+ "2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
+ "1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
+ "1985": 24,"1984": 32,"1983": 52,"1982": 42,}
+# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing.
+
#
# the logbook loading section
@@ -534,19 +544,25 @@ def SetDatesFromLogbookEntries(expedition):
#persontrip.save() # also saved in EnterLogIntoDbase. MERGE these to speed up import.
-def LoadLogbookForExpedition(expedition, expect):
+def LoadLogbookForExpedition(expedition):
""" Parses all logbook entries for one expedition
If a cache is found it uses it. If not found, or fails sanity checks, parses source file.
"""
# absolutely horrid. REFACTOR THIS (all my fault..)
global logentries
global logdataissues
+ global entries
+
logbook_parseable = False
logbook_cached = False
yearlinks = settings.LOGBOOK_PARSER_SETTINGS
expologbase = os.path.join(settings.EXPOWEB, "years")
logentries=[]
+ year = expedition.year
+ expect = entries[year]
+ # print(" - Logbook for: " + year)
+
def validcache(year,n):
if year != expedition:
print(" ! year != expedition ",year, expedition )
@@ -578,14 +594,14 @@ def LoadLogbookForExpedition(expedition, expect):
for i in dellist:
del logdataissues[i]
- cleanerrors(expedition.year)
+ cleanerrors(year)
- if expedition.year in yearlinks:
- logbookpath = Path(expologbase) / expedition.year / yearlinks[expedition.year][0]
- expedition.logbookfile = yearlinks[expedition.year][0]
- parsefunc = yearlinks[expedition.year][1]
+ if year in yearlinks:
+ logbookpath = Path(expologbase) / year / yearlinks[year][0]
+ expedition.logbookfile = yearlinks[year][0]
+ parsefunc = yearlinks[year][1]
else:
- logbookpath = os.path.join(expologbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE)
+ logbookpath = os.path.join(expologbase, year, settings.DEFAULT_LOGBOOK_FILE)
expedition.logbookfile = settings.DEFAULT_LOGBOOK_FILE
parsefunc = settings.DEFAULT_LOGBOOK_PARSER
cache_filename = Path(str(logbookpath) + ".cache")
@@ -616,7 +632,7 @@ def LoadLogbookForExpedition(expedition, expect):
with open(cache_filename, "rb") as f:
year,n,logentries = pickle.load(f)
if validcache(year,n):
- print(" -- Loaded ", len(logentries), " log entries")
+ print(f" -- {year} : Loaded {len(logentries)} log entries")
logbook_cached = True
else:
print(" !- Told to expect ", expect, " but ", len(logentries), " found in cache")
@@ -640,7 +656,7 @@ def LoadLogbookForExpedition(expedition, expect):
if logbook_parseable:
parser = globals()[parsefunc]
print(f' - Using parser {parsefunc}')
- parser(expedition.year, expedition, txt) # this launches the right parser for this year
+ parser(year, expedition, txt) # this launches the right parser for this year
SetDatesFromLogbookEntries(expedition)
if len(logentries) >0:
@@ -656,10 +672,17 @@ def LoadLogbookForExpedition(expedition, expect):
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = entrytuple
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
entry_type, tripid1)
- EnterLogIntoObjStore(expedition.year, date, tripcave, triptitle, text, trippeople, logtime_underground,
+ EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, logtime_underground,
entry_type, tripid1, i)
i +=1
SetDatesFromLogbookEntries(expedition)
+
+ if len(logentries) == expect:
+ # print(f"OK {year} {len(logentries):5d} is {expect}\n")
+ pass
+ else:
+ print(f"BAD {year} {len(logentries):5d} is not {expect}\n")
+
return len(logentries)
def LoadLogbooks():
@@ -668,6 +691,7 @@ def LoadLogbooks():
This should be rewritten to use coroutines to load all logbooks from disc in parallel.
"""
global logdataissues
+ global entries
logdataissues = {}
DataIssue.objects.filter(parser='logbooks').delete()
@@ -682,51 +706,42 @@ def LoadLogbooks():
lostlogbook = ["1976", "1977", "1978", "1979", "1980", "1981"]
sqlfail = ["1987", "1988", "1989"] # breaks mysql with db constraint fail - debug locally first
nologbook = noexpo + lostlogbook + sqlfail
- entries = { "2019": 20, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
- "2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
- "2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
- "2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
- "1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
- "1985": 24,"1984": 32,"1983": 52,"1982": 42,}
- # Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing.
- try:
- os.remove("loadlogbk.log")
- except OSError:
- pass
+
nlbe={}
expd ={}
- with open("loadlogbk.log", "a") as log:
- for expo in expos:
- TROG['pagecache']['expedition'][expo.year] = None # clear cache
- if expo.year in sqlfail:
- print(" - Logbook for: " + expo.year + " NO parsing attempted - known sql failures")
- message = f" ! - Not even attempting to parse logbook for {expo.year} until code fixed"
- DataIssue.objects.create(parser='logbooks', message=message)
- logdataissues[f"sqlfail {expo.year}"]=message
- print(message)
+ actuals = []
+
+ for expo in expos:
+ year = expo.year
+ TROG['pagecache']['expedition'][year] = None # clear cache
+ if year in sqlfail:
+ print(" - Logbook for: " + year + " NO parsing attempted - known sql failures")
+ message = f" ! - Not even attempting to parse logbook for {year} until code fixed"
+ DataIssue.objects.create(parser='logbooks', message=message)
+ logdataissues[f"sqlfail {year}"]=message
+ print(message)
- if expo.year not in nologbook:
- print((" - Logbook for: " + expo.year))
- if expo.year in entries:
- numentries = LoadLogbookForExpedition(expo, entries[expo.year]) # this actually loads the logbook for one year
- log.write("{} {:5d} should be {}\n".format(expo.year, numentries, entries[expo.year]))
- nlbe[expo.year]=numentries
- expd[expo.year]= 0
- else:
- print(" - No Logbook yet for: " + expo.year) # catch case when preparing for next expo
- print("** total trips in ObjStore:", len(trips))
- #for i in logdataissues:
- # print("{:15s}: {}".format(i, logdataissues[i]))
+ if year not in nologbook:
+ if year in entries:
+ actuals.append(expo)
+ else:
+ print(" - No Logbook yet for: " + year) # catch case when preparing for next expo
+
+ for ex in actuals:
+ nlbe[ex] = LoadLogbookForExpedition(ex) # this actually loads the logbook for one expo
- for lbe in trips:
- year, date, tripcave, triptitle, text, trippeople, tu, formattype = trips[lbe]
- expd[year] += 1
- yt=0
- for y in expd:
- # print("{} {}".format(y, expd[y]), nlbe[y])
- yt += expd[y]
- print("total {} log entries in all expeditions".format(yt))
-
+ # tried to use map with concurrent threads - but sqlite database is not concurrent, so failed with database lock
+ # yt = 0
+ # for r in map(LoadLogbookForExpedition, actuals):
+ # yt = r
+
+ yt = 0
+ for e in nlbe:
+ yt += nlbe[e]
+ print(f"total {yt:,} log entries parsed in all expeditions")
+ if yt != len(trips):
+ print(f"** total trips in ObjStore:{len(trips):,}")
+
try:
shelvfilenm = 'logbktrips.shelve' # ".db" automatically appended after python 3.8
with shelve.open(shelvfilenm, writeback=True) as odb: