summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--core/views/logbooks.py16
-rw-r--r--parsers/logbooks.py64
2 files changed, 55 insertions, 25 deletions
diff --git a/core/views/logbooks.py b/core/views/logbooks.py
index ecf0f6b..82dbf61 100644
--- a/core/views/logbooks.py
+++ b/core/views/logbooks.py
@@ -176,13 +176,17 @@ def personexpedition(request, first_name='', last_name='', year=''):
def logbookentry(request, date, slug):
this_logbookentry = LogbookEntry.objects.filter(date=date, slug=slug)
-
- if len(this_logbookentry)>1:
- return render(request, 'object_list.html',{'object_list':this_logbookentry})
+
+ if this_logbookentry:
+ if len(this_logbookentry)>1:
+ return render(request, 'object_list.html',{'object_list':this_logbookentry})
+ else:
+ this_logbookentry=this_logbookentry[0]
+ return render(request, 'logbookentry.html', {'logbookentry': this_logbookentry})
else:
- this_logbookentry=this_logbookentry[0]
- return render(request, 'logbookentry.html', {'logbookentry': this_logbookentry})
-
+ msg =(f' Logbook entry slug:"{slug}" not found in database on date:"{date}" ')
+ print(msg)
+ return render(request, 'errors/generic.html',{'message':msg})
def logbookSearch(request, extra):
query_string = ''
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index c9d7796..8237bdc 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -71,13 +71,18 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
for tripperson in re.split(r",|\+|&|&(?!\w+;)| and ", trippeople):
tripperson = tripperson.strip()
- if not tid:
- tid = expedition.year + "." + tripperson + datetime.now().strftime("%S%f") # no good. Should be getting the tid
mul = re.match(r"<u>(.*?)</u>$(?i)", tripperson)
if mul:
tripperson = mul.group(1).strip()
if tripperson and tripperson[0] != '*':
tripperson = re.sub(round_bracket_regex, "", tripperson).strip()
+
+ if tripperson =="Wiggy":
+ tripperson = "Phil Wigglesworth"
+ if tripperson =="Animal":
+ tripperson = "Mike Richardson"
+
+
personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
if not personyear:
message = f" ! - {expedition.year} No name match for: '{tripperson}' "
@@ -91,6 +96,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
if not res:
return None, None
author = res[-1][0]
+
return res, author
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki", tid=None):
@@ -107,11 +113,11 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
return
if not author:
- message = f" ! - {expedition.year} Skipping logentry: {title} - - no author for entry in year "
+ message = f" ! - {expedition.year} Warning: logentry: {title} - no author for entry in year "
DataIssue.objects.create(parser='logbooks', message=message)
logdataissues["title"]=message
print(message)
- return
+ #return
# This needs attention. The slug field is derived from 'title'
# both GetCaveLookup() and GetTripCave() need to work together better. None of this data is *used* though?
@@ -298,7 +304,7 @@ def Parseloghtmltxt(year, expedition, txt):
"html", tripid1, logbook_entry_count, tid=tid)
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
-# trying it out for years 1982 - 1990 too. Some logbook editing required by hand..
+# trying it out for years 1982 - 1990 too. Some logbook editing required by hand.. place
def Parseloghtml01(year, expedition, txt):
global logentries
global logdataissues
@@ -309,7 +315,9 @@ def Parseloghtml01(year, expedition, txt):
for trippara in tripparas:
logbook_entry_count += 1
tid = set_trip_id(year,logbook_entry_count)
+ # print(f" #0 - tid: {tid}")
try:
+ #print(f" #1 - tid: {tid}")
s = re.match(r"(?s)\s*(?:<p>)?(.*?)</?p>(.*)$(?i)", trippara)
if not s:
message = " ! - Skipping logentry {year} failure to parse header: " + tid + trippara[:300] + "..."
@@ -317,22 +325,40 @@ def Parseloghtml01(year, expedition, txt):
logdataissues[tid]=message
print(message)
break
- tripheader, triptext = s.group(1), s.group(2)
- mtripid = re.search(r'<a id="(.*?)"', tripheader)
+ try:
+ tripheader, triptext = s.group(1), s.group(2)
+ except:
+ message = f" ! - Fail to set tripheader, triptext. trip:<{tid}> s:'{s}'"
+ DataIssue.objects.create(parser='logbooks', message=message)
+ logdataissues[tid]=message
+ print(message)
+
+
+ # mtripid = re.search(r'<a id="(.*?)"', tripheader)
# if not mtripid:
- # # not an error, this is probabluy jusyt a different year
- # message = f" ! - Fail id trip:{tid} header:'{tripheader}'"
+ # message = f" ! - A tag id not found. Never mind. Not needed. trip:<{tid}> header:'{tripheader}'"
# DataIssue.objects.create(parser='logbooks', message=message)
# logdataissues[tid]=message
# print(message)
- tripid = mtripid and mtripid.group(1) or ""
- #print(f" # - mtripid: {mtripid}")
+ # tripid = mtripid and mtripid.group(1) or ""
+ # print(f" # - mtripid: {mtripid}")
tripheader = re.sub(r"</?(?:[ab]|span)[^>]*>", "", tripheader)
-
- tripdate, triptitle, trippeople = tripheader.split("|")
+ #print(f" #2 - tid: {tid}")
+ try:
+ tripdate, triptitle, trippeople = tripheader.split("|")
+ except:
+ message = f" ! - Fail to split out date|title|people. trip:<{tid}> '{tripheader.split('|')}'"
+ DataIssue.objects.create(parser='logbooks', message=message)
+ logdataissues[tid]=message
+ print(message)
+ tripdate, triptitle = tripheader.split("|")
+ trippeople = "anon"
+ #print(f" #3 - tid: {tid}")
ldate = ParseDate(tripdate.strip(), year)
-
+ #print(f" # - tid: {tid} <{tripdate}> <{triptitle}> <{trippeople}>")
+ #print(f" #4 - tid: {tid}")
+
mtu = re.search(r'<p[^>]*>(T/?U.*)', triptext)
if mtu:
tu = mtu.group(1)
@@ -363,7 +389,7 @@ def Parseloghtml01(year, expedition, txt):
entrytuple = (ldate, tripcave, triptitle, ltriptext,
- trippeople, expedition, tu, "html01", tripid)
+ trippeople, expedition, tu, "html01", tid)
logentries.append(entrytuple)
try:
EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext,
@@ -377,7 +403,7 @@ def Parseloghtml01(year, expedition, txt):
try:
EnterLogIntoObjStore(year, ldate, tripcave, triptitle, ltriptext, trippeople, tu,
- "html01", tripid, logbook_entry_count, tid=tid)
+ "html01", tid, logbook_entry_count, tid=tid)
except:
message = " ! - Enter log entry into ObjectStore FAIL exception in: " + tid
DataIssue.objects.create(parser='logbooks', message=message)
@@ -577,7 +603,7 @@ def LoadLogbookForExpedition(expedition, expect):
SetDatesFromLogbookEntries(expedition)
if len(logentries) >0:
print(" - Cacheing " , len(logentries), " log entries")
- with open(cache_filename, "wb") as fc:
+ with open(cache_filename, "wb") as fc: # we must check that permissions are g+w, or expo can't delete the cache
logbk=(expedition,len(logentries),logentries)
pickle.dump(logbk, fc, protocol=4)
else:
@@ -608,14 +634,14 @@ def LoadLogbooks():
if len(expos) <= 1:
print(" ! No expeditions found. Load 'people' first.\n")
nologbook = ["1976", "1977", "1978", "1979", "1980", "1981",
- "1987", "1988", "1989",
+ "1987", "1988", "1989", # needs more hand-editing of log.htm
"1986", "2020",]
entries = {"2021": 0, "2019": 20, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
"1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
- "1985": 21,"1984": 19,"1983": 22,"1982": 42,}
+ "1985": 22,"1984": 32,"1983": 52,"1982": 42,}
# Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, or need hand-editing.
try:
os.remove("loadlogbk.log")