summary | refs | log | tree | commit | diff | stats
path: root/parsers/logbooks.py
diff options
context:
space:
mode:
Diffstat (limited to 'parsers/logbooks.py')
-rw-r--r-- parsers/logbooks.py 39
1 files changed, 30 insertions, 9 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index f507861..467ff92 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -126,17 +126,26 @@ def ParseDate(tripdate, year):
mdatestandard = re.match(r"(\d\d\d\d)-(\d\d)-(\d\d)", tripdate)
mdategoof = re.match(r"(\d\d?)/0?(\d)/(20|19)?(\d\d)", tripdate)
if mdatestandard:
- assert mdatestandard.group(1) == year, (tripdate, year)
- year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3))
+ if not (mdatestandard.group(1) == year):
+ message = " ! - Bad date (year) in logbook: " + tripdate + " - " + year
+ DataIssue.objects.create(parser='logbooks', message=message)
+ logdataissues["tripdate"]=message
+ return datetime.date('1970', '01', '01')
+ else:
+ year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3))
elif mdategoof:
- assert not mdategoof.group(3) or mdategoof.group(3) == year[:2], mdategoof.groups()
- yadd = int(year[:2]) * 100
- day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd
+ if not (not mdategoof.group(3) or mdategoof.group(3) == year[:2]):
+ message = " ! - Bad date mdategoof.group(3) in logbook: " + tripdate + " - " + mdategoof.group(3)
+ DataIssue.objects.create(parser='logbooks', message=message)
+ logdataissues["tripdate"]=message
+ return datetime.date('1970', '01', '01')
+ else:
+ yadd = int(year[:2]) * 100
+ day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd
else:
message = " ! - Bad date in logbook: " + tripdate + " - " + year
DataIssue.objects.create(parser='logbooks', message=message)
logdataissues["tripdate"]=message
- assert False, tripdate
return datetime.date(year, month, day)
@@ -150,7 +159,12 @@ def Parselogwikitxt(year, expedition, txt):
for triphead, triptext in trippara:
logbook_entry_count += 1
tripheadp = triphead.split("|")
- assert len(tripheadp) == 3, (tripheadp, triptext)
+ # assert len(tripheadp) == 3, (tripheadp, triptext)
+ if not (len(tripheadp) == 3):
+ message = " ! - Bad no of items in tripdate in logbook: " + tripdate + " - " + tripheadp
+ DataIssue.objects.create(parser='logbooks', message=message)
+ logdataissues["tripdate"]=message
+
tripdate, tripplace, trippeople = tripheadp
tripsplace = tripplace.split(" - ")
tripcave = tripsplace[0].strip()
@@ -339,7 +353,14 @@ def Parseloghtml03(year, expedition, txt):
logbook_entry_count += 1
s = re.match("(?s)\s*<p>(.*?)</p>(.*)$", trippara)
- assert s, trippara
+ #assert s, trippara
+ if not ( s ) :
+ message = " ! - Skipping logentry on failure to parse Parseloghtml03: {} {} {}...".format(tripentry,s,trippara[:300])
+ DataIssue.objects.create(parser='logbooks', message=message)
+ logdataissues[tripentry]=message
+ print(message)
+ break
+
tripheader, triptext = s.group(1), s.group(2)
tripheader = re.sub(r"&nbsp;", " ", tripheader)
tripheader = re.sub(r"\s+", " ", tripheader).strip()
@@ -595,7 +616,7 @@ def parseAutoLogBookEntry(filename):
try:
# this is a slow and uncertain function:
cave = getCaveByReference(caveRef)
- except AssertionError:
+ except:
cave = None
errors.append(" - Cave not found in database")
else: