diff options
Diffstat (limited to 'parsers/logbooks.py')
-rw-r--r-- | parsers/logbooks.py | 39 |
1 files changed, 30 insertions, 9 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py index f507861..467ff92 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -126,17 +126,26 @@ def ParseDate(tripdate, year): mdatestandard = re.match(r"(\d\d\d\d)-(\d\d)-(\d\d)", tripdate) mdategoof = re.match(r"(\d\d?)/0?(\d)/(20|19)?(\d\d)", tripdate) if mdatestandard: - assert mdatestandard.group(1) == year, (tripdate, year) - year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3)) + if not (mdatestandard.group(1) == year): + message = " ! - Bad date (year) in logbook: " + tripdate + " - " + year + DataIssue.objects.create(parser='logbooks', message=message) + logdataissues["tripdate"]=message + return datetime.date('1970', '01', '01') + else: + year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3)) elif mdategoof: - assert not mdategoof.group(3) or mdategoof.group(3) == year[:2], mdategoof.groups() - yadd = int(year[:2]) * 100 - day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd + if not (not mdategoof.group(3) or mdategoof.group(3) == year[:2]): + message = " ! - Bad date mdategoof.group(3) in logbook: " + tripdate + " - " + mdategoof.group(3) + DataIssue.objects.create(parser='logbooks', message=message) + logdataissues["tripdate"]=message + return datetime.date('1970', '01', '01') + else: + yadd = int(year[:2]) * 100 + day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd else: message = " ! - Bad date in logbook: " + tripdate + " - " + year DataIssue.objects.create(parser='logbooks', message=message) logdataissues["tripdate"]=message - assert False, tripdate return datetime.date(year, month, day) @@ -150,7 +159,12 @@ def Parselogwikitxt(year, expedition, txt): for triphead, triptext in trippara: logbook_entry_count += 1 tripheadp = triphead.split("|") - assert len(tripheadp) == 3, (tripheadp, triptext) + # assert len(tripheadp) == 3, (tripheadp, triptext) + if not (len(tripheadp) == 3): + message = " ! - Bad no of items in tripdate in logbook: " + tripdate + " - " + tripheadp + DataIssue.objects.create(parser='logbooks', message=message) + logdataissues["tripdate"]=message + tripdate, tripplace, trippeople = tripheadp tripsplace = tripplace.split(" - ") tripcave = tripsplace[0].strip() @@ -339,7 +353,14 @@ def Parseloghtml03(year, expedition, txt): logbook_entry_count += 1 s = re.match("(?s)\s*<p>(.*?)</p>(.*)$", trippara) - assert s, trippara + #assert s, trippara + if not ( s ) : + message = " ! - Skipping logentry on failure to parse Parseloghtml03: {} {} {}...".format(tripentry,s,trippara[:300]) + DataIssue.objects.create(parser='logbooks', message=message) + logdataissues[tripentry]=message + print(message) + break + tripheader, triptext = s.group(1), s.group(2) tripheader = re.sub(r" ", " ", tripheader) tripheader = re.sub(r"\s+", " ", tripheader).strip() @@ -595,7 +616,7 @@ def parseAutoLogBookEntry(filename): try: # this is a slow and uncertain function: cave = getCaveByReference(caveRef) - except AssertionError: + except: cave = None errors.append(" - Cave not found in database") else: |