diff options
Diffstat (limited to 'parsers/logbooks.py')
-rw-r--r-- | parsers/logbooks.py | 23 |
1 files changed, 14 insertions, 9 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py index 59711c4..0a497ad 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -22,6 +22,9 @@ Parses and imports logbooks in all their wonderful confusion https://expo.survex.com/handbook/computing/logbooks-parsing.html """ todo = """ +- make id= for each entry persistent and unchanging, and check cross-references in other logbooks and other HTML fragments +e.g. cave descriptions + - Most of the time is during the database writing (6s out of 8s). - profile the code to find bad repetitive things, of which there are many. @@ -37,8 +40,7 @@ todo = """ file_in = open(logbookfile,'rb') txt = file_in.read().decode("latin1") -- use Fixtures https://docs.djangoproject.com/en/dev/ref/django-admin/#django-admin-loaddata to cache - data for old logbooks? Not worth it.. + """ MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200 BLOG_PARSER_SETTINGS = { # no default, must be explicit @@ -127,7 +129,8 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None): if tripperson[0] != "*": # a name prefix of "*" is special tripperson = re.sub(rx_round_bracket, "", tripperson).strip() - # Whacky aliases all handled in GetPersonExpeditionNameLookup() + # Whacky aliases all resolved in GetPersonExpeditionNameLookup() + nickname_used = tripperson try: personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower()) if not personyear: @@ -138,9 +141,9 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None): message = f" ! - {expedition.year} No name match for: '{tripperson}' in entry {tid=} for this year." print(message) DataIssue.objects.create(parser="logbooks", message=message) - res.append((personyear, logtime_underground)) + res.append((personyear, nickname_used, logtime_underground)) except: - message = f" ! - {expedition.year} EXCEPTION: '{tripperson}' in entry {tid=} for this year." + message = f" ! 
- {expedition.year} EXCEPTION: '{tripperson}' ({nickname_used}) in entry {tid=} for this year." print(message) DataIssue.objects.create(parser="logbooks", message=message) raise @@ -179,7 +182,7 @@ def tidy_time_underground(logtime_underground): def tidy_trip_persons(trippeople, title, expedition, logtime_underground, tid): try: trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid) - # print(f" - {author} - {logtime_underground}") + # trippersons is a list of tuples (personyear, nickname_used, logtime_underground) except: message = f" ! - {expedition.year} Skipping logentry: {title} - GetTripPersons FAIL" DataIssue.objects.create(parser="logbooks", message=message) @@ -247,14 +250,16 @@ def store_entry_into_database(date, place, tripcave, title, text, trippersons, a lbo = LogbookEntry.objects.create(**nonLookupAttribs, **lookupAttribs) pt_list = [] - for tripperson, time_underground in trippersons: - lookupAttribs = {"personexpedition": tripperson, "logbook_entry": lbo} # lbo is primary key + for tripperson, nickname_used, time_underground in trippersons: + lookupAttribs = {"personexpedition": tripperson, "nickname_used": nickname_used, "logbook_entry": lbo} # lbo is primary key nonLookupAttribs = {"time_underground": time_underground, "is_logbook_entry_author": (tripperson == author)} pt_list.append(PersonLogEntry(**nonLookupAttribs, **lookupAttribs)) PersonLogEntry.objects.bulk_create(pt_list) def parser_date(tripdate, year): - """Interprets dates in the expo logbooks and returns a correct datetime.date object""" + """Interprets dates in the expo logbooks and returns a correct datetime.date object + Does NOT actually check that it is a truly valid date.. + """ dummydate = date(1970, 1, 1) # replace with _EPOCH month = 1 day = 1 |