summary | refs | log | tree | commit | diff | stats
path: root/parsers/logbooks.py
diff options
context:
space:
mode:
Diffstat (limited to 'parsers/logbooks.py')
-rw-r--r-- parsers/logbooks.py 23
1 files changed, 14 insertions, 9 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 59711c4..0a497ad 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -22,6 +22,9 @@ Parses and imports logbooks in all their wonderful confusion
https://expo.survex.com/handbook/computing/logbooks-parsing.html
"""
todo = """
+- make id= for each entry persistent and unchanging, and check cross-references in other logbooks and other HTML fragments
+e.g. cave descriptions
+
- Most of the time is during the database writing (6s out of 8s).
- profile the code to find bad repetitive things, of which there are many.
@@ -37,8 +40,7 @@ todo = """
file_in = open(logbookfile,'rb')
txt = file_in.read().decode("latin1")
-- use Fixtures https://docs.djangoproject.com/en/dev/ref/django-admin/#django-admin-loaddata to cache
- data for old logbooks? Not worth it..
+
"""
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
BLOG_PARSER_SETTINGS = { # no default, must be explicit
@@ -127,7 +129,8 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
if tripperson[0] != "*": # a name prefix of "*" is special
tripperson = re.sub(rx_round_bracket, "", tripperson).strip()
- # Whacky aliases all handled in GetPersonExpeditionNameLookup()
+ # Whacky aliases all resolved in GetPersonExpeditionNameLookup()
+ nickname_used = tripperson
try:
personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
if not personyear:
@@ -138,9 +141,9 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
message = f" ! - {expedition.year} No name match for: '{tripperson}' in entry {tid=} for this year."
print(message)
DataIssue.objects.create(parser="logbooks", message=message)
- res.append((personyear, logtime_underground))
+ res.append((personyear, nickname_used, logtime_underground))
except:
- message = f" ! - {expedition.year} EXCEPTION: '{tripperson}' in entry {tid=} for this year."
+ message = f" ! - {expedition.year} EXCEPTION: '{tripperson}' ({nickname_used}) in entry {tid=} for this year."
print(message)
DataIssue.objects.create(parser="logbooks", message=message)
raise
@@ -179,7 +182,7 @@ def tidy_time_underground(logtime_underground):
def tidy_trip_persons(trippeople, title, expedition, logtime_underground, tid):
try:
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid)
- # print(f" - {author} - {logtime_underground}")
+ # trippersons is a list of tuples (personyear, nickname_used, logtime_underground)
except:
message = f" ! - {expedition.year} Skipping logentry: {title} - GetTripPersons FAIL"
DataIssue.objects.create(parser="logbooks", message=message)
@@ -247,14 +250,16 @@ def store_entry_into_database(date, place, tripcave, title, text, trippersons, a
lbo = LogbookEntry.objects.create(**nonLookupAttribs, **lookupAttribs)
pt_list = []
- for tripperson, time_underground in trippersons:
- lookupAttribs = {"personexpedition": tripperson, "logbook_entry": lbo} # lbo is primary key
+ for tripperson, nickname_used, time_underground in trippersons:
+ lookupAttribs = {"personexpedition": tripperson, "nickname_used": nickname_used, "logbook_entry": lbo} # lbo is primary key
nonLookupAttribs = {"time_underground": time_underground, "is_logbook_entry_author": (tripperson == author)}
pt_list.append(PersonLogEntry(**nonLookupAttribs, **lookupAttribs))
PersonLogEntry.objects.bulk_create(pt_list)
def parser_date(tripdate, year):
- """Interprets dates in the expo logbooks and returns a correct datetime.date object"""
+ """Interprets dates in the expo logbooks and returns a correct datetime.date object
+ Does NOT actually check that it is a truly valid date.
+ """
dummydate = date(1970, 1, 1) # replace with _EPOCH
month = 1
day = 1