diff options
author | Philip Sargent <philip.sargent@gmail.com> | 2025-01-15 20:21:49 +0000 |
---|---|---|
committer | Philip Sargent <philip.sargent@gmail.com> | 2025-01-15 20:21:49 +0000 |
commit | c5b08ce80f6ea17556bbc45844706c94a9b542e9 (patch) | |
tree | 994017f7ade798c7388fc98213395cf3ea29a8d4 /parsers | |
parent | f3bd9024cfdc1e4ff6b7a4cabc5fc2ddaea2baad (diff) | |
download | troggle-c5b08ce80f6ea17556bbc45844706c94a9b542e9.tar.gz troggle-c5b08ce80f6ea17556bbc45844706c94a9b542e9.tar.bz2 troggle-c5b08ce80f6ea17556bbc45844706c94a9b542e9.zip |
Stumbled on a bug that occurs when a logbook has no entries; fixed.
Diffstat (limited to 'parsers')
-rw-r--r-- | parsers/logbooks.py | 65 |
1 file changed, 39 insertions, 26 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py index 3d96b3b..08e2710 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -58,6 +58,11 @@ LOGBOOK_PARSER_SETTINGS = { LOGBOOKS_DIR = "years" # subfolder of settings.EXPOWEB ENTRIES = { + "2029": 0, + "2028": 0, + "2027": 0, + "2026": 0, + "2025": 0, "2024": 125, "2023": 131, "2022": 94, @@ -379,8 +384,10 @@ def parser_html(year, expedition, txt, seq=""): dupl = {} # extract front material and stash for later use when rebuilding from list of entries - headmatch = re.match(r"(?i)(?s).*<body[^>]*>(.*?)<hr.*", txt) - headpara = headmatch.groups()[0].strip() + if headmatch := re.match(r"(?i)(?s).*<body[^>]*>(.*?)<hr.*", txt): # WALRUS OPERATOR + headpara = headmatch.groups()[0].strip() + else: + headpara = "" if len(headpara) > 0: frontpath = Path(settings.EXPOWEB, LOGBOOKS_DIR, year, "frontmatter.html") @@ -390,8 +397,10 @@ def parser_html(year, expedition, txt, seq=""): front.write(headpara + "\n") # extract END material and stash for later use when rebuilding from list of entries - endmatch = re.match(r"(?i)(?s).*<hr\s*/>([\s\S]*?)(?=</body)", txt) - endpara = endmatch.groups()[0].strip() + if endmatch := re.match(r"(?i)(?s).*<hr\s*/>([\s\S]*?)(?=</body)", txt): # WALRUS OPERATOR + endpara = endmatch.groups()[0].strip() + else: + endpara = "" if len(endpara) > 0: print(f"\n - {year} endpara:\n'{endpara}'") @@ -399,7 +408,10 @@ def parser_html(year, expedition, txt, seq=""): with open(endpath, "w") as end: end.write(endpara + "\n") - tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt) + if tripparas := re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt): # WALRUS OPERATOR + pass + else: + return None logbook_entry_count = 0 for trippara in tripparas: logbook_entry_count += 1 @@ -656,11 +668,12 @@ def parse_logbook_for_expedition(expedition, blog=False): logentries = parser(year, expedition, txt, sq) # this launches the right parser # -------------------- - if len(logentries) == expect: - # print(f"OK 
{year} {len(logentries):5d} is {expect}\n") - pass - else: - print(f"Mismatch in number of log entries: {year} {len(logentries):5d} is not {expect}\n") + if logentries: + if len(logentries) == expect: + # print(f"OK {year} {len(logentries):5d} is {expect}\n") + pass + else: + print(f"Mismatch in number of log entries: {year} {len(logentries):5d} is not {expect}\n") return logentries @@ -674,23 +687,23 @@ def LoadLogbook(year): clean_logbook_for_expedition(expo) logentries = [] - logentries = parse_logbook_for_expedition(expo) # this actually loads the logbook for one expo - print(f" - Loaded logbook. {len(logentries)} entries." ) - if year in BLOG_PARSER_SETTINGS: - print(f" - Loading blog.." ) - logentries += parse_logbook_for_expedition(expo, blog=True) # this loads the blog logbook - else: - print( - f" - Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}" - ) - for entrytuple in logentries: - date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple - if expo == expedition: # unneeded check, we zeroed it before filling it - # print(f" -- {triptitle}") - store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid) + if logentries := parse_logbook_for_expedition(expo): # this actually loads the logbook for one expo # WALRUS + print(f" - Loaded logbook. {len(logentries)} entries." ) + if year in BLOG_PARSER_SETTINGS: + print(f" - Loading blog.." ) + logentries += parse_logbook_for_expedition(expo, blog=True) # this loads the blog logbook else: - print(f" ! 
unexpected log entry labelled as '{expedition}' {tid}" ) - expo.save() # to save logbook name property + print( + f" - Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}" + ) + for entrytuple in logentries: + date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple + if expo == expedition: # unneeded check, we zeroed it before filling it + # print(f" -- {triptitle}") + store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid) + else: + print(f" ! unexpected log entry labelled as '{expedition}' {tid}" ) + expo.save() # to save logbook name property def LoadLogbooks(): """This is the master function for parsing all logbooks into the Troggle database. |