summary refs log tree commit diff stats
path: root/parsers
diff options
context:
space:
mode:
author Philip Sargent <philip.sargent@gmail.com> 2025-01-15 20:21:49 +0000
committer Philip Sargent <philip.sargent@gmail.com> 2025-01-15 20:21:49 +0000
commit c5b08ce80f6ea17556bbc45844706c94a9b542e9 (patch)
tree 994017f7ade798c7388fc98213395cf3ea29a8d4 /parsers
parent f3bd9024cfdc1e4ff6b7a4cabc5fc2ddaea2baad (diff)
download troggle-c5b08ce80f6ea17556bbc45844706c94a9b542e9.tar.gz
troggle-c5b08ce80f6ea17556bbc45844706c94a9b542e9.tar.bz2
troggle-c5b08ce80f6ea17556bbc45844706c94a9b542e9.zip
Stumbled on a bug when a logbook has no entries; fixed.
Diffstat (limited to 'parsers')
-rw-r--r-- parsers/logbooks.py 65
1 files changed, 39 insertions, 26 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 3d96b3b..08e2710 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -58,6 +58,11 @@ LOGBOOK_PARSER_SETTINGS = {
LOGBOOKS_DIR = "years" # subfolder of settings.EXPOWEB
ENTRIES = {
+ "2029": 0,
+ "2028": 0,
+ "2027": 0,
+ "2026": 0,
+ "2025": 0,
"2024": 125,
"2023": 131,
"2022": 94,
@@ -379,8 +384,10 @@ def parser_html(year, expedition, txt, seq=""):
dupl = {}
# extract front material and stash for later use when rebuilding from list of entries
- headmatch = re.match(r"(?i)(?s).*<body[^>]*>(.*?)<hr.*", txt)
- headpara = headmatch.groups()[0].strip()
+ if headmatch := re.match(r"(?i)(?s).*<body[^>]*>(.*?)<hr.*", txt): # WALRUS OPERATOR
+ headpara = headmatch.groups()[0].strip()
+ else:
+ headpara = ""
if len(headpara) > 0:
frontpath = Path(settings.EXPOWEB, LOGBOOKS_DIR, year, "frontmatter.html")
@@ -390,8 +397,10 @@ def parser_html(year, expedition, txt, seq=""):
front.write(headpara + "\n")
# extract END material and stash for later use when rebuilding from list of entries
- endmatch = re.match(r"(?i)(?s).*<hr\s*/>([\s\S]*?)(?=</body)", txt)
- endpara = endmatch.groups()[0].strip()
+ if endmatch := re.match(r"(?i)(?s).*<hr\s*/>([\s\S]*?)(?=</body)", txt): # WALRUS OPERATOR
+ endpara = endmatch.groups()[0].strip()
+ else:
+ endpara = ""
if len(endpara) > 0:
print(f"\n - {year} endpara:\n'{endpara}'")
@@ -399,7 +408,10 @@ def parser_html(year, expedition, txt, seq=""):
with open(endpath, "w") as end:
end.write(endpara + "\n")
- tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
+ if tripparas := re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt): # WALRUS OPERATOR
+ pass
+ else:
+ return None
logbook_entry_count = 0
for trippara in tripparas:
logbook_entry_count += 1
@@ -656,11 +668,12 @@ def parse_logbook_for_expedition(expedition, blog=False):
logentries = parser(year, expedition, txt, sq) # this launches the right parser
# --------------------
- if len(logentries) == expect:
- # print(f"OK {year} {len(logentries):5d} is {expect}\n")
- pass
- else:
- print(f"Mismatch in number of log entries: {year} {len(logentries):5d} is not {expect}\n")
+ if logentries:
+ if len(logentries) == expect:
+ # print(f"OK {year} {len(logentries):5d} is {expect}\n")
+ pass
+ else:
+ print(f"Mismatch in number of log entries: {year} {len(logentries):5d} is not {expect}\n")
return logentries
@@ -674,23 +687,23 @@ def LoadLogbook(year):
clean_logbook_for_expedition(expo)
logentries = []
- logentries = parse_logbook_for_expedition(expo) # this actually loads the logbook for one expo
- print(f" - Loaded logbook. {len(logentries)} entries." )
- if year in BLOG_PARSER_SETTINGS:
- print(f" - Loading blog.." )
- logentries += parse_logbook_for_expedition(expo, blog=True) # this loads the blog logbook
- else:
- print(
- f" - Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}"
- )
- for entrytuple in logentries:
- date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
- if expo == expedition: # unneeded check, we zeroed it before filling it
- # print(f" -- {triptitle}")
- store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid)
+ if logentries := parse_logbook_for_expedition(expo): # this actually loads the logbook for one expo # WALRUS
+ print(f" - Loaded logbook. {len(logentries)} entries." )
+ if year in BLOG_PARSER_SETTINGS:
+ print(f" - Loading blog.." )
+ logentries += parse_logbook_for_expedition(expo, blog=True) # this loads the blog logbook
else:
- print(f" ! unexpected log entry labelled as '{expedition}' {tid}" )
- expo.save() # to save logbook name property
+ print(
+ f" - Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}"
+ )
+ for entrytuple in logentries:
+ date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
+ if expo == expedition: # unneeded check, we zeroed it before filling it
+ # print(f" -- {triptitle}")
+ store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid)
+ else:
+ print(f" ! unexpected log entry labelled as '{expedition}' {tid}" )
+ expo.save() # to save logbook name property
def LoadLogbooks():
"""This is the master function for parsing all logbooks into the Troggle database.