summary | refs | log | tree | commit | diff | stats
path: root/parsers/logbooks.py
diff options
context:
space:
mode:
author: Philip Sargent <philip.sargent@gmail.com> 2022-12-15 00:35:48 +0000
committer: Philip Sargent <philip.sargent@gmail.com> 2022-12-15 00:35:48 +0000
commit: 5cc6c26606313d18caa5fc19009a0878341ad468 (patch)
tree: ae9589a5999054e6d57d391e02a7ec222c5d23c4 /parsers/logbooks.py
parent: cb50528e2d6bc7a215a8b55b46a9859aae7f4f83 (diff)
download: troggle-5cc6c26606313d18caa5fc19009a0878341ad468.tar.gz
troggle-5cc6c26606313d18caa5fc19009a0878341ad468.tar.bz2
troggle-5cc6c26606313d18caa5fc19009a0878341ad468.zip
blog parsing working
Diffstat (limited to 'parsers/logbooks.py')
-rw-r--r-- parsers/logbooks.py 39
1 files changed, 27 insertions, 12 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 7e2870b..c221fe1 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -525,7 +525,7 @@ def parser_blog(year, expedition, txt):
datestamp = match_datetime.group(1)
tripdate = datetime.fromisoformat(datestamp)
- print(f" - tid: {tid} '{trippeople}' '{tripdate}'")
+ # print(f" - tid: {tid} '{trippeople}' '{tripdate}'")
tripname = f"UK Caving Blog post {logbook_entry_count}" # must be unique for a given date
@@ -534,7 +534,7 @@ def parser_blog(year, expedition, txt):
logentries.append(entrytuple)
-def LoadLogbookForExpedition(expedition):
+def LoadLogbookForExpedition(expedition, clean=True):
""" Parses all logbook entries for one expedition
"""
global logentries
@@ -571,15 +571,15 @@ def LoadLogbookForExpedition(expedition):
dellist.append(key)
for i in dellist:
del logdataissues[i]
-
- cleanerrors(year)
+ if (clean):
+ cleanerrors(year)
if year in yearlinks:
yearfile, yearparser = yearlinks[year]
logbookpath = Path(expologbase) / year / yearfile
expedition.logbookfile = yearfile
parsefunc = yearparser
- print(f" - Logbook file {yearfile} using parser {yearparser}")
+ # print(f" - Logbook file {yearfile} using parser {yearparser}")
else:
logbookpath = Path(expologbase) / year / DEFAULT_LOGBOOK_FILE
@@ -589,8 +589,9 @@ def LoadLogbookForExpedition(expedition):
expedition.save()
lbes = LogbookEntry.objects.filter(expedition=expedition)
- for lbe in lbes:
- lbe.delete()
+ if (clean):
+ for lbe in lbes:
+ lbe.delete()
try:
file_in = open(logbookpath,'rb')
@@ -659,15 +660,19 @@ def LoadLogbooks():
DataIssue.objects.create(parser='logbooks', message=message)
logdataissues[f"sqlfail 0000"]=message
print(message)
+ return
noexpo = ["1986", "2020", "2021",] #no expo
lostlogbook = ["1976", "1977", "1978", "1979", "1980", "1981"]
sqlfail = ["1987", "1988", "1989"] # breaks mysql with db constraint fail - debug locally first]
nologbook = noexpo + lostlogbook + sqlfail
+ blogs = ["2019"]
+
nlbe={}
expd ={}
- actuals = []
+ loglist = []
+ bloglist = []
for expo in expos: # pointless as we explicitly know the years in this code.
year = expo.year
@@ -681,16 +686,26 @@ def LoadLogbooks():
if year not in nologbook:
if year in entries:
- actuals.append(expo)
+ loglist.append(expo)
else:
print(" - No Logbook yet for: " + year) # catch case when preparing for next expo
+
+ if year in blogs:
+ bloglist.append(expo)
- for ex in actuals:
+
+ for ex in loglist:
nlbe[ex] = LoadLogbookForExpedition(ex) # this actually loads the logbook for one expo
-
+
+ for b in bloglist:
+ orig = LOGBOOK_PARSER_SETTINGS[str(b)]
+ LOGBOOK_PARSER_SETTINGS[str(b)] = ("ukcavingblog.html", "parser_blog")
+ nlbe[b] = LoadLogbookForExpedition(b, clean=False) # this actually loads the logbook for one expo
+ LOGBOOK_PARSER_SETTINGS[str(b)] = orig
+
# tried to use map with concurrent threads - but sqlite database is not concurrent, so failed with database lock
# yt = 0
- # for r in map(LoadLogbookForExpedition, actuals):
+ # for r in map(LoadLogbookForExpedition, loglist):
# yt = r
yt = 0