diff options
Diffstat (limited to 'parsers')
-rw-r--r-- | parsers/logbooks.py | 39 |
1 files changed, 27 insertions, 12 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py index 7e2870b..c221fe1 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -525,7 +525,7 @@ def parser_blog(year, expedition, txt): datestamp = match_datetime.group(1) tripdate = datetime.fromisoformat(datestamp) - print(f" - tid: {tid} '{trippeople}' '{tripdate}'") + # print(f" - tid: {tid} '{trippeople}' '{tripdate}'") tripname = f"UK Caving Blog post {logbook_entry_count}" # must be unique for a given date @@ -534,7 +534,7 @@ def parser_blog(year, expedition, txt): logentries.append(entrytuple) -def LoadLogbookForExpedition(expedition): +def LoadLogbookForExpedition(expedition, clean=True): """ Parses all logbook entries for one expedition """ global logentries @@ -571,15 +571,15 @@ def LoadLogbookForExpedition(expedition): dellist.append(key) for i in dellist: del logdataissues[i] - - cleanerrors(year) + if (clean): + cleanerrors(year) if year in yearlinks: yearfile, yearparser = yearlinks[year] logbookpath = Path(expologbase) / year / yearfile expedition.logbookfile = yearfile parsefunc = yearparser - print(f" - Logbook file {yearfile} using parser {yearparser}") + # print(f" - Logbook file {yearfile} using parser {yearparser}") else: logbookpath = Path(expologbase) / year / DEFAULT_LOGBOOK_FILE @@ -589,8 +589,9 @@ def LoadLogbookForExpedition(expedition): expedition.save() lbes = LogbookEntry.objects.filter(expedition=expedition) - for lbe in lbes: - lbe.delete() + if (clean): + for lbe in lbes: + lbe.delete() try: file_in = open(logbookpath,'rb') @@ -659,15 +660,19 @@ def LoadLogbooks(): DataIssue.objects.create(parser='logbooks', message=message) logdataissues[f"sqlfail 0000"]=message print(message) + return noexpo = ["1986", "2020", "2021",] #no expo lostlogbook = ["1976", "1977", "1978", "1979", "1980", "1981"] sqlfail = ["1987", "1988", "1989"] # breaks mysql with db constraint fail - debug locally first] nologbook = noexpo + lostlogbook + sqlfail + blogs = ["2019"] + nlbe={} expd ={} - actuals = [] + loglist = [] + bloglist = [] for expo in expos: # pointless as we explicitly know the years in this code. year = expo.year @@ -681,16 +686,26 @@ def LoadLogbooks(): if year not in nologbook: if year in entries: - actuals.append(expo) + loglist.append(expo) else: print(" - No Logbook yet for: " + year) # catch case when preparing for next expo + + if year in blogs: + bloglist.append(expo) - for ex in actuals: + + for ex in loglist: nlbe[ex] = LoadLogbookForExpedition(ex) # this actually loads the logbook for one expo - + + for b in bloglist: + orig = LOGBOOK_PARSER_SETTINGS[str(b)] + LOGBOOK_PARSER_SETTINGS[str(b)] = ("ukcavingblog.html", "parser_blog") + nlbe[b] = LoadLogbookForExpedition(b, clean=False) # this actually loads the logbook for one expo + LOGBOOK_PARSER_SETTINGS[str(b)] = orig + # tried to use map with concurrent threads - but sqlite database is not concurrent, so failed with database lock # yt = 0 - # for r in map(LoadLogbookForExpedition, actuals): + # for r in map(LoadLogbookForExpedition, loglist): # yt = r yt = 0 |