diff options
author | Philip Sargent <philip.sargent@klebos.com> | 2022-11-21 16:47:25 +0000 |
---|---|---|
committer | Philip Sargent <philip.sargent@klebos.com> | 2022-11-21 16:47:25 +0000 |
commit | 259f85742aa0ffabe300329ca0e671ecaa80ef79 (patch) | |
tree | 333adcc18f898337d668d869b377c67bf1b224fb /parsers/logbooks.py | |
parent | a795707552026b66072ff75abfa5ddc77a2cac97 (diff) | |
download | troggle-259f85742aa0ffabe300329ca0e671ecaa80ef79.tar.gz troggle-259f85742aa0ffabe300329ca0e671ecaa80ef79.tar.bz2 troggle-259f85742aa0ffabe300329ca0e671ecaa80ef79.zip |
moved parser settings
Diffstat (limited to 'parsers/logbooks.py')
-rw-r--r-- | parsers/logbooks.py | 72 |
1 file changed, 36 insertions, 36 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py index a1df040..d79a989 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -46,41 +46,41 @@ todo=''' ''' MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200 -DEFAULT_LOGBOOK_PARSER = "Parseloghtmltxt" +DEFAULT_LOGBOOK_PARSER = "parser_html" DEFAULT_LOGBOOK_FILE = "logbook.html" # All years since 2010 use the default value for Logbook parser # but several don't work, and are skipped by the parsing code, e.g. 1983 LOGBOOK_PARSER_SETTINGS = { - "2010": ("logbook.html", "Parseloghtmltxt"), - "2009": ("2009logbook.txt", "Parselogwikitxt"), - "2008": ("2008logbook.txt", "Parselogwikitxt"), - "2007": ("logbook.html", "Parseloghtmltxt"), - "2006": ("logbook.html", "Parseloghtmltxt"), -# "2006": ("logbook/logbook_06.txt", "Parselogwikitxt"), - "2006": ("logbook.html", "Parseloghtmltxt"), - "2005": ("logbook.html", "Parseloghtmltxt"), - "2004": ("logbook.html", "Parseloghtmltxt"), - "2003": ("logbook.html", "Parseloghtml03"), - "2002": ("logbook.html", "Parseloghtmltxt"), - "2001": ("log.htm", "Parseloghtml01"), - "2000": ("log.htm", "Parseloghtml01"), - "1999": ("log.htm", "Parseloghtml01"), - "1998": ("log.htm", "Parseloghtml01"), - "1997": ("log.htm", "Parseloghtml01"), - "1996": ("log.htm", "Parseloghtml01"), - "1995": ("log.htm", "Parseloghtml01"), - "1994": ("log.htm", "Parseloghtml01"), - "1993": ("log.htm", "Parseloghtml01"), - "1992": ("log.htm", "Parseloghtml01"), - "1991": ("log.htm", "Parseloghtml01"), - "1990": ("log.htm", "Parseloghtml01"), - "1989": ("log.htm", "Parseloghtml01"), #crashes MySQL - "1988": ("log.htm", "Parseloghtml01"), #crashes MySQL - "1987": ("log.htm", "Parseloghtml01"), #crashes MySQL - "1985": ("log.htm", "Parseloghtml01"), - "1984": ("log.htm", "Parseloghtml01"), - "1983": ("log.htm", "Parseloghtml01"), - "1982": ("log.htm", "Parseloghtml01"), + "2010": ("logbook.html", "parser_html"), + "2009": ("2009logbook.txt", "parser_wiki"), + "2008": ("2008logbook.txt", "parser_wiki"), + "2007": 
("logbook.html", "parser_html"), + "2006": ("logbook.html", "parser_html"), +# "2006": ("logbook/logbook_06.txt", "parser_wiki"), + "2006": ("logbook.html", "parser_html"), + "2005": ("logbook.html", "parser_html"), + "2004": ("logbook.html", "parser_html"), + "2003": ("logbook.html", "parser_html_03"), + "2002": ("logbook.html", "parser_html"), + "2001": ("log.htm", "parser_html_01"), + "2000": ("log.htm", "parser_html_01"), + "1999": ("log.htm", "parser_html_01"), + "1998": ("log.htm", "parser_html_01"), + "1997": ("log.htm", "parser_html_01"), + "1996": ("log.htm", "parser_html_01"), + "1995": ("log.htm", "parser_html_01"), + "1994": ("log.htm", "parser_html_01"), + "1993": ("log.htm", "parser_html_01"), + "1992": ("log.htm", "parser_html_01"), + "1991": ("log.htm", "parser_html_01"), + "1990": ("log.htm", "parser_html_01"), + "1989": ("log.htm", "parser_html_01"), #crashes MySQL + "1988": ("log.htm", "parser_html_01"), #crashes MySQL + "1987": ("log.htm", "parser_html_01"), #crashes MySQL + "1985": ("log.htm", "parser_html_01"), + "1984": ("log.htm", "parser_html_01"), + "1983": ("log.htm", "parser_html_01"), + "1982": ("log.htm", "parser_html_01"), } entries = { "2022": 64, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79, @@ -248,7 +248,7 @@ def ParseDate(tripdate, year): return datetime.date(1970, 1, 1) # (2006 - not any more), 2008 - 2009 -def Parselogwikitxt(year, expedition, txt): +def parser_wiki(year, expedition, txt): global logentries global logdataissues @@ -290,7 +290,7 @@ def Parselogwikitxt(year, expedition, txt): # 2002, 2004, 2005, 2007, 2010 - now # 2006 wiki text is incomplete, but the html all there. So using this parser now. -def Parseloghtmltxt(year, expedition, txt): +def parser_html(year, expedition, txt): global logentries global logdataissues @@ -349,7 +349,7 @@ def Parseloghtmltxt(year, expedition, txt): # main parser for 1991 - 2001. 
simpler because the data has been hacked so much to fit it # trying it out for years 1982 - 1990 too. Some logbook editing required by hand.. place -def Parseloghtml01(year, expedition, txt): +def parser_html_01(year, expedition, txt): global logentries global logdataissues errorcount = 0 @@ -457,7 +457,7 @@ def Parseloghtml01(year, expedition, txt): return # parser for 2003 -def Parseloghtml03(year, expedition, txt): +def parser_html_03(year, expedition, txt): global logentries global logdataissues @@ -469,7 +469,7 @@ def Parseloghtml03(year, expedition, txt): s = re.match(r"(?s)\s*<p>(.*?)</p>(.*)$", trippara) if not ( s ) : - message = " ! - Skipping logentry {year} on failure to parse Parseloghtml03: {} {} {}...".format(tid,s,trippara[:300]) + message = " ! - Skipping logentry {year} on failure to parse parser_html_03: {} {} {}...".format(tid,s,trippara[:300]) DataIssue.objects.create(parser='logbooks', message=message) logdataissues[tid]=message print(message) |