summaryrefslogtreecommitdiffstats
path: root/parsers/logbooks.py
diff options
context:
space:
mode:
authorSam Wenham <sam@wenhams.co.uk>2019-03-30 13:58:38 +0000
committerSam Wenham <sam@wenhams.co.uk>2019-03-30 13:58:38 +0000
commita4532a29da6c2a92553daeafbd3c7eca5b42f861 (patch)
tree49efb937e79620d1931ba21db86ca17a1fba36c7 /parsers/logbooks.py
parent705dd51f30bb764eda6efc480d97e2121ebab09f (diff)
downloadtroggle-a4532a29da6c2a92553daeafbd3c7eca5b42f861.tar.gz
troggle-a4532a29da6c2a92553daeafbd3c7eca5b42f861.tar.bz2
troggle-a4532a29da6c2a92553daeafbd3c7eca5b42f861.zip
Update new management command for DB reset
Switch to content_type from mimetype. Make DB reset not nuke so much. Tidy logbook parser.
Diffstat (limited to 'parsers/logbooks.py')
-rw-r--r--parsers/logbooks.py19
1 files changed, 9 insertions, 10 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index ffd8e21..fbe00a3 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -115,7 +115,7 @@ def ParseDate(tripdate, year):
assert False, tripdate
return datetime.date(year, month, day)
-# 2007, 2008, 2006
+# 2006, 2008 - 2010
def Parselogwikitxt(year, expedition, txt):
trippara = re.findall(r"===(.*?)===([\s\S]*?)(?====)", txt)
for triphead, triptext in trippara:
@@ -140,9 +140,9 @@ def Parselogwikitxt(year, expedition, txt):
#print "\n", tripcave, "--- ppp", trippeople, len(triptext)
EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
-# 2002, 2004, 2005
+# 2002, 2004, 2005, 2007, 2011 - 2018
def Parseloghtmltxt(year, expedition, txt):
- print(" - Using log html parser")
+ #print(" - Starting log html parser")
tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
logbook_entry_count = 0
for trippara in tripparas:
@@ -163,7 +163,6 @@ def Parseloghtmltxt(year, expedition, txt):
print("can't parse: ", trippara) # this is 2007 which needs editing
#assert s, trippara
continue
-
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
ldate = ParseDate(tripdate.strip(), year)
#assert tripid[:-1] == "t" + tripdate, (tripid, tripdate)
@@ -174,7 +173,7 @@ def Parseloghtmltxt(year, expedition, txt):
tripcave = triptitles[0]
else:
tripcave = "UNKNOWN"
- #print "\n", tripcave, "--- ppp", trippeople, len(triptext)
+ #print("\n", tripcave, "--- ppp", trippeople, len(triptext))
ltriptext = re.sub(r"</p>", "", triptext)
ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
ltriptext = re.sub(r"<p>", "\n\n", ltriptext).strip()
@@ -183,7 +182,7 @@ def Parseloghtmltxt(year, expedition, txt):
print(" - No trip entrys found in logbook, check the syntax matches htmltxt format")
-# main parser for pre-2001. simpler because the data has been hacked so much to fit it
+# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
def Parseloghtml01(year, expedition, txt):
tripparas = re.findall(r"<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
for trippara in tripparas:
@@ -229,7 +228,7 @@ def Parseloghtml01(year, expedition, txt):
# could includ the tripid (url link for cross referencing)
EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
-
+# parser for 2003
def Parseloghtml03(year, expedition, txt):
tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
for trippara in tripparas:
@@ -281,8 +280,7 @@ def SetDatesFromLogbookEntries(expedition):
def LoadLogbookForExpedition(expedition):
""" Parses all logbook entries for one expedition """
- expowebbase = os.path.join(settings.EXPOWEB, "years")
- #year = str(expedition.year)
+ expowebbase = os.path.join(settings.EXPOWEB, "years")
yearlinks = settings.LOGBOOK_PARSER_SETTINGS
logbook_parseable = False
@@ -294,6 +292,7 @@ def LoadLogbookForExpedition(expedition):
file_in.close()
parsefunc = year_settings[1]
logbook_parseable = True
+ print(" - Parsing logbook: " + year_settings[0] + "\n - Using parser: " + year_settings[1])
else:
try:
file_in = open(os.path.join(expowebbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE))
@@ -304,7 +303,7 @@ def LoadLogbookForExpedition(expedition):
parsefunc = settings.DEFAULT_LOGBOOK_PARSER
except (IOError):
logbook_parseable = False
- print("Couldn't open default logbook file and nothing set for expo " + expedition.year)
+ print("Couldn't open default logbook file and nothing in settings for expo " + expedition.year)
if logbook_parseable:
parser = globals()[parsefunc]