summary | refs | log | tree | commit | diff | stats
path: root/parsers/logbooks.py
diff options
context:
space:
mode:
author: Wookey <wookey@wookware.org> 2019-04-02 00:57:54 +0100
committer: Wookey <wookey@wookware.org> 2019-04-02 00:57:54 +0100
commit: c4301cf6df56ba1bef4f2c908b949a2b45ea65dc (patch)
tree: 9c6bb4a4530824c8e072984a0346509298188030 /parsers/logbooks.py
parent: de7d68b1eb70542f66092cb0048af3d096e6980c (diff)
parent: bb8dbb381fe87c3a63e9586a1bf1e993b09c965b (diff)
download: troggle-c4301cf6df56ba1bef4f2c908b949a2b45ea65dc.tar.gz
troggle-c4301cf6df56ba1bef4f2c908b949a2b45ea65dc.tar.bz2
troggle-c4301cf6df56ba1bef4f2c908b949a2b45ea65dc.zip
Merge lots of troggle fixes
Diffstat (limited to 'parsers/logbooks.py')
-rw-r--r-- parsers/logbooks.py 47
1 files changed, 26 insertions, 21 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index ffd8e21..cecbdb3 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -45,7 +45,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground):
author = res[-1][0]
return res, author
-def GetTripCave(place): #need to be fuzzier about matching here. Already a very slow function...
+def GetTripCave(place): #need to be fuzzier about matching here. Already a very slow function...
# print "Getting cave for " , place
try:
katastNumRes=[]
@@ -74,23 +74,23 @@ def GetTripCave(place): #need to be fuzzier about matching h
noncaveplaces = [ "Journey", "Loser Plateau" ]
-def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground):
+def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki"):
""" saves a logbook entry and related persontrips """
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground)
if not author:
- print(" - skipping logentry" + title + " no author for entry")
+ print(" - Skipping logentry: " + title + " no author for entry")
return
-
-# tripCave = GetTripCave(place)
- #
+
+ #tripCave = GetTripCave(place)
+
lplace = place.lower()
if lplace not in noncaveplaces:
cave=GetCaveLookup().get(lplace)
#Check for an existing copy of the current entry, and save
expeditionday = expedition.get_expedition_day(date)
- lookupAttribs={'date':date, 'title':title}
- nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave':cave, 'slug':slugify(title)[:50]}
+ lookupAttribs={'date':date, 'title':title}
+ nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave':cave, 'slug':slugify(title)[:50], 'entry_type':entry_type}
lbo, created=save_carefully(models.LogbookEntry, lookupAttribs, nonLookupAttribs)
for tripperson, time_underground in trippersons:
@@ -115,7 +115,7 @@ def ParseDate(tripdate, year):
assert False, tripdate
return datetime.date(year, month, day)
-# 2007, 2008, 2006
+# 2006, 2008 - 2010
def Parselogwikitxt(year, expedition, txt):
trippara = re.findall(r"===(.*?)===([\s\S]*?)(?====)", txt)
for triphead, triptext in trippara:
@@ -140,9 +140,9 @@ def Parselogwikitxt(year, expedition, txt):
#print "\n", tripcave, "--- ppp", trippeople, len(triptext)
EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
-# 2002, 2004, 2005
+# 2002, 2004, 2005, 2007, 2011 - 2018
def Parseloghtmltxt(year, expedition, txt):
- print(" - Using log html parser")
+ #print(" - Starting log html parser")
tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
logbook_entry_count = 0
for trippara in tripparas:
@@ -163,7 +163,6 @@ def Parseloghtmltxt(year, expedition, txt):
print("can't parse: ", trippara) # this is 2007 which needs editing
#assert s, trippara
continue
-
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
ldate = ParseDate(tripdate.strip(), year)
#assert tripid[:-1] == "t" + tripdate, (tripid, tripdate)
@@ -174,16 +173,18 @@ def Parseloghtmltxt(year, expedition, txt):
tripcave = triptitles[0]
else:
tripcave = "UNKNOWN"
- #print "\n", tripcave, "--- ppp", trippeople, len(triptext)
+ #print("\n", tripcave, "--- ppp", trippeople, len(triptext))
ltriptext = re.sub(r"</p>", "", triptext)
ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
ltriptext = re.sub(r"<p>", "\n\n", ltriptext).strip()
- EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
+ EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext,
+ trippeople=trippeople, expedition=expedition, logtime_underground=0,
+ entry_type="html")
if logbook_entry_count == 0:
print(" - No trip entrys found in logbook, check the syntax matches htmltxt format")
-# main parser for pre-2001. simpler because the data has been hacked so much to fit it
+# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
def Parseloghtml01(year, expedition, txt):
tripparas = re.findall(r"<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
for trippara in tripparas:
@@ -227,9 +228,11 @@ def Parseloghtml01(year, expedition, txt):
#print ldate, trippeople.strip()
# could includ the tripid (url link for cross referencing)
- EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
-
+ EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext,
+ trippeople=trippeople, expedition=expedition, logtime_underground=0,
+ entry_type="html")
+# parser for 2003
def Parseloghtml03(year, expedition, txt):
tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
for trippara in tripparas:
@@ -256,7 +259,9 @@ def Parseloghtml03(year, expedition, txt):
ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
ltriptext = re.sub(r"<p>", "\n\n", ltriptext).strip()
ltriptext = re.sub(r"[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext)
- EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
+ EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle,
+ text = ltriptext, trippeople=trippeople, expedition=expedition,
+ logtime_underground=0, entry_type="html")
def SetDatesFromLogbookEntries(expedition):
@@ -281,8 +286,7 @@ def SetDatesFromLogbookEntries(expedition):
def LoadLogbookForExpedition(expedition):
""" Parses all logbook entries for one expedition """
- expowebbase = os.path.join(settings.EXPOWEB, "years")
- #year = str(expedition.year)
+ expowebbase = os.path.join(settings.EXPOWEB, "years")
yearlinks = settings.LOGBOOK_PARSER_SETTINGS
logbook_parseable = False
@@ -294,6 +298,7 @@ def LoadLogbookForExpedition(expedition):
file_in.close()
parsefunc = year_settings[1]
logbook_parseable = True
+ print(" - Parsing logbook: " + year_settings[0] + "\n - Using parser: " + year_settings[1])
else:
try:
file_in = open(os.path.join(expowebbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE))
@@ -304,7 +309,7 @@ def LoadLogbookForExpedition(expedition):
parsefunc = settings.DEFAULT_LOGBOOK_PARSER
except (IOError):
logbook_parseable = False
- print("Couldn't open default logbook file and nothing set for expo " + expedition.year)
+ print("Couldn't open default logbook file and nothing in settings for expo " + expedition.year)
if logbook_parseable:
parser = globals()[parsefunc]