From e4496e4cd8dd7d6ea809aa2142f2d7adcbeca213 Mon Sep 17 00:00:00 2001 From: goatchurch Date: Wed, 5 Aug 2009 11:58:36 +0100 Subject: [svn] latest hacking for various statistics --- parsers/logbooks.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'parsers/logbooks.py') diff --git a/parsers/logbooks.py b/parsers/logbooks.py index 0867686..88816d4 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -40,6 +40,8 @@ def GetTripPersons(trippeople, expedition, logtime_underground): if mul: author = personyear if not author: + if not res: + return None, None author = res[-1][0] return res, author @@ -75,6 +77,10 @@ noncaveplaces = [ "Journey", "Loser Plateau" ] def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground): """ saves a logbook entry and related persontrips """ trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground) + if not author: + print "skipping logentry", title + return + # tripCave = GetTripCave(place) # lplace = place.lower() @@ -135,15 +141,20 @@ def Parselogwikitxt(year, expedition, txt): def Parseloghtmltxt(year, expedition, txt): tripparas = re.findall("([\s\S]*?)(?=)? - \s*(.*?) + + s = re.match('''(?x)(?:\s*.*?\s*

)? # second date + \s*(?:)? + \s*(.*?)(?:

)? \s*\s*(.*?) \s*\s*(.*?) ([\s\S]*?) \s*(?:\s*(.*?))? \s*$ ''', trippara) - assert s, trippara + if not s: + print "can't parse: ", trippara # this is 2007 which needs editing + #assert s, trippara + continue tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups() ldate = ParseDate(tripdate.strip(), year) @@ -240,7 +251,7 @@ def Parseloghtml03(year, expedition, txt): yearlinks = [ ("2008", "2008/2008logbook.txt", Parselogwikitxt), - ("2007", "2007/2007logbook.txt", Parselogwikitxt), + ("2007", "2007/logbook.html", Parseloghtmltxt), ("2006", "2006/logbook/logbook_06.txt", Parselogwikitxt), ("2005", "2005/logbook.html", Parseloghtmltxt), ("2004", "2004/logbook.html", Parseloghtmltxt), @@ -326,6 +337,7 @@ def LoadLogbookForExpedition(expedition): if lyear == year: break fin = open(os.path.join(expowebbase, lloc)) + print "opennning", lloc txt = fin.read().decode("latin1") fin.close() parsefunc(year, expedition, txt) -- cgit v1.2.3