# -*- coding: utf-8 -*-
"""Load expedition members and logbook entries into the troggle database.

Recovered from the patch that created parsers/logbooks.py:

    commit  b503d3d588474cc41bffc01eca7654bb8c6f4a42
    From:   substantialnoninfringinguser
    Date:   Wed, 13 May 2009 05:13:38 +0100
    Subject: [svn] Initial troggle checkin

    This is a development site using Django 1.0
    Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/,
    rev. 8034 by julian @ 10/26/2008 9:04 PM

Importing this module opens noinfo/folk.csv under settings.EXPOWEB and then
runs the three loading stages (LoadExpos, LoadPersons, LoadLogbooks); see the
bottom of the file.

NOTE(review): the recovered text had HTML-like tags stripped out of string
literals.  The patterns in GetTripPersons and Parseloghtmltxt were rebuilt
and must be checked against the upstream repository before relying on them.
"""

import csv
import datetime
import os
import re
import sqlite3

import settings
import expo.models as models

# Dave Johnson (Stonker) is hacked -- are there two of this DJ name
# Dave Collins (Scout) is hacked
# Letty ten Harkel has middle name removed
# the <...> markup tags have been removed (see the re.sub in LoadPersons)
# Dave Milne (Lummat)
# Ben van Millingen
# Rebecca Lawson (Becka)

# Columns of folk.csv from this index onwards are used as expedition years:
# LoadExpos creates one Expedition per such header and LoadPersons reads an
# attendance flag from each such cell.
_FIRST_YEAR_COLUMN = 5

# The reader is consumed lazily by LoadPersons, so the file has to stay open
# until that stage has run; it is closed in the run-through at the bottom.
persontab = open(os.path.join(settings.EXPOWEB, "noinfo", "folk.csv"))
personreader = csv.reader(persontab)
headers = next(personreader)
header = dict(zip(headers, range(len(headers))))


def LoadExpos():
    """Delete every Expedition and recreate them.

    Creates an Expedition for 2008 plus one for each year column found in
    the folk.csv header row.
    """
    models.Expedition.objects.all().delete()
    expedition = models.Expedition(year="2008", name="CUCC expo2008")
    expedition.save()
    for year in headers[_FIRST_YEAR_COLUMN:]:
        # Was `% y`, which formatted the previously saved Expedition object
        # into the name instead of the year string.
        expedition = models.Expedition(year=year, name="CUCC expo%s" % year)
        expedition.save()


def _GetExpedition(year, cache):
    """Return the Expedition for *year*, remembering it in the dict *cache*.

    Raises IndexError when no Expedition with that year exists.
    """
    if year not in cache:
        cache[year] = models.Expedition.objects.filter(year=year)[0]
    return cache[year]


def _AttachToExpedition(person, expedition):
    """Create and save the PersonExpedition linking *person* to *expedition*."""
    pyo = models.PersonExpedition(person=person, expedition=expedition)
    pyo.save()
    return pyo


def LoadPersons():
    """Delete every Person / PersonExpedition and reload them from folk.csv.

    Each remaining row of the module-level ``personreader`` becomes a Person,
    linked to every expedition whose column holds "1" or "-1".  People on the
    hard-coded 2008 list are also linked to the 2008 expedition; any of them
    not found in the CSV are created afterwards.
    """
    models.Person.objects.all().delete()
    models.PersonExpedition.objects.all().delete()
    expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
    expomissing = set(expoers2008)
    # Was spelled "Kieth Curtis", which can never equal the "Keith Curtis"
    # entry in expoers2008, so he was never marked as a guest.
    guests2008 = ("Eeva Makiranta", "Keith Curtis")
    expeditions = {}  # year -> Expedition; avoids one query per person per year

    for person in personreader:
        if not person:
            continue  # csv.reader yields [] for a blank line
        # Drop any markup embedded in the name cell.
        name = re.sub(r"<.*?>", "", person[header["Name"]])
        lname = name.split()
        if not lname:
            continue  # row without a usable name
        firstname = lname[0]
        # Only the first two words are kept; anything after them is ignored.
        lastname = lname[1] if len(lname) >= 2 else ""
        print("%s %s" % (firstname, lastname))
        # NOTE(review): is_vfho receives the raw CSV cell while is_guest is
        # compared with "1" -- confirm which the model expects.
        pObject = models.Person(first_name=firstname,
                                last_name=lastname,
                                is_guest=person[header["Guest"]] == "1",
                                is_vfho=person[header["VfHO member"]],
                                mug_shot=person[header["Mugshot"]])
        pObject.save()

        for year, attended in list(zip(headers, person))[_FIRST_YEAR_COLUMN:]:
            if attended in ("1", "-1"):
                _AttachToExpedition(pObject, _GetExpedition(year, expeditions))

        if name in expoers2008:
            print("2008: %s" % name)
            expomissing.discard(name)
            _AttachToExpedition(pObject, _GetExpedition("2008", expeditions))

    # Anyone on the 2008 list who had no row in the CSV.
    print(expomissing)
    for name in expomissing:
        firstname, lastname = name.split()
        pObject = models.Person(first_name=firstname,
                                last_name=lastname,
                                is_guest=name in guests2008,
                                is_vfho=False,
                                mug_shot="")
        pObject.save()
        _AttachToExpedition(pObject, _GetExpedition("2008", expeditions))


#
# the logbook loading section
#
def GetTripPersons(trippeople, expedition):
    """Resolve a trip's people string into person-expedition objects.

    *trippeople* is split on ",", "+", "&" and " and ".  Names starting with
    "*" are skipped; every other name is looked up with
    ``expedition.GetPersonExpedition``.

    Returns ``(persons, author)``.  The author is the last underlined name,
    or the last person listed when nobody is underlined.

    Raises AssertionError when no person could be taken from *trippeople*.
    """
    res = []
    author = None
    for tripperson in re.split(r",|\+|&| and ", trippeople):
        tripperson = tripperson.strip()
        # NOTE(review): the recovered text had "(.*?)$" here, which matches
        # every name and makes the fallback below unreachable.  The <u>...</u>
        # markup is a reconstruction of what the tag stripping removed --
        # confirm against upstream.
        mul = re.match(r"<u>(.*?)</u>$", tripperson)
        if mul:
            tripperson = mul.group(1)
        if tripperson and tripperson[0] != '*':
            personyear = expedition.GetPersonExpedition(tripperson)
            print(personyear)
            res.append(personyear)
            if mul:
                author = personyear
    if not author:
        # Previously a bare IndexError on res[-1] when nobody was listed.
        assert res, "no people found in %r" % (trippeople,)
        author = res[-1]
    return res, author


def Parselogwikitxt(year, personyearmap, txt):
    """Load a wiki-style logbook, one entry per "===date|place|people===" head.

    *personyearmap* is handed to GetTripPersons as its ``expedition``
    argument, so it must provide ``GetPersonExpedition``.  *year* is unused.

    NOTE(review): this writes LogbookEntry with ``cave``/``tu`` and uses
    ``lbo.cavers``, whereas Parseloghtmltxt uses ``place``/``author`` and
    PersonTrip rows.  Both cannot match the same model unless it has all of
    these fields -- confirm against expo.models before enabling 2007+ years.
    """
    # An entry's text runs up to the next "===", so text after the last
    # heading that is not followed by another "===" is not picked up.
    trippara = re.findall(r"===(.*?)===([\s\S]*?)(?====)", txt)
    for triphead, triptext in trippara:
        tripheadp = triphead.split("|")
        assert len(tripheadp) == 3, tripheadp
        tripdate, tripplace, trippeople = tripheadp
        tripsplace = tripplace.split(" - ")
        tripcave = tripsplace[0]

        # Time underground, e.g. "T/U: 4.5 hrs" or "TU unknown"; only the
        # first occurrence in the text is used.
        tul = re.findall(r"T/?U:?\s*(\d+(?:\.\d*)?|unknown)\s*(hrs|hours)?", triptext)
        triptime = tul[0][0] if tul else ""

        assert re.match(r"\d\d\d\d-\d\d-\d\d", tripdate), tripdate
        ldate = datetime.date(int(tripdate[:4]), int(tripdate[5:7]), int(tripdate[8:10]))
        lbo = models.LogbookEntry(date=ldate, cave=tripcave, title=tripsplace[-1], text=triptext, tu=triptime)
        lbo.save()

        trippersons, author = GetTripPersons(trippeople, personyearmap)
        for tripperson in trippersons:
            lbo.cavers.add(tripperson)
        # add the author


def _ParseTripDate(tripdate):
    """Turn "yyyy-mm-dd" or "d/m/yy" text at the start of *tripdate* into a date.

    Two-digit years are taken to be 20yy.  Raises AssertionError (carrying
    the text) for anything else.
    """
    mdatestandard = re.match(r"(\d\d\d\d)-(\d\d)-(\d\d)", tripdate)
    if mdatestandard:
        tyear, tmonth, tday = [int(g) for g in mdatestandard.groups()]
        return datetime.date(tyear, tmonth, tday)
    # The month was a single \d before, so two-digit months were rejected.
    mdategoof = re.match(r"(\d\d?)/(\d\d?)/(\d\d)", tripdate)
    if mdategoof:
        tday, tmonth, tyear = [int(g) for g in mdategoof.groups()]
        return datetime.date(tyear + 2000, tmonth, tday)
    # Raised explicitly so that it still fires under "python -O".
    raise AssertionError(tripdate)


def Parseloghtmltxt(year, expedition, txt):
    """Load an HTML logbook whose entries are separated by <hr /> elements.

    Every entry becomes a LogbookEntry plus one PersonTrip per person that
    GetTripPersons finds in the entry's people field.  *year* is unused; the
    date comes from each entry.

    NOTE(review): the two patterns below were rebuilt, because the recovered
    text had all "<...>" markup stripped out of them.  The number and order
    of capture groups is fixed by the unpacking of s.groups(); the tag and
    class names are a reconstruction -- confirm against upstream.
    """
    # An entry runs from one <hr /> to the start of the next "<hr".
    tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
    for trippara in tripparas:
        s = re.match(r'''(?x)\s*(?:<a\s+id="(.*?)"\s*/>)?
                            \s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>
                            \s*<div\s+class="trippeople">(.*?)</div>
                            \s*<div\s+class="triptitle">(.*?)</div>
                            ([\s\S]*?)
                            \s*(?:<div\s+class="timeug">(.*?)</div>)?
                            \s*$
                     ''', trippara)
        assert s, trippara

        tripid, tripid1, tripdate, trippeople, triptitle, triptext, timeug = s.groups()
        ldate = _ParseTripDate(tripdate)
        trippersons, author = GetTripPersons(trippeople, expedition)
        tripcave = ""
        lbo = models.LogbookEntry(date=ldate, place=tripcave, title=triptitle, text=triptext, author=author)
        lbo.save()
        tu = timeug or ""

        for tripperson in trippersons:
            pto = models.PersonTrip(personexpedition=tripperson, place=tripcave, date=ldate, timeunderground=tu, logbookentry=lbo)
            pto.save()


def LoadLogbooks():
    """Delete every LogbookEntry and reload the logbooks listed in yearlinks.

    Years from 2007 onwards are parsed as wiki text, earlier ones as HTML.
    """
    models.LogbookEntry.objects.all().delete()
    expowebbase = os.path.join(settings.EXPOWEB, "years")  # this could be a url
    yearlinks = [
#        ("2008", "2008/logbook/2008logbook.txt"),
#        ("2007", "2007/logbook/2007logbook.txt"),
#        ("2005", "2005/logbook.html"),
        ("2004", "2004/logbook.html"),
#        ("2003", "2003/logbook.html"),
    ]

    for year, lloc in yearlinks:
        expedition = models.Expedition.objects.filter(year=year)[0]
        with open(os.path.join(expowebbase, lloc)) as fin:
            txt = fin.read()
        if int(year) >= 2007:
            # Used to pass `personyearmap`, a name that is defined nowhere in
            # this module (NameError).  GetTripPersons needs the expedition.
            Parselogwikitxt(year, expedition, txt)
        else:
            Parseloghtmltxt(year, expedition, txt)


# command line run through the loading stages
try:
    LoadExpos()
    LoadPersons()
finally:
    # LoadPersons was the last consumer of the CSV reader.
    persontab.close()
LoadLogbooks()