From fa6bf04522d2606adbbaf7ff2175326059c55f22 Mon Sep 17 00:00:00 2001 From: substantialnoninfringinguser Date: Wed, 13 May 2009 05:21:05 +0100 Subject: [svn] Djangoed Julians code added underscores to field names turned __str__ to __unicode__ Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8076 by julian @ 11/8/2008 6:24 PM --- parsers/logbooks.py | 174 ++++++++++++++++++++++++++-------------------------- 1 file changed, 88 insertions(+), 86 deletions(-) (limited to 'parsers/logbooks.py') diff --git a/parsers/logbooks.py b/parsers/logbooks.py index c9e1651..900022f 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -30,15 +30,15 @@ def LoadPersons(): for person in personreader: name = person[header["Name"]] name = re.sub("<.*?>", "", name) - mname = re.match("(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name) + mname = re.match("(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name) if mname.group(3): nickname = mname.group(3) else: nickname = "" - firstname, lastname = mname.group(1), mname.group(2) or "" - + firstname, lastname = mname.group(1), mname.group(2) or "" + #print firstname, lastname, "NNN", nickname #assert lastname == person[header[""]], person @@ -53,14 +53,14 @@ def LoadPersons(): yo = models.Expedition.objects.filter(year = year)[0] if attended == "1" or attended == "-1": pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname=nickname, is_guest=is_guest) - pyo.save() - - # error - elif (firstname, lastname) == ("Mike", "Richardson") and year == "2001": - print "Mike Richardson(2001) error" + pyo.save() + + # error + elif (firstname, lastname) == ("Mike", "Richardson") and year == "2001": + print "Mike Richardson(2001) error" pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname=nickname, is_guest=is_guest) - pyo.save() - + pyo.save() + if name in expoers2008: print "2008:", name @@ -98,8 +98,8 @@ def GetTripPersons(trippeople, expedition): if tripperson and tripperson[0] != '*': #assert tripperson in personyearmap, "'%s' << %s\n\n %s" % (tripperson, trippeople, personyearmap) personyear = expedition.GetPersonExpedition(tripperson) - if not personyear: - print "NoMatchFor: '%s'" % tripperson + if not personyear: + print "NoMatchFor: '%s'" % tripperson res.append(personyear) if mul: author = personyear @@ -107,31 +107,31 @@ def GetTripPersons(trippeople, expedition): author = res[-1] return res, author -def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, tu): +def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, tu): trippersons, author = GetTripPersons(trippeople, expedition) lbo = models.LogbookEntry(date=date, place=place, title=title[:50], text=text, author=author) - lbo.save() + lbo.save() print "ttt", date, place for tripperson in trippersons: - pto = models.PersonTrip(personexpedition = tripperson, place=place, date=date, timeunderground=(tu or ""), - logbookentry=lbo, is_logbookentryauthor=(tripperson == author)) + pto = models.PersonTrip(person_expedition = tripperson, place=place, date=date, time_underground=(tu or ""), + logbook_entry=lbo, is_logbook_entry_author=(tripperson == author)) pto.save() - -def ParseDate(tripdate, year): + +def ParseDate(tripdate, year): mdatestandard = re.match("(\d\d\d\d)-(\d\d)-(\d\d)", tripdate) mdategoof = re.match("(\d\d?)/0?(\d)/(20|19)?(\d\d)", tripdate) if mdatestandard: - assert mdatestandard.group(1) == year, (tripdate, year) + assert mdatestandard.group(1) == year, (tripdate, year) year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3)) - elif mdategoof: - assert not mdategoof.group(3) or mdategoof.group(3) == year[:2] + elif mdategoof: + assert not mdategoof.group(3) or mdategoof.group(3) == year[:2] yadd = int(year[:2]) * 100 day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd else: - assert False, tripdate + assert False, tripdate return datetime.date(year, month, day) - -# 2007, 2008, 2006 + +# 2007, 2008, 2006 def Parselogwikitxt(year, expedition, txt): trippara = re.findall("===(.*?)===([\s\S]*?)(?====)", txt) for triphead, triptext in trippara: @@ -150,11 +150,11 @@ def Parselogwikitxt(year, expedition, txt): tu = "" #assert tripcave == "Journey", (triphead, triptext) - ldate = ParseDate(tripdate.strip(), year) + ldate = ParseDate(tripdate.strip(), year) #print "\n", tripcave, "--- ppp", trippeople, len(triptext) - EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople, expedition=expedition, tu=tu) + EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople, expedition=expedition, tu=tu) -# 2002, 2004, 2005 +# 2002, 2004, 2005 def Parseloghtmltxt(year, expedition, txt): tripparas = re.findall("([\s\S]*?)(?== 2: - tripcave = triptitles[0] - else: + ldate = ParseDate(tripdate.strip(), year) + #assert tripid[:-1] == "t" + tripdate, (tripid, tripdate) + trippeople = re.sub("Ol(?!l)", "Olly", trippeople) + trippeople = re.sub("Wook(?!e)", "Wookey", trippeople) + triptitles = triptitle.split(" - ") + if len(triptitles) >= 2: + tripcave = triptitles[0] + else: tripcave = "UNKNOWN" #print "\n", tripcave, "--- ppp", trippeople, len(triptext) ltriptext = re.sub("

", "", triptext) ltriptext = re.sub("\s*?\n\s*", " ", ltriptext) - ltriptext = re.sub("

", "\n\n", ltriptext).strip() EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu) + ltriptext = re.sub("

", "\n\n", ltriptext).strip() + EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu) + - -# main parser for pre-2001. simpler because the data has been hacked so much to fit it +# main parser for pre-2001. simpler because the data has been hacked so much to fit it def Parseloghtml01(year, expedition, txt): tripparas = re.findall("([\s\S]*?)(?=)?(.*?)(.*)$(?i)", trippara) - assert s, trippara[:100] + assert s, trippara[:100] tripheader, triptext = s.group(1), s.group(2) - mtripid = re.search(']*>", "", tripheader) - - #print [tripheader] - #continue - + mtripid = re.search(']*>", "", tripheader) + + #print [tripheader] + #continue + tripdate, triptitle, trippeople = tripheader.split("|") - ldate = ParseDate(tripdate.strip(), year) - - mtu = re.search(']*>(T/?U.*)', triptext) - if mtu: - tu = mtu.group(1) - triptext = triptext[:mtu.start(0)] + triptext[mtu.end():] - else: - tu = "" - - triptitles = triptitle.split(" - ") - tripcave = triptitles[0].strip() - + ldate = ParseDate(tripdate.strip(), year) + + mtu = re.search(']*>(T/?U.*)', triptext) + if mtu: + tu = mtu.group(1) + triptext = triptext[:mtu.start(0)] + triptext[mtu.end():] + else: + tu = "" + + triptitles = triptitle.split(" - ") + tripcave = triptitles[0].strip() + ltriptext = re.sub("

", "", triptext) ltriptext = re.sub("\s*?\n\s*", " ", ltriptext) ltriptext = re.sub("

", "\n\n", ltriptext).strip() - #ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!]", "NONASCII", ltriptext) - - #print ldate, trippeople.strip() - # could includ the tripid (url link for cross referencing) - EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu) - + #ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!]", "NONASCII", ltriptext) + + #print ldate, trippeople.strip() + # could includ the tripid (url link for cross referencing) + EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu) + def Parseloghtml03(year, expedition, txt): tripparas = re.findall("([\s\S]*?)(?=(.*?)

(.*)$", trippara) - assert s, trippara + assert s, trippara tripheader, triptext = s.group(1), s.group(2) - tripheader = re.sub(" ", " ", tripheader) - tripheader = re.sub("\s+", " ", tripheader).strip() - sheader = tripheader.split(" -- ") - tu = "" - if re.match("T/U|Time underwater", sheader[-1]): - tu = sheader.pop() - if len(sheader) != 3: - print sheader - # continue - tripdate, triptitle, trippeople = sheader - ldate = ParseDate(tripdate.strip(), year) - triptitles = triptitle.split(" , ") - if len(triptitles) >= 2: - tripcave = triptitles[0] - else: + tripheader = re.sub(" ", " ", tripheader) + tripheader = re.sub("\s+", " ", tripheader).strip() + sheader = tripheader.split(" -- ") + tu = "" + if re.match("T/U|Time underwater", sheader[-1]): + tu = sheader.pop() + if len(sheader) != 3: + print sheader + # continue + tripdate, triptitle, trippeople = sheader + ldate = ParseDate(tripdate.strip(), year) + triptitles = triptitle.split(" , ") + if len(triptitles) >= 2: + tripcave = triptitles[0] + else: tripcave = "UNKNOWN" #print tripcave, "--- ppp", triptitle, trippeople, len(triptext) ltriptext = re.sub("

", "", triptext) ltriptext = re.sub("\s*?\n\s*", " ", ltriptext) - ltriptext = re.sub("

", "\n\n", ltriptext).strip() - ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext) - EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu) + ltriptext = re.sub("

", "\n\n", ltriptext).strip() + ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext) + EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu) def LoadLogbooks(): models.LogbookEntry.objects.all().delete() @@ -272,12 +273,13 @@ def LoadLogbooks(): expedition = models.Expedition.objects.filter(year = year)[0] fin = open(os.path.join(expowebbase, lloc)) txt = fin.read() - fin.close() - parsefunc(year, expedition, txt) + fin.close() + parsefunc(year, expedition, txt) # command line run through the loading stages # you can comment out these in turn to control what gets reloaded -LoadExpos() LoadPersons() +LoadExpos() +LoadPersons() LoadLogbooks() -- cgit v1.2.3