summaryrefslogtreecommitdiffstats
path: root/parsers/logbooks.py
diff options
context:
space:
mode:
authorsubstantialnoninfringinguser <substantialnoninfringinguser@gmail.com>2009-05-13 05:21:05 +0100
committersubstantialnoninfringinguser <substantialnoninfringinguser@gmail.com>2009-05-13 05:21:05 +0100
commitfa6bf04522d2606adbbaf7ff2175326059c55f22 (patch)
tree136ffa6d29485b00f68e5e7c47944b25c6b5394d /parsers/logbooks.py
parent5e6bf2436d8dd9f952d6a219c77b448969d65778 (diff)
downloadtroggle-fa6bf04522d2606adbbaf7ff2175326059c55f22.tar.gz
troggle-fa6bf04522d2606adbbaf7ff2175326059c55f22.tar.bz2
troggle-fa6bf04522d2606adbbaf7ff2175326059c55f22.zip
[svn] Djangoed Julians code
added underscores to field names turned __str__ to __unicode__ Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8076 by julian @ 11/8/2008 6:24 PM
Diffstat (limited to 'parsers/logbooks.py')
-rw-r--r--parsers/logbooks.py174
1 files changed, 88 insertions, 86 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index c9e1651..900022f 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -30,15 +30,15 @@ def LoadPersons():
for person in personreader:
name = person[header["Name"]]
name = re.sub("<.*?>", "", name)
- mname = re.match("(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name)
+ mname = re.match("(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name)
if mname.group(3):
nickname = mname.group(3)
else:
nickname = ""
- firstname, lastname = mname.group(1), mname.group(2) or ""
-
+ firstname, lastname = mname.group(1), mname.group(2) or ""
+
#print firstname, lastname, "NNN", nickname
#assert lastname == person[header[""]], person
@@ -53,14 +53,14 @@ def LoadPersons():
yo = models.Expedition.objects.filter(year = year)[0]
if attended == "1" or attended == "-1":
pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname=nickname, is_guest=is_guest)
- pyo.save()
-
- # error
- elif (firstname, lastname) == ("Mike", "Richardson") and year == "2001":
- print "Mike Richardson(2001) error"
+ pyo.save()
+
+ # error
+ elif (firstname, lastname) == ("Mike", "Richardson") and year == "2001":
+ print "Mike Richardson(2001) error"
pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname=nickname, is_guest=is_guest)
- pyo.save()
-
+ pyo.save()
+
if name in expoers2008:
print "2008:", name
@@ -98,8 +98,8 @@ def GetTripPersons(trippeople, expedition):
if tripperson and tripperson[0] != '*':
#assert tripperson in personyearmap, "'%s' << %s\n\n %s" % (tripperson, trippeople, personyearmap)
personyear = expedition.GetPersonExpedition(tripperson)
- if not personyear:
- print "NoMatchFor: '%s'" % tripperson
+ if not personyear:
+ print "NoMatchFor: '%s'" % tripperson
res.append(personyear)
if mul:
author = personyear
@@ -107,31 +107,31 @@ def GetTripPersons(trippeople, expedition):
author = res[-1]
return res, author
-def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, tu):
+def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, tu):
trippersons, author = GetTripPersons(trippeople, expedition)
lbo = models.LogbookEntry(date=date, place=place, title=title[:50], text=text, author=author)
- lbo.save()
+ lbo.save()
print "ttt", date, place
for tripperson in trippersons:
- pto = models.PersonTrip(personexpedition = tripperson, place=place, date=date, timeunderground=(tu or ""),
- logbookentry=lbo, is_logbookentryauthor=(tripperson == author))
+ pto = models.PersonTrip(person_expedition = tripperson, place=place, date=date, time_underground=(tu or ""),
+ logbook_entry=lbo, is_logbook_entry_author=(tripperson == author))
pto.save()
-
-def ParseDate(tripdate, year):
+
+def ParseDate(tripdate, year):
mdatestandard = re.match("(\d\d\d\d)-(\d\d)-(\d\d)", tripdate)
mdategoof = re.match("(\d\d?)/0?(\d)/(20|19)?(\d\d)", tripdate)
if mdatestandard:
- assert mdatestandard.group(1) == year, (tripdate, year)
+ assert mdatestandard.group(1) == year, (tripdate, year)
year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3))
- elif mdategoof:
- assert not mdategoof.group(3) or mdategoof.group(3) == year[:2]
+ elif mdategoof:
+ assert not mdategoof.group(3) or mdategoof.group(3) == year[:2]
yadd = int(year[:2]) * 100
day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd
else:
- assert False, tripdate
+ assert False, tripdate
return datetime.date(year, month, day)
-
-# 2007, 2008, 2006
+
+# 2007, 2008, 2006
def Parselogwikitxt(year, expedition, txt):
trippara = re.findall("===(.*?)===([\s\S]*?)(?====)", txt)
for triphead, triptext in trippara:
@@ -150,11 +150,11 @@ def Parselogwikitxt(year, expedition, txt):
tu = ""
#assert tripcave == "Journey", (triphead, triptext)
- ldate = ParseDate(tripdate.strip(), year)
+ ldate = ParseDate(tripdate.strip(), year)
#print "\n", tripcave, "--- ppp", trippeople, len(triptext)
- EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople, expedition=expedition, tu=tu)
+ EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople, expedition=expedition, tu=tu)
-# 2002, 2004, 2005
+# 2002, 2004, 2005
def Parseloghtmltxt(year, expedition, txt):
tripparas = re.findall("<hr\s*/>([\s\S]*?)(?=<hr)", txt)
for trippara in tripparas:
@@ -169,85 +169,86 @@ def Parseloghtmltxt(year, expedition, txt):
assert s, trippara
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
- ldate = ParseDate(tripdate.strip(), year)
- #assert tripid[:-1] == "t" + tripdate, (tripid, tripdate)
- trippeople = re.sub("Ol(?!l)", "Olly", trippeople)
- trippeople = re.sub("Wook(?!e)", "Wookey", trippeople)
- triptitles = triptitle.split(" - ")
- if len(triptitles) >= 2:
- tripcave = triptitles[0]
- else:
+ ldate = ParseDate(tripdate.strip(), year)
+ #assert tripid[:-1] == "t" + tripdate, (tripid, tripdate)
+ trippeople = re.sub("Ol(?!l)", "Olly", trippeople)
+ trippeople = re.sub("Wook(?!e)", "Wookey", trippeople)
+ triptitles = triptitle.split(" - ")
+ if len(triptitles) >= 2:
+ tripcave = triptitles[0]
+ else:
tripcave = "UNKNOWN"
#print "\n", tripcave, "--- ppp", trippeople, len(triptext)
ltriptext = re.sub("</p>", "", triptext)
ltriptext = re.sub("\s*?\n\s*", " ", ltriptext)
- ltriptext = re.sub("<p>", "\n\n", ltriptext).strip() EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu)
+ ltriptext = re.sub("<p>", "\n\n", ltriptext).strip()
+ EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu)
+
-
-# main parser for pre-2001. simpler because the data has been hacked so much to fit it
+# main parser for pre-2001. simpler because the data has been hacked so much to fit it
def Parseloghtml01(year, expedition, txt):
tripparas = re.findall("<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
for trippara in tripparas:
s = re.match(u"(?s)\s*(?:<p>)?(.*?)</?p>(.*)$(?i)", trippara)
- assert s, trippara[:100]
+ assert s, trippara[:100]
tripheader, triptext = s.group(1), s.group(2)
- mtripid = re.search('<a id="(.*?)"', tripheader)
- tripid = mtripid and mtripid.group(1) or ""
- tripheader = re.sub("</?(?:[ab]|span)[^>]*>", "", tripheader)
-
- #print [tripheader]
- #continue
-
+ mtripid = re.search('<a id="(.*?)"', tripheader)
+ tripid = mtripid and mtripid.group(1) or ""
+ tripheader = re.sub("</?(?:[ab]|span)[^>]*>", "", tripheader)
+
+ #print [tripheader]
+ #continue
+
tripdate, triptitle, trippeople = tripheader.split("|")
- ldate = ParseDate(tripdate.strip(), year)
-
- mtu = re.search('<p[^>]*>(T/?U.*)', triptext)
- if mtu:
- tu = mtu.group(1)
- triptext = triptext[:mtu.start(0)] + triptext[mtu.end():]
- else:
- tu = ""
-
- triptitles = triptitle.split(" - ")
- tripcave = triptitles[0].strip()
-
+ ldate = ParseDate(tripdate.strip(), year)
+
+ mtu = re.search('<p[^>]*>(T/?U.*)', triptext)
+ if mtu:
+ tu = mtu.group(1)
+ triptext = triptext[:mtu.start(0)] + triptext[mtu.end():]
+ else:
+ tu = ""
+
+ triptitles = triptitle.split(" - ")
+ tripcave = triptitles[0].strip()
+
ltriptext = re.sub("</p>", "", triptext)
ltriptext = re.sub("\s*?\n\s*", " ", ltriptext)
ltriptext = re.sub("<p>", "\n\n", ltriptext).strip()
- #ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!]", "NONASCII", ltriptext)
-
- #print ldate, trippeople.strip()
- # could includ the tripid (url link for cross referencing)
- EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu)
-
+ #ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!]", "NONASCII", ltriptext)
+
+ #print ldate, trippeople.strip()
+ # could includ the tripid (url link for cross referencing)
+ EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu)
+
def Parseloghtml03(year, expedition, txt):
tripparas = re.findall("<hr\s*/>([\s\S]*?)(?=<hr)", txt)
for trippara in tripparas:
s = re.match(u"(?s)\s*<p>(.*?)</p>(.*)$", trippara)
- assert s, trippara
+ assert s, trippara
tripheader, triptext = s.group(1), s.group(2)
- tripheader = re.sub("&nbsp;", " ", tripheader)
- tripheader = re.sub("\s+", " ", tripheader).strip()
- sheader = tripheader.split(" -- ")
- tu = ""
- if re.match("T/U|Time underwater", sheader[-1]):
- tu = sheader.pop()
- if len(sheader) != 3:
- print sheader
- # continue
- tripdate, triptitle, trippeople = sheader
- ldate = ParseDate(tripdate.strip(), year)
- triptitles = triptitle.split(" , ")
- if len(triptitles) >= 2:
- tripcave = triptitles[0]
- else:
+ tripheader = re.sub("&nbsp;", " ", tripheader)
+ tripheader = re.sub("\s+", " ", tripheader).strip()
+ sheader = tripheader.split(" -- ")
+ tu = ""
+ if re.match("T/U|Time underwater", sheader[-1]):
+ tu = sheader.pop()
+ if len(sheader) != 3:
+ print sheader
+ # continue
+ tripdate, triptitle, trippeople = sheader
+ ldate = ParseDate(tripdate.strip(), year)
+ triptitles = triptitle.split(" , ")
+ if len(triptitles) >= 2:
+ tripcave = triptitles[0]
+ else:
tripcave = "UNKNOWN"
#print tripcave, "--- ppp", triptitle, trippeople, len(triptext)
ltriptext = re.sub("</p>", "", triptext)
ltriptext = re.sub("\s*?\n\s*", " ", ltriptext)
- ltriptext = re.sub("<p>", "\n\n", ltriptext).strip()
- ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext)
- EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu)
+ ltriptext = re.sub("<p>", "\n\n", ltriptext).strip()
+ ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext)
+ EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu)
def LoadLogbooks():
models.LogbookEntry.objects.all().delete()
@@ -272,12 +273,13 @@ def LoadLogbooks():
expedition = models.Expedition.objects.filter(year = year)[0]
fin = open(os.path.join(expowebbase, lloc))
txt = fin.read()
- fin.close()
- parsefunc(year, expedition, txt)
+ fin.close()
+ parsefunc(year, expedition, txt)
# command line run through the loading stages
# you can comment out these in turn to control what gets reloaded
-LoadExpos() LoadPersons()
+LoadExpos()
+LoadPersons()
LoadLogbooks()