[svn] Djangoed Julian's code

Added underscores to field names; turned __str__ to __unicode__. Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8076 by julian @ 11/8/2008 6:24 PM
author: substantialnoninfringinguser <substantialnoninfringinguser@gmail.com> 2009-05-13 05:21:05 +0100
committer: substantialnoninfringinguser <substantialnoninfringinguser@gmail.com> 2009-05-13 05:21:05 +0100
commit: fa6bf04522d2606adbbaf7ff2175326059c55f22 (patch)
tree: 136ffa6d29485b00f68e5e7c47944b25c6b5394d /parsers/logbooks.py
parent: 5e6bf2436d8dd9f952d6a219c77b448969d65778 (diff)
download: troggle-fa6bf04522d2606adbbaf7ff2175326059c55f22.tar.gz
troggle-fa6bf04522d2606adbbaf7ff2175326059c55f22.tar.bz2
troggle-fa6bf04522d2606adbbaf7ff2175326059c55f22.zip
1 files changed, 88 insertions, 86 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index c9e1651..900022f 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -30,15 +30,15 @@ def LoadPersons():
     for person in personreader:
         name = person[header["Name"]]
         name = re.sub("<.*?>", "", name)
-        mname = re.match("(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name)
+        mname = re.match("(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name)
 
         if mname.group(3):
             nickname = mname.group(3)
         else:
             nickname = ""
 
-        firstname, lastname = mname.group(1), mname.group(2) or ""
-
+        firstname, lastname = mname.group(1), mname.group(2) or ""
+
         #print firstname, lastname, "NNN", nickname
         #assert lastname == person[header[""]], person
 
@@ -53,14 +53,14 @@ def LoadPersons():
             yo = models.Expedition.objects.filter(year = year)[0]
             if attended == "1" or attended == "-1":
                 pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname=nickname, is_guest=is_guest)
-                pyo.save()
-
-            # error
-            elif (firstname, lastname) == ("Mike", "Richardson") and year == "2001":
-                print "Mike Richardson(2001) error"
+                pyo.save()
+
+            # error
+            elif (firstname, lastname) == ("Mike", "Richardson") and year == "2001":
+                print "Mike Richardson(2001) error"
                 pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname=nickname, is_guest=is_guest)
-                pyo.save()
-
+                pyo.save()
+
             
         if name in expoers2008:
             print "2008:", name
@@ -98,8 +98,8 @@ def GetTripPersons(trippeople, expedition):
         if tripperson and tripperson[0] != '*':
             #assert tripperson in personyearmap, "'%s' << %s\n\n %s" % (tripperson, trippeople, personyearmap)
             personyear = expedition.GetPersonExpedition(tripperson)
-            if not personyear:
-                print "NoMatchFor: '%s'" % tripperson    
+            if not personyear:
+                print "NoMatchFor: '%s'" % tripperson    
             res.append(personyear)
             if mul:
                 author = personyear
@@ -107,31 +107,31 @@ def GetTripPersons(trippeople, expedition):
         author = res[-1]
     return res, author
 
-def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, tu):
+def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, tu):
     trippersons, author = GetTripPersons(trippeople, expedition)
     lbo = models.LogbookEntry(date=date, place=place, title=title[:50], text=text, author=author)
-    lbo.save()
+    lbo.save()
     print "ttt", date, place
     for tripperson in trippersons:
-        pto = models.PersonTrip(personexpedition = tripperson, place=place, date=date, timeunderground=(tu or ""), 
-                                logbookentry=lbo, is_logbookentryauthor=(tripperson == author))
+        pto = models.PersonTrip(person_expedition = tripperson, place=place, date=date, time_underground=(tu or ""),
+                                logbook_entry=lbo, is_logbook_entry_author=(tripperson == author))
         pto.save()
-
-def ParseDate(tripdate, year):
+
+def ParseDate(tripdate, year):
     mdatestandard = re.match("(\d\d\d\d)-(\d\d)-(\d\d)", tripdate)
     mdategoof = re.match("(\d\d?)/0?(\d)/(20|19)?(\d\d)", tripdate)
     if mdatestandard:
-        assert mdatestandard.group(1) == year, (tripdate, year)
+        assert mdatestandard.group(1) == year, (tripdate, year)
         year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3))
-    elif mdategoof:
-        assert not mdategoof.group(3) or mdategoof.group(3) == year[:2]
+    elif mdategoof:
+        assert not mdategoof.group(3) or mdategoof.group(3) == year[:2]
         yadd = int(year[:2]) * 100
         day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd
     else:
-        assert False, tripdate
+        assert False, tripdate
     return datetime.date(year, month, day)
-
-# 2007, 2008, 2006
+
+# 2007, 2008, 2006
 def Parselogwikitxt(year, expedition, txt):
     trippara = re.findall("===(.*?)===([\s\S]*?)(?====)", txt)
     for triphead, triptext in trippara:
@@ -150,11 +150,11 @@ def Parselogwikitxt(year, expedition, txt):
             tu = ""
             #assert tripcave == "Journey", (triphead, triptext)
 
-        ldate = ParseDate(tripdate.strip(), year)
+        ldate = ParseDate(tripdate.strip(), year)
         #print "\n", tripcave, "---   ppp", trippeople, len(triptext)
-        EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople, expedition=expedition, tu=tu)
+        EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople, expedition=expedition, tu=tu)
 
-# 2002, 2004, 2005
+# 2002, 2004, 2005
 def Parseloghtmltxt(year, expedition, txt):
     tripparas = re.findall("<hr\s*/>([\s\S]*?)(?=<hr)", txt)
     for trippara in tripparas:
@@ -169,85 +169,86 @@ def Parseloghtmltxt(year, expedition, txt):
         assert s, trippara
 
         tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
-        ldate = ParseDate(tripdate.strip(), year)
-        #assert tripid[:-1] == "t" + tripdate, (tripid, tripdate)
-        trippeople = re.sub("Ol(?!l)", "Olly", trippeople)        
-        trippeople = re.sub("Wook(?!e)", "Wookey", trippeople)        
-        triptitles = triptitle.split(" - ")
-        if len(triptitles) >= 2:
-            tripcave = triptitles[0]
-        else:
+        ldate = ParseDate(tripdate.strip(), year)
+        #assert tripid[:-1] == "t" + tripdate, (tripid, tripdate)
+        trippeople = re.sub("Ol(?!l)", "Olly", trippeople)        
+        trippeople = re.sub("Wook(?!e)", "Wookey", trippeople)        
+        triptitles = triptitle.split(" - ")
+        if len(triptitles) >= 2:
+            tripcave = triptitles[0]
+        else:
             tripcave = "UNKNOWN"
         #print "\n", tripcave, "---   ppp", trippeople, len(triptext)
         ltriptext = re.sub("</p>", "", triptext)
         ltriptext = re.sub("\s*?\n\s*", " ", ltriptext)
-        ltriptext = re.sub("<p>", "\n\n", ltriptext).strip()
-        EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu)
+        ltriptext = re.sub("<p>", "\n\n", ltriptext).strip()
+        EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu)
+
 
-
-# main parser for pre-2001.  simpler because the data has been hacked so much to fit it
+# main parser for pre-2001.  simpler because the data has been hacked so much to fit it
 def Parseloghtml01(year, expedition, txt):
     tripparas = re.findall("<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
     for trippara in tripparas:
         s = re.match(u"(?s)\s*(?:<p>)?(.*?)</?p>(.*)$(?i)", trippara)
-        assert s, trippara[:100]
+        assert s, trippara[:100]
         tripheader, triptext = s.group(1), s.group(2)
-        mtripid = re.search('<a id="(.*?)"', tripheader)
-        tripid = mtripid and mtripid.group(1) or ""
-        tripheader = re.sub("</?(?:[ab]|span)[^>]*>", "", tripheader)
-
-        #print [tripheader]
-        #continue
-
+        mtripid = re.search('<a id="(.*?)"', tripheader)
+        tripid = mtripid and mtripid.group(1) or ""
+        tripheader = re.sub("</?(?:[ab]|span)[^>]*>", "", tripheader)
+
+        #print [tripheader]
+        #continue
+
         tripdate, triptitle, trippeople = tripheader.split("|")
-        ldate = ParseDate(tripdate.strip(), year)
-
-        mtu = re.search('<p[^>]*>(T/?U.*)', triptext)
-        if mtu:
-            tu = mtu.group(1)
-            triptext = triptext[:mtu.start(0)] + triptext[mtu.end():]
-        else:
-            tu = ""
-
-        triptitles = triptitle.split(" - ")
-        tripcave = triptitles[0].strip()
-
+        ldate = ParseDate(tripdate.strip(), year)
+
+        mtu = re.search('<p[^>]*>(T/?U.*)', triptext)
+        if mtu:
+            tu = mtu.group(1)
+            triptext = triptext[:mtu.start(0)] + triptext[mtu.end():]
+        else:
+            tu = ""
+
+        triptitles = triptitle.split(" - ")
+        tripcave = triptitles[0].strip()
+
         ltriptext = re.sub("</p>", "", triptext)
         ltriptext = re.sub("\s*?\n\s*", " ", ltriptext)
         ltriptext = re.sub("<p>", "\n\n", ltriptext).strip()
-        #ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!]", "NONASCII", ltriptext)
-
-        #print ldate, trippeople.strip()
-            # could includ the tripid (url link for cross referencing)
-        EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu)
-
+        #ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!]", "NONASCII", ltriptext)
+
+        #print ldate, trippeople.strip()
+            # could includ the tripid (url link for cross referencing)
+        EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu)
+
 def Parseloghtml03(year, expedition, txt):
     tripparas = re.findall("<hr\s*/>([\s\S]*?)(?=<hr)", txt)
     for trippara in tripparas:
         s = re.match(u"(?s)\s*<p>(.*?)</p>(.*)$", trippara)
-        assert s, trippara
+        assert s, trippara
         tripheader, triptext = s.group(1), s.group(2)
-        tripheader = re.sub("&nbsp;", " ", tripheader)
-        tripheader = re.sub("\s+", " ", tripheader).strip()
-        sheader = tripheader.split(" -- ")
-        tu = ""
-        if re.match("T/U|Time underwater", sheader[-1]):
-            tu = sheader.pop()
-        if len(sheader) != 3:
-            print sheader
-        #    continue
-        tripdate, triptitle, trippeople = sheader
-        ldate = ParseDate(tripdate.strip(), year)
-        triptitles = triptitle.split(" , ")
-        if len(triptitles) >= 2:
-            tripcave = triptitles[0]
-        else:
+        tripheader = re.sub("&nbsp;", " ", tripheader)
+        tripheader = re.sub("\s+", " ", tripheader).strip()
+        sheader = tripheader.split(" -- ")
+        tu = ""
+        if re.match("T/U|Time underwater", sheader[-1]):
+            tu = sheader.pop()
+        if len(sheader) != 3:
+            print sheader
+        #    continue
+        tripdate, triptitle, trippeople = sheader
+        ldate = ParseDate(tripdate.strip(), year)
+        triptitles = triptitle.split(" , ")
+        if len(triptitles) >= 2:
+            tripcave = triptitles[0]
+        else:
             tripcave = "UNKNOWN"
         #print tripcave, "---   ppp", triptitle, trippeople, len(triptext)
         ltriptext = re.sub("</p>", "", triptext)
         ltriptext = re.sub("\s*?\n\s*", " ", ltriptext)
-        ltriptext = re.sub("<p>", "\n\n", ltriptext).strip()
-        ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext)
-        EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu)
+        ltriptext = re.sub("<p>", "\n\n", ltriptext).strip()
+        ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext)
+        EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu)
 
 def LoadLogbooks():
     models.LogbookEntry.objects.all().delete()
@@ -272,12 +273,13 @@ def LoadLogbooks():
         expedition = models.Expedition.objects.filter(year = year)[0]
         fin = open(os.path.join(expowebbase, lloc))
         txt = fin.read()
-        fin.close()
-        parsefunc(year, expedition, txt)
+        fin.close()
+        parsefunc(year, expedition, txt)
         
 
 # command line run through the loading stages
 # you can comment out these in turn to control what gets reloaded
-LoadExpos()
-LoadPersons()
+LoadExpos()
+LoadPersons()
 LoadLogbooks()
author	substantialnoninfringinguser <substantialnoninfringinguser@gmail.com>	2009-05-13 05:21:05 +0100
committer	substantialnoninfringinguser <substantialnoninfringinguser@gmail.com>	2009-05-13 05:21:05 +0100
commit	fa6bf04522d2606adbbaf7ff2175326059c55f22 (patch)
tree	136ffa6d29485b00f68e5e7c47944b25c6b5394d /parsers/logbooks.py
parent	5e6bf2436d8dd9f952d6a219c77b448969d65778 (diff)
download	troggle-fa6bf04522d2606adbbaf7ff2175326059c55f22.tar.gz troggle-fa6bf04522d2606adbbaf7ff2175326059c55f22.tar.bz2 troggle-fa6bf04522d2606adbbaf7ff2175326059c55f22.zip