summaryrefslogtreecommitdiffstats
path: root/parsers
diff options
context:
space:
mode:
authorsubstantialnoninfringinguser <substantialnoninfringinguser@gmail.com>2009-05-13 05:13:38 +0100
committersubstantialnoninfringinguser <substantialnoninfringinguser@gmail.com>2009-05-13 05:13:38 +0100
commitb503d3d588474cc41bffc01eca7654bb8c6f4a42 (patch)
tree782956fc07f18a13ae24fc0c045e970c6ba03f04 /parsers
downloadtroggle-b503d3d588474cc41bffc01eca7654bb8c6f4a42.tar.gz
troggle-b503d3d588474cc41bffc01eca7654bb8c6f4a42.tar.bz2
troggle-b503d3d588474cc41bffc01eca7654bb8c6f4a42.zip
[svn] Initial troggle checkin
This is a development site using Django 1.0 Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8034 by julian @ 10/26/2008 9:04 PM
Diffstat (limited to 'parsers')
-rw-r--r--parsers/__init__.py0
-rw-r--r--parsers/cavetab.py272
-rw-r--r--parsers/logbooks.py197
-rw-r--r--parsers/survex.py31
4 files changed, 500 insertions, 0 deletions
diff --git a/parsers/__init__.py b/parsers/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/parsers/__init__.py
diff --git a/parsers/cavetab.py b/parsers/cavetab.py
new file mode 100644
index 0000000..bd3d81b
--- /dev/null
+++ b/parsers/cavetab.py
@@ -0,0 +1,272 @@
+# -*- coding: utf-8 -*-
+
+import settings
+import expo.models as models
+import csv
+import time
+import sqlite3
+import re
+import os
+
##format of CAVETAB2.CSV is
# Column indices into each CAVETAB2.CSV row, in file order.  They are
# used as line[<ColumnName>] throughout the import loop below.
KatasterNumber = 0
KatStatusCode = 1
Entrances = 2
UnofficialNumber = 3
MultipleEntrances = 4
AutogenFile = 5
LinkFile = 6
LinkEntrance = 7
Name = 8
UnofficialName = 9
Comment = 10
Area = 11
Explorers = 12
UndergroundDescription = 13
Equipment = 14
QMList = 15
KatasterStatus = 16
References = 17
UndergroundCentreLine = 18
UndergroundDrawnSurvey = 19
SurvexFile = 20
Length = 21
Depth = 22
Extent = 23
Notes = 24
# Entrance-related columns follow.
EntranceName = 25
TagPoint = 26
OtherPoint = 27
DescriptionOfOtherPoint = 28
ExactEntrance = 29
TypeOfFix = 30
GPSpreSA = 31
GPSpostSA = 32
Northing = 33
Easting = 34
Altitude = 35
Bearings = 36
Map = 37
Location = 38
Approach = 39
EntranceDescription = 40
PhotoOfLocation = 41
Marking = 42
MarkingComment = 43
Findability = 44
FindabilityComment = 45
+
# Open the master cave table at import time; the reader is consumed by
# the module-level loop further down.
cavetab = open(os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV"))
caveReader = csv.reader(cavetab)
caveReader.next() # Strip out column headers
+
+
+def save(x): #There seems to be an intermitent problem with sqlite and Vista, this should fix it
+ try:
+ x.save()
+ except sqlite3.OperationalError:
+ print "Error"
+ time.sleep(1)
+ save(x)
+
def html_to_wiki(text):
    """Convert a fragment of CAVETAB2 html into wiki markup.

    Only <ul>/<ol>/<li> structures are converted: nested lists become
    runs of "*"/"#" prefixes; a leading </p> or trailing <p> is
    stripped.  Non-str input is returned unchanged.
    """
    if type(text) != str:
        return text
    text = unicode(text, "utf-8")
    # (A long block of commented-out html-entity substitutions lived
    # here; entities are currently passed through untouched.)
    #Lists
    text = re.sub("^</p>(.*)", r"\1", text)
    text = re.sub("(.*)<p>$", r"\1", text)
    out = ""
    lists = ""  # current nesting markers, e.g. "*#" = <ol> inside <ul>
    while text:
        mstar = re.match("^(.*?)<ul>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
        munstar = re.match("^(\s*)</ul>(.*)$", text, re.DOTALL)
        mhash = re.match("^(.*?)<ol>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
        munhash = re.match("^(\s*)</ol>(.*)$", text, re.DOTALL)
        mitem = re.match("^(\s*)<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
        ms = [len(m.groups()[0]) for m in [mstar, munstar, mhash, munhash, mitem] if m]
        def min_(i, l):
            # True when match i starts earliest in the remaining text.
            try:
                v = i.groups()[0]
                l.remove(len(v))
                # bug fix: this was min(l, 1000000000); under Python 2's
                # cross-type ordering min(list, int) is always the int,
                # so branch order -- not position in the text -- decided
                # which construct was consumed first.
                return len(v) < min(l + [1000000000])
            except:
                return False
        if min_(mstar, ms):
            lists += "*"
            pre, val, post = mstar.groups()
            out += pre + "\n" + lists + " " + val
            text = post
        elif min_(mhash, ms):
            lists += "#"
            pre, val, post = mhash.groups()
            out += pre + "\n" + lists + " " + val
            text = post
        elif min_(mitem, ms):
            pre, val, post = mitem.groups()
            out += "\n" + lists + " " + val
            text = post
        elif min_(munstar, ms):
            lists = lists[:-1]
            text = munstar.groups()[1]
        elif min_(munhash, ms):
            # bug fix: this was lists.pop(), but lists is a str (no
            # pop method); drop the last marker as the </ul> branch does.
            lists = lists[:-1]
            text = munhash.groups()[1]
        else:
            out += text
            text = ""
    # (A dead debugging loop that re-matched against `text` -- always ""
    # at this point -- was removed; its body could never execute.)
    return out
+
# Make sure the two top-level kataster areas exist, then keep handles
# to them for the import loop below.
for kat_area in ('1623', '1626'):
    if not models.Area.objects.filter(short_name = kat_area):
        save(models.Area(short_name = kat_area))
area1626 = models.Area.objects.filter(short_name = '1626')[0]
area1623 = models.Area.objects.filter(short_name = '1623')[0]
+
# Main import pass over CAVETAB2.CSV.  A row describes a cave, an extra
# entrance of the preceding cave, or both: MultipleEntrances == 'yes'
# starts a multi-entrance cave whose entrances follow on later rows;
# '' means a one-row cave with its single entrance.
counter=0  # NOTE(review): never used anywhere below
for line in caveReader :
    if line[Area] == 'nonexistent':
        continue
    entranceLetters=[] #Used in caves that have multiple entrances, which are not described on separate lines
    if line[MultipleEntrances] == 'yes' or line[MultipleEntrances]=='':
        args = {}
        # Copy a CSV field into the Cave constructor kwargs when non-empty.
        def addToArgs(CSVname, modelName):
            if line[CSVname]:
                args[modelName] = html_to_wiki(line[CSVname])
        addToArgs(KatasterNumber, "kataster_number")
        addToArgs(KatStatusCode, "kataster_code")
        addToArgs(UnofficialNumber, "unofficial_number")
        addToArgs(Name, "official_name")
        addToArgs(Comment, "notes")
        addToArgs(Explorers, "explorers")
        addToArgs(UndergroundDescription, "underground_description")
        addToArgs(Equipment, "equipment")
        addToArgs(KatasterStatus, "kataster_status")
        addToArgs(References, "references")
        addToArgs(UndergroundCentreLine, "underground_centre_line")
        addToArgs(UndergroundDrawnSurvey, "survey")
        addToArgs(Length, "length")
        addToArgs(Depth, "depth")
        addToArgs(Extent, "extent")
        addToArgs(SurvexFile, "survex_file")
        addToArgs(Notes, "notes")  # NOTE(review): overwrites the Comment -> "notes" mapping above when both columns are set -- confirm intended

        newCave = models.Cave(**args)
        save(newCave)

        # Attach the cave to its kataster area; unknown 1623 sub-areas
        # are created on demand with area1623 as parent.
        if line[Area]:
            if line[Area] == "1626":
                newCave.area.add(area1626)
            else:
                area = models.Area.objects.filter(short_name = line[Area])
                if area:
                    newArea = area[0]
                else:
                    newArea = models.Area(short_name = line[Area], parent = area1623)
                    save(newArea)
                newCave.area.add(newArea)
        else:
            newCave.area.add(area1623)

        save(newCave)

        if line[UnofficialName]:
            newUnofficialName = models.OtherCaveName(cave = newCave, name = line[UnofficialName])
            save(newUnofficialName)
    # Entrance rows: '' (single-entrance cave), 'entrance', 'last entrance'.
    if line[MultipleEntrances] == '' or \
       line[MultipleEntrances] == 'entrance' or \
       line[MultipleEntrances] == 'last entrance':
        args = {}
        def addToArgs(CSVname, modelName):
            if line[CSVname]:
                args[modelName] = html_to_wiki(line[CSVname])
        # Like addToArgs but maps the CSV value through a translation table.
        def addToArgsViaDict(CSVname, modelName, dictionary):
            if line[CSVname]:
                args[modelName] = dictionary[html_to_wiki(line[CSVname])]
        addToArgs(EntranceName, 'name')
        addToArgs(Explorers, 'explorers')
        addToArgs(Map, 'map_description')
        addToArgs(Location, 'location_description')
        addToArgs(Approach, 'approach')
        addToArgs(EntranceDescription, 'entrance_description')
        addToArgs(UndergroundDescription, 'underground_description')
        addToArgs(PhotoOfLocation, 'photo')
        addToArgsViaDict(Marking, 'marking', {"Paint": "P",
                                              "Paint (?)": "P?",
                                              "Tag": "T",
                                              "Tag (?)": "T?",
                                              "Retagged": "R",
                                              "Retag": "R",
                                              "Spit": "S",
                                              "Spit (?)": "S?",
                                              "Unmarked": "U",
                                              "": "?",
                                              })
        addToArgs(MarkingComment, 'marking_comment')
        addToArgsViaDict(Findability, 'findability', {"Surveyed": "S",
                                                      "Lost": "L",
                                                      "Refindable": "R",
                                                      "": "?",
                                                      "?": "?",
                                                      })
        addToArgs(FindabilityComment, 'findability_description')
        addToArgs(Easting, 'easting')
        addToArgs(Northing, 'northing')
        addToArgs(Altitude, 'alt')
        addToArgs(DescriptionOfOtherPoint, 'other_description')
        # Create a SurveyStation record for the named point and store it.
        def addToArgsSurveyStation(CSVname, modelName):
            if line[CSVname]:
                surveyPoint = models.SurveyStation(name = line[CSVname])
                save(surveyPoint)
                args[modelName] = html_to_wiki(surveyPoint)  # NOTE(review): passes the model object, not a string; html_to_wiki returns non-str input unchanged -- confirm intended
        addToArgsSurveyStation(TagPoint, 'tag_station')
        addToArgsSurveyStation(ExactEntrance, 'exact_station')
        addToArgsSurveyStation(OtherPoint, 'other_station')
        addToArgs(OtherPoint, 'other_description')
        if line[GPSpreSA]:
            addToArgsSurveyStation(GPSpreSA, 'other_station')
            args['other_description'] = 'pre selective availability GPS'
        if line[GPSpostSA]:
            addToArgsSurveyStation(GPSpostSA, 'other_station')
            args['other_description'] = 'post selective availability GPS'
        addToArgs(Bearings, 'bearings')
        newEntrance = models.Entrance(**args)
        save(newEntrance)

        if line[Entrances]:
            entrance_letter = line[Entrances]
        else:
            entrance_letter = ''

        # NOTE(review): newCave here may come from a previous iteration
        # (or be undefined on a malformed first row) -- confirm the CSV
        # always puts the cave row before its entrance rows.
        newCaveAndEntrance = models.CaveAndEntrance(cave = newCave, entrance = newEntrance, entrance_letter = entrance_letter)
        save(newCaveAndEntrance)
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
new file mode 100644
index 0000000..5c38d41
--- /dev/null
+++ b/parsers/logbooks.py
@@ -0,0 +1,197 @@
+#.-*- coding: utf-8 -*-
+
+import settings
+import expo.models as models
+import csv
+import sqlite3
+import re
+import os
+import datetime
+
+# Dave Johnson (Stonker) is hacked -- are there two of this DJ name
+# Dave Collins (Scout) is hacked
+# Letty ten Harkel has middle , tu = timeug or ""name removed
+# the <span lang=""sv""> have been removed
+# Dave Milne (Lummat)
+# Ben van Millingen
+# Rebecca Lawson (Becka)
+
# Load folk.csv at import time; `header` maps column name -> index and
# `headers` keeps the raw header row (year columns start at index 5).
persontab = open(os.path.join(settings.EXPOWEB, "noinfo", "folk.csv"))
personreader = csv.reader(persontab)
headers = personreader.next()
header = dict(zip(headers, range(len(headers))))
+
+
def LoadExpos():
    """Wipe all Expedition records and recreate one per year column of
    folk.csv, plus a hand-made 2008 expedition (not yet in the CSV)."""
    models.Expedition.objects.all().delete()
    y = models.Expedition(year = "2008", name = "CUCC expo2008")
    y.save()
    for year in headers[5:]:
        # bug fix: the name was built with "%s" % y -- interpolating the
        # previously created Expedition *object* rather than the year --
        # which produced garbage names; interpolate the year string.
        y = models.Expedition(year = year, name = "CUCC expo%s" % year)
        y.save()
+
def LoadPersons():
    """Wipe and re-create Person and PersonExpedition records from
    folk.csv; 2008 attendees missing from the CSV come from the
    hard-coded expoers2008 list."""
    models.Person.objects.all().delete()
    models.PersonExpedition.objects.all().delete()
    expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
    expomissing = set(expoers2008)

    for person in personreader:
        name = person[header["Name"]]
        name = re.sub("<.*?>", "", name)  # strip html markup from the name
        lname = name.split()
        if len(lname) >= 2:
            firstname, lastname = lname[0], lname[1]
        else:
            firstname, lastname = lname[0], ""
        print firstname, lastname
        #assert lastname == person[header[""]], person
        pObject = models.Person(first_name = firstname,
                                last_name = lastname,
                                is_guest = person[header["Guest"]] == "1",
                                is_vfho = person[header["VfHO member"]],  # NOTE(review): raw CSV string, not a bool -- confirm the model coerces it
                                mug_shot = person[header["Mugshot"]])
        pObject.save()

        # Year columns start at index 5; "1" / "-1" both count as attended.
        for year, attended in zip(headers, person)[5:]:
            yo = models.Expedition.objects.filter(year = year)[0]
            if attended == "1" or attended == "-1":
                pyo = models.PersonExpedition(person = pObject, expedition = yo)
                pyo.save()

        if name in expoers2008:
            print "2008:", name
            expomissing.discard(name)
            yo = models.Expedition.objects.filter(year = "2008")[0]
            pyo = models.PersonExpedition(person = pObject, expedition = yo)
            pyo.save()


    # Anyone left in expomissing attended 2008 but has no folk.csv row.
    print expomissing
    for name in expomissing:
        firstname, lastname = name.split()
        pObject = models.Person(first_name = firstname,
                                last_name = lastname,
                                is_guest = name in ["Eeva Makiranta", "Kieth Curtis"],  # NOTE(review): "Kieth" looks like a typo -- the list above spells "Keith Curtis", so he is never flagged as guest; confirm
                                is_vfho = False,
                                mug_shot = "")
        pObject.save()
        yo = models.Expedition.objects.filter(year = "2008")[0]
        pyo = models.PersonExpedition(person = pObject, expedition = yo)
        pyo.save()
+
+
#
# the logbook loading section
#
def GetTripPersons(trippeople, expedition):
    """Resolve a logbook people string (e.g. "A, B & <u>C</u>") into
    PersonExpedition objects on the given expedition.

    Returns (persons, author): <u>...</u> marks the author; otherwise
    the last resolved person is used.  Names starting with '*' are
    skipped as non-member placeholders.
    """
    res = [ ]
    author = None
    for tripperson in re.split(",|\+|&| and ", trippeople):
        tripperson = tripperson.strip()
        mul = re.match("<u>(.*?)</u>$", tripperson)
        if mul:
            tripperson = mul.group(1)
        if tripperson and tripperson[0] != '*':
            #assert tripperson in personyearmap, "'%s' << %s\n\n %s" % (tripperson, trippeople, personyearmap)
            personyear = expedition.GetPersonExpedition(tripperson)
            print personyear
            res.append(personyear)
            if mul:
                author = personyear
    if not author:
        # NOTE(review): raises IndexError when no person was resolved --
        # confirm callers never pass an empty/all-'*' people string.
        author = res[-1]
    return res, author
+
def Parselogwikitxt(year, personyearmap, txt):
    """Parse a wiki-format logbook (trips delimited by
    ===date|place|people=== headers) and store a LogbookEntry plus its
    cavers for each trip.

    NOTE(review): personyearmap is forwarded to GetTripPersons as its
    `expedition` argument, so it must expose GetPersonExpedition -- confirm.
    """
    trippara = re.findall("===(.*?)===([\s\S]*?)(?====)", txt)
    for triphead, triptext in trippara:
        tripheadp = triphead.split("|")
        assert len(tripheadp) == 3, tripheadp
        tripdate, tripplace, trippeople = tripheadp
        tripsplace = tripplace.split(" - ")
        tripcave = tripsplace[0]

        # Extract the T/U (time underground) figure from the trip text.
        tul = re.findall("T/?U:?\s*(\d+(?:\.\d*)?|unknown)\s*(hrs|hours)?", triptext)
        if tul:
            #assert len(tul) <= 1, (triphead, triptext)
            #assert tul[0][1] in ["hrs", "hours"], (triphead, triptext)
            triptime = tul[0][0]
        else:
            triptime = ""
        #assert tripcave == "Journey", (triphead, triptext)

        assert re.match("\d\d\d\d-\d\d-\d\d", tripdate), tripdate
        ldate = datetime.date(int(tripdate[:4]), int(tripdate[5:7]), int(tripdate[8:10]))
        lbo = models.LogbookEntry(date = ldate, cave = tripcave, title = tripsplace[-1], text = triptext, tu = triptime)
        lbo.save()

        trippersons, author = GetTripPersons(trippeople, personyearmap)
        for tripperson in trippersons:
            lbo.cavers.add(tripperson)
        # add the author
+ # add the author
+
def Parseloghtmltxt(year, expedition, txt):
    """Parse an html-format logbook (trips delimited by <hr/>, each with
    tripdate/trippeople/triptitle divs and an optional timeug div) and
    store LogbookEntry and PersonTrip records for the expedition."""
    tripparas = re.findall("<hr\s*/>([\s\S]*?)(?=<hr)", txt)
    for trippara in tripparas:
        s = re.match('''(?x)\s*(?:<a\s+id="(.*?)"\s*/>)?
                 \s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>
                 \s*<div\s+class="trippeople">(.*?)</div>
                 \s*<div\s+class="triptitle">(.*?)</div>
                 ([\s\S]*?)
                 \s*(?:<div\s+class="timeug">(.*?)</div>)?
                 \s*$
                 ''', trippara)
        assert s, trippara

        tripid, tripid1, tripdate, trippeople, triptitle, triptext, timeug = s.groups()
        # Dates appear either as ISO yyyy-mm-dd or as sloppy d/m/yy.
        mdatestandard = re.match("(\d\d\d\d)-(\d\d)-(\d\d)", tripdate)
        mdategoof = re.match("(\d\d?)/(\d)/(\d\d)", tripdate)
        if mdatestandard:
            year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3))  # NOTE: shadows the `year` parameter
        elif mdategoof:
            day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(3)) + 2000
        else:
            assert False, tripdate
        ldate = datetime.date(year, month, day)
        #assert tripid[:-1] == "t" + tripdate, (tripid, tripdate)
        trippersons, author = GetTripPersons(trippeople, expedition)
        tripcave = ""
        lbo = models.LogbookEntry(date = ldate, place = tripcave, title = triptitle, text = triptext, author=author)
        lbo.save()
        tu = timeug or ""

        for tripperson in trippersons:
            pto = models.PersonTrip(personexpedition = tripperson, place=tripcave, date=ldate, timeunderground=tu, logbookentry=lbo)
            pto.save()
+
+
+
def LoadLogbooks():
    """Delete all logbook entries and re-import them from the per-year
    files in yearlinks (wiki .txt format for >= 2007, html otherwise)."""
    models.LogbookEntry.objects.all().delete()
    expowebbase = os.path.join(settings.EXPOWEB, "years") # this could be a url
    yearlinks = [
#        ("2008", "2008/logbook/2008logbook.txt"),
#        ("2007", "2007/logbook/2007logbook.txt"),
#        ("2005", "2005/logbook.html"),
        ("2004", "2004/logbook.html"),
#        ("2003", "2003/logbook.html"),
        ]

    for year, lloc in yearlinks:
        expedition = models.Expedition.objects.filter(year = year)[0]
        fin = open(os.path.join(expowebbase, lloc))
        txt = fin.read()
        fin.close()
        if year >= "2007":
            # bug fix: this branch referenced `personyearmap`, a name that
            # is never defined in this module (NameError as soon as a
            # >=2007 logbook is enabled); pass the expedition, which is
            # what GetTripPersons ultimately needs.
            Parselogwikitxt(year, expedition, txt)
        else:
            Parseloghtmltxt(year, expedition, txt)
+
# command line run through the loading stages
# (module import side effect: rebuilds expeditions, people and logbooks)
LoadExpos()
LoadPersons()
LoadLogbooks()
+
+
diff --git a/parsers/survex.py b/parsers/survex.py
new file mode 100644
index 0000000..0f75e06
--- /dev/null
+++ b/parsers/survex.py
@@ -0,0 +1,31 @@
+import settings
+import expo.models as models
+import re
+import os
+
+def readFile(filename):
+ for line in fileIterator(settings.SURVEX_DATA, filename):
+ print line
+
# Match "*include path.svx" / "*include path" directives; group 1 is the
# include path without the .svx extension.  The dot is escaped so only a
# literal ".svx" suffix matches -- the original bare "." matched any
# character (e.g. "*include fooXsvx" was accepted as "foo").
re_include_extension = re.compile(r"^\s*\*include\s+([^\s]*)\.svx$", re.IGNORECASE)
re_include_no_extension = re.compile(r"^\s*\*include\s+([^\s]*)$", re.IGNORECASE)
+
+def fileIterator(directory, filename):
+ f = open(os.path.join(directory, filename + ".svx"), "rb")
+ for line in f.readlines():
+ include_extension = re_include_extension.match(line)
+ include_no_extension = re_include_no_extension.match(line)
+ def a(include):
+ link = re.split(r"/|\\", include)
+ print os.path.join(directory, *link[:-1]), link[-1]
+ return fileIterator(os.path.join(directory, *link[:-1]), link[-1])
+ if include_extension:
+ for b in a(include_extension.groups()[0]):
+ yield b
+ elif include_no_extension:
+ for b in a(include_no_extension.groups()[0]):
+ yield b
+ else:
+ yield line
+
# Module import side effect: dump the whole dataset starting from all.svx.
readFile("all")