summaryrefslogtreecommitdiffstats
path: root/parsers
diff options
context:
space:
mode:
authorsubstantialnoninfringinguser <substantialnoninfringinguser@gmail.com>2009-05-13 05:14:03 +0100
committersubstantialnoninfringinguser <substantialnoninfringinguser@gmail.com>2009-05-13 05:14:03 +0100
commita33700ca1e69188873af3009c8562ac635c08226 (patch)
tree6df40615cebdcd18d330c023c141e23fa64b4461 /parsers
parent302f4a1a0f9ced0d8e81c5462f7feff65c5b349b (diff)
downloadtroggle-a33700ca1e69188873af3009c8562ac635c08226.tar.gz
troggle-a33700ca1e69188873af3009c8562ac635c08226.tar.bz2
troggle-a33700ca1e69188873af3009c8562ac635c08226.zip
[svn] we can parse one 2004 logbook in here. corrections made to folk.csv
Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8036 by julian @ 10/26/2008 11:27 PM
Diffstat (limited to 'parsers')
-rw-r--r--parsers/logbooks.py56
-rw-r--r--parsers/survex.py137
2 files changed, 106 insertions, 87 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 5c38d41..df2c9a9 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -3,7 +3,6 @@
import settings
import expo.models as models
import csv
-import sqlite3
import re
import os
import datetime
@@ -24,11 +23,12 @@ header = dict(zip(headers, range(len(headers))))
def LoadExpos():
models.Expedition.objects.all().delete()
- y = models.Expedition(year = "2008", name = "CUCC expo2008")
- y.save()
- for year in headers[5:]:
- y = models.Expedition(year = year, name = "CUCC expo%s" % y)
+ years = headers[5:]
+ years.append("2008")
+ for year in years:
+ y = models.Expedition(year = year, name = "CUCC expo%s" % year)
y.save()
+ print "lll", years
def LoadPersons():
models.Person.objects.all().delete()
@@ -40,44 +40,59 @@ def LoadPersons():
name = person[header["Name"]]
name = re.sub("<.*?>", "", name)
lname = name.split()
- if len(lname) >= 2:
- firstname, lastname = lname[0], lname[1]
+ mbrack = re.match("\((.*?)\)", lname[-1])
+
+ if mbrack:
+ nickname = mbrack.group(1)
+ del lname[-1]
+ elif name == "Anthony Day":
+ nickname = "Dour"
else:
+ nickname = ""
+
+ if len(lname) == 3: # van something
+ firstname, lastname = lname[0], "%s %s" % (lname[1], lname[2])
+ elif len(lname) == 2:
+ firstname, lastname = lname[0], lname[1]
+ elif len(lname) == 1:
firstname, lastname = lname[0], ""
- print firstname, lastname
+ else:
+ assert False, lname
+ #print firstname, lastname
#assert lastname == person[header[""]], person
+
pObject = models.Person(first_name = firstname,
last_name = lastname,
- is_guest = person[header["Guest"]] == "1",
is_vfho = person[header["VfHO member"]],
mug_shot = person[header["Mugshot"]])
pObject.save()
+ is_guest = person[header["Guest"]] == "1" # this is really a per-expo category; not a permanent state
for year, attended in zip(headers, person)[5:]:
yo = models.Expedition.objects.filter(year = year)[0]
if attended == "1" or attended == "-1":
- pyo = models.PersonExpedition(person = pObject, expedition = yo)
+ pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname=nickname, is_guest=is_guest)
pyo.save()
if name in expoers2008:
print "2008:", name
expomissing.discard(name)
yo = models.Expedition.objects.filter(year = "2008")[0]
- pyo = models.PersonExpedition(person = pObject, expedition = yo)
+ pyo = models.PersonExpedition(person = pObject, expedition = yo, is_guest=is_guest)
pyo.save()
- print expomissing
+ # this fills in those people for whom 2008 was their first expo
for name in expomissing:
firstname, lastname = name.split()
+ is_guest = name in ["Eeva Makiranta", "Kieth Curtis"]
pObject = models.Person(first_name = firstname,
last_name = lastname,
- is_guest = name in ["Eeva Makiranta", "Kieth Curtis"],
is_vfho = False,
mug_shot = "")
pObject.save()
yo = models.Expedition.objects.filter(year = "2008")[0]
- pyo = models.PersonExpedition(person = pObject, expedition = yo)
+ pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname="", is_guest=is_guest)
pyo.save()
@@ -95,7 +110,7 @@ def GetTripPersons(trippeople, expedition):
if tripperson and tripperson[0] != '*':
#assert tripperson in personyearmap, "'%s' << %s\n\n %s" % (tripperson, trippeople, personyearmap)
personyear = expedition.GetPersonExpedition(tripperson)
- print personyear
+ #print personyear
res.append(personyear)
if mul:
author = personyear
@@ -154,6 +169,7 @@ def Parseloghtmltxt(year, expedition, txt):
else:
assert False, tripdate
ldate = datetime.date(year, month, day)
+ print "ttt", tripdate
#assert tripid[:-1] == "t" + tripdate, (tripid, tripdate)
trippersons, author = GetTripPersons(trippeople, expedition)
tripcave = ""
@@ -162,7 +178,8 @@ def Parseloghtmltxt(year, expedition, txt):
tu = timeug or ""
for tripperson in trippersons:
- pto = models.PersonTrip(personexpedition = tripperson, place=tripcave, date=ldate, timeunderground=tu, logbookentry=lbo)
+ pto = models.PersonTrip(personexpedition = tripperson, place=tripcave, date=ldate, timeunderground=tu,
+ logbookentry=lbo, is_logbookentryauthor=(tripperson == author))
pto.save()
@@ -183,15 +200,16 @@ def LoadLogbooks():
fin = open(os.path.join(expowebbase, lloc))
txt = fin.read()
fin.close()
- #print personyearmap
if year >= "2007":
Parselogwikitxt(year, personyearmap, txt)
else:
Parseloghtmltxt(year, expedition, txt)
+
# command line run through the loading stages
-LoadExpos()
-LoadPersons()
+# you can comment out these in turn to control what gets reloaded
+#LoadExpos()
+#LoadPersons()
LoadLogbooks()
diff --git a/parsers/survex.py b/parsers/survex.py
index f6d11e4..99044cb 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -1,68 +1,69 @@
-import settings
-import expo.models as models
-import re
-import os
-
-re_include_extension = re.compile(r"^\s*\*include\s+([^\s]*).svx$", re.IGNORECASE)
-re_include_no_extension = re.compile(r"^\s*\*include\s+([^\s]*)$", re.IGNORECASE)
-re_begin = re.compile(r"^\s*\*begin\s+(.*?)\s*$", re.IGNORECASE)
-re_end = re.compile(r"^\s*\*end\s+(.*?)\s*$", re.IGNORECASE)
-
-def save(x): #There seems to be an intermittent problem with sqlite and Vista, this should fix it
- try:
- x.save()
- except sqlite3.OperationalError:
- print "Error"
- time.sleep(1)
- save(x)
-
-def fileIterator(directory, filename):
- survex_file = os.path.join(directory, filename + ".svx")
- f = open(os.path.join(settings.SURVEX_DATA, survex_file), "rb")
- char = 0
- for line in f.readlines():
- line = unicode(line, "latin1")
- include_extension = re_include_extension.match(line)
- include_no_extension = re_include_no_extension.match(line)
- def a(include):
- link = re.split(r"/|\\", include)
- print os.path.join(directory, *link[:-1]), link[-1]
- return fileIterator(os.path.join(directory, *link[:-1]), link[-1])
- if include_extension:
- for sf, c, l in a(include_extension.groups()[0]):
- yield sf, c, l
- elif include_no_extension:
- for sf, c, l in a(include_no_extension.groups()[0]):
- yield sf, c, l
- else:
- yield survex_file, char, line
- char = char + len(line)
-
-def make_model(name, parent, iter_lines, sf, c, l):
- if parent:
- m = models.SurvexBlock(name = name, parent = parent, begin_file = sf, begin_char = c, text = l)
- else:
- m = models.SurvexBlock(name = name, begin_file = sf, begin_char = c, text = l)
- save(m)
- for survex_file, count, line in iter_lines:
- begin = re_begin.match(line.split(";")[0])
- end = re_end.match(line.split(";")[0])
- if begin:
- make_model(begin.groups()[0], m, iter_lines, survex_file, count, line)
- elif end:
- m.text = m.text + line
- m.end_file = survex_file
- m.end_char = count
- save(m)
- assert (end.groups()[0]).lower() == (name).lower()
- return None
- else:
- m.text = m.text + line
- m.text = m.text + line
- m.end_file = survex_file
- m.end_char = count
- save(m)
-
-
-filename = "all"
-make_model("", None, fileIterator("", filename), filename, 0, "")
+import settings
+import expo.models as models
+import re
+import os
+
+re_include_extension = re.compile(r"^\s*\*include\s+([^\s]*).svx$", re.IGNORECASE)
+re_include_no_extension = re.compile(r"^\s*\*include\s+([^\s]*)$", re.IGNORECASE)
+re_begin = re.compile(r"^\s*\*begin\s+(.*?)\s*$", re.IGNORECASE)
+re_end = re.compile(r"^\s*\*end\s+(.*?)\s*$", re.IGNORECASE)
+
+def save(x): #There seems to be an intermittent problem with sqlite and Vista, this should fix it
+ #try:
+ x.save()
+ #except Exception: #sqlite3.OperationalError:
+ # print "Error"
+ # time.sleep(1)
+ # save(x)
+
+def fileIterator(directory, filename):
+ survex_file = os.path.join(directory, filename + ".svx")
+ f = open(os.path.join(settings.SURVEX_DATA, survex_file), "rb")
+ char = 0
+ for line in f.readlines():
+ line = unicode(line, "latin1").decode("utf-8")
+ include_extension = re_include_extension.match(line)
+ include_no_extension = re_include_no_extension.match(line)
+ def a(include):
+ link = re.split(r"/|\\", include)
+ print os.path.join(directory, *link[:-1]), link[-1]
+ return fileIterator(os.path.join(directory, *link[:-1]), link[-1])
+ if include_extension:
+ for sf, c, l in a(include_extension.groups()[0]):
+ yield sf, c, l
+ elif include_no_extension:
+ for sf, c, l in a(include_no_extension.groups()[0]):
+ yield sf, c, l
+ else:
+ yield survex_file, char, line
+ char = char + len(line)
+
+def make_model(name, parent, iter_lines, sf, c, l):
+ if parent:
+ m = models.SurvexBlock(name = name, parent = parent, begin_file = sf, begin_char = c, text = l)
+ else:
+ m = models.SurvexBlock(name = name, begin_file = sf, begin_char = c, text = l)
+ save(m)
+ for survex_file, count, line in iter_lines:
+ begin = re_begin.match(line.split(";")[0])
+ end = re_end.match(line.split(";")[0])
+ if begin:
+ make_model(begin.groups()[0], m, iter_lines, survex_file, count, line)
+ elif end:
+ m.text = m.text + line
+ m.end_file = survex_file
+ m.end_char = count
+ print len(m.text)
+ save(m)
+ assert (end.groups()[0]).lower() == (name).lower()
+ return None
+ else:
+ m.text = m.text + line
+ m.text = m.text + line
+ m.end_file = survex_file
+ m.end_char = count
+ save(m)
+
+
+filename = "all"
+make_model("", None, fileIterator("", filename), filename, 0, "")