author    substantialnoninfringinguser <substantialnoninfringinguser@gmail.com>  2009-05-13 06:15:48 +0100
committer substantialnoninfringinguser <substantialnoninfringinguser@gmail.com>  2009-05-13 06:15:48 +0100
commit    625b2156e388a92d57fa446c931bbf410f5a4e29 (patch)
tree      8866421569ba567be82b58a45d90e142c46e5c0a /parsers
parent    1a36856b407684a9d48f04e170a160b3c04aa706 (diff)
[svn] Weeks of local changes.
- Import is now non-destructive
- Parsers write output to a log file (the path can be specified in settings)
- databaseReset.py content has been divided into separate functions which can be called for varying levels of deletion and importing
- Control panel (view, template, urlpattern) added for deleting and importing
- Logins and signup fixed
- CaveArea model updated; view, hierarchical url patterns, and beginning of template added
- New site style

Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8324 by cucc @ 5/3/2009 5:56 AM
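Note: the parser diffs below lean on a new helpers.save_carefully(model, lookupAttribs, nonLookupAttribs) call and a new_since_parsing flag, but helpers.py itself is outside the 'parsers' path and is not shown here. A minimal sketch of what such a helper could look like, inferred purely from the call sites (the name and signature appear in the diffs; the body below is an assumption):

    # Sketch only: inferred from call sites in QMs.py, cavetab.py, logbooks.py
    # and people.py, which expect an (instance, created) pair back and rely on
    # new_since_parsing to protect admin edits from being clobbered on re-import.
    def save_carefully(objectType, lookupAttribs, nonLookupAttribs):
        try:
            instance = objectType.objects.get(**lookupAttribs)
            created = False
            if not instance.new_since_parsing:      # untouched since last parse
                for key, value in nonLookupAttribs.items():
                    setattr(instance, key, value)   # safe to refresh from source
                instance.save()
        except objectType.DoesNotExist:
            instance = objectType(**dict(lookupAttribs, **nonLookupAttribs))
            created = True
            instance.save()
        return (instance, created)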
Diffstat (limited to 'parsers')
-rw-r--r--  parsers/QMs.py       79
-rw-r--r--  parsers/cavetab.py   81
-rw-r--r--  parsers/logbooks.py  44
-rw-r--r--  parsers/people.py    49
4 files changed, 169 insertions, 84 deletions
diff --git a/parsers/QMs.py b/parsers/QMs.py
index c7e6789..6505ddd 100644
--- a/parsers/QMs.py
+++ b/parsers/QMs.py
@@ -1,28 +1,40 @@
+# -*- coding: UTF-8 -*-
+
import csv
import settings
from expo.models import QM, LogbookEntry, Cave
from datetime import *
+from helpers import save_carefully
import re
-#sorry that the below code is ugly. I'll fix it sometime, really! - AC
+def deleteQMs():
+ QM.objects.all().delete()
-QM.objects.all().delete()
+def parseCaveQMs(cave,inputFile):
+ """Runs through the CSV file at inputFile (which is a relative path from expoweb) and saves each QM as a QM instance."""
-def parseCaveQMs(cave,pathToCSV):
if cave=='stein':
try:
- steinBr=Cave.objects.get(official_name="Steinbr&uuml;ckenh&ouml;hle")
+ steinBr=Cave.objects.get(official_name="Steinbr&uuml;ckenh&ouml;hle")
except Cave.DoesNotExist:
print "Steinbruckenhoehle is not in the database. Please run parsers.cavetab first."
return
elif cave=='hauch':
try:
- hauchHl=Cave.objects.get(official_name="Hauchh&ouml;hle")
+ hauchHl=Cave.objects.get(official_name="Hauchh&ouml;hle")
+ except Cave.DoesNotExist:
+			print "Hauchhoehle is not in the database. Please run parsers.cavetab first."
+ return
+ elif cave =='kh':
+ try:
+ kh=Cave.objects.get(official_name="Kaninchenh&ouml;hle")
except Cave.DoesNotExist:
print "Steinbruckenhoehle is not in the database. Please run parsers.cavetab first."
- return
-
- qmPath = settings.EXPOWEB+pathToCSV
+ for file in inputFile:
+ parse_KH_QMs(kh, inputFile=file)
+ return
+
+ qmPath = settings.EXPOWEB+inputFile
qmCSVContents = open(qmPath,'r')
dialect=csv.Sniffer().sniff(qmCSVContents.read())
qmCSVContents.seek(0,0)
@@ -55,13 +67,54 @@ def parseCaveQMs(cave,pathToCSV):
newQM.ticked_off_by=placeholder
newQM.comment=line[6]
- newQM.save()
- print "QM "+str(newQM) + ' added to database\r',
- except KeyError:
+ try:
+ preexistingQM=QM.objects.get(number=QMnum, found_by__date__year=year) #if we don't have this one in the DB, save it
+ if preexistingQM.new_since_parsing==False: #if the pre-existing QM has not been modified, overwrite it
+ preexistingQM.delete()
+ newQM.save()
+ print "overwriting " + str(preexistingQM) +"\r",
+
+ else: # otherwise, print that it was ignored
+ print "preserving "+ str(preexistingQM) + ", which was edited in admin \r",
+
+ except QM.DoesNotExist: #if there is no pre-existing QM, save the new one
+ newQM.save()
+ print "QM "+str(newQM) + ' added to database\r',
+
+ except KeyError: #check on this one
continue
# except IndexError:
# print "Index error in " + str(line)
# continue
-parseCaveQMs(cave='stein',pathToCSV=r"smkridge/204/qm.csv")
-parseCaveQMs(cave='hauch',pathToCSV=r"smkridge/234/qm.csv")
+def parse_KH_QMs(kh, inputFile):
+ """import QMs from the 1623-161 (Kaninchenhöhle) html pages
+ """
+ khQMs=open(settings.EXPOWEB+inputFile,'r')
+ khQMs=khQMs.readlines()
+ for line in khQMs:
+ res=re.search('name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]',line)
+ if res:
+ res=res.groupdict()
+ year=int(res['year'])
+ #check if placeholder exists for given year, create it if not
+ placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, title="placeholder for QMs in 161", text="QMs temporarily attached to this should be re-attached to their actual trips", defaults={"date": date((year), 1, 1),"cave":kh})
+ lookupArgs={
+ 'found_by':placeholder,
+ 'number':res['number']
+ }
+ nonLookupArgs={
+ 'grade':res['grade'],
+ 'nearest_station':res['nearest_station'],
+ 'location_description':res['description']
+ }
+
+ save_carefully(QM,lookupArgs,nonLookupArgs)
+
+
+parseCaveQMs(cave='kh', inputFile=r"smkridge/161/qmtodo.htm")
+parseCaveQMs(cave='stein',inputFile=r"smkridge/204/qm.csv")
+parseCaveQMs(cave='hauch',inputFile=r"smkridge/234/qm.csv")
+
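Note: the regex in parse_KH_QMs assumes each 161 QM sits on a single line of the html page. A quick check of the pattern against an invented (hypothetical) input line of that shape:

    import re
    pattern = ('name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> '
               '(?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]')
    # Hypothetical line, shaped like the 1623-161 QM pages this parser targets:
    line = '<a name="C1999-161-27"></a> B<dd>Continuing rift at stream level [p27a]'
    res = re.search(pattern, line)
    if res:
        print res.groupdict()
        # -> year '1999', cave '161', number '27', grade 'B',
        #    description 'Continuing rift at stream level ', nearest_station 'p27a'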
diff --git a/parsers/cavetab.py b/parsers/cavetab.py
index 8fc34d8..940d867 100644
--- a/parsers/cavetab.py
+++ b/parsers/cavetab.py
@@ -9,6 +9,8 @@ import time
import re
import os
+from troggle.helpers import save_carefully
+
##format of CAVETAB2.CSV is
KatasterNumber = 0
KatStatusCode = 1
@@ -136,15 +138,20 @@ def html_to_wiki(text):
text2 = ""
return out
-def LoadCaveTab():
+def LoadCaveTab(logfile=None):
cavetab = open(os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV"))
caveReader = csv.reader(cavetab)
caveReader.next() # Strip out column headers
-
+
+ if logfile:
+ logfile.write("Beginning to import caves from "+str(cavetab)+"\n"+"-"*60+"\n")
+
for katArea in ['1623', '1626']:
if not models.Area.objects.filter(short_name = katArea):
newArea = models.Area(short_name = katArea)
newArea.save()
+ if logfile:
+ logfile.write("Added area "+str(newArea.short_name)+"\n")
area1626 = models.Area.objects.filter(short_name = '1626')[0]
area1623 = models.Area.objects.filter(short_name = '1623')[0]
@@ -153,33 +160,43 @@ def LoadCaveTab():
if line[Area] == 'nonexistent':
continue
            entranceLetters=[] #Used in caves that have multiple entrances, which are not described on separate lines
- if line[MultipleEntrances] == 'yes' or line[MultipleEntrances]=='':
+ if line[MultipleEntrances] == 'yes' or line[MultipleEntrances]=='': #When true, this line contains an actual cave, otherwise it is an extra entrance.
args = {}
+ defaultArgs = {}
+
def addToArgs(CSVname, modelName):
if line[CSVname]:
args[modelName] = html_to_wiki(line[CSVname])
+
+ def addToDefaultArgs(CSVname, modelName): #This has to do with the non-destructive import. These arguments will be passed as the "default" dictionary in a get_or_create
+ if line[CSVname]:
+ defaultArgs[modelName] = html_to_wiki(line[CSVname])
+
+ # The attributes added using "addToArgs" will be used to look up an existing cave. Those added using "addToDefaultArgs" will not.
addToArgs(KatasterNumber, "kataster_number")
- addToArgs(KatStatusCode, "kataster_code")
+ addToDefaultArgs(KatStatusCode, "kataster_code")
addToArgs(UnofficialNumber, "unofficial_number")
addToArgs(Name, "official_name")
- addToArgs(Comment, "notes")
- addToArgs(Explorers, "explorers")
- addToArgs(UndergroundDescription, "underground_description")
- addToArgs(Equipment, "equipment")
- addToArgs(KatasterStatus, "kataster_status")
- addToArgs(References, "references")
- addToArgs(UndergroundCentreLine, "underground_centre_line")
- addToArgs(UndergroundDrawnSurvey, "survey")
- addToArgs(Length, "length")
- addToArgs(Depth, "depth")
- addToArgs(Extent, "extent")
- addToArgs(SurvexFile, "survex_file")
- addToArgs(Notes, "notes")
+ addToDefaultArgs(Comment, "notes")
+ addToDefaultArgs(Explorers, "explorers")
+ addToDefaultArgs(UndergroundDescription, "underground_description")
+ addToDefaultArgs(Equipment, "equipment")
+ addToDefaultArgs(KatasterStatus, "kataster_status")
+ addToDefaultArgs(References, "references")
+ addToDefaultArgs(UndergroundCentreLine, "underground_centre_line")
+ addToDefaultArgs(UndergroundDrawnSurvey, "survey")
+ addToDefaultArgs(Length, "length")
+ addToDefaultArgs(Depth, "depth")
+ addToDefaultArgs(Extent, "extent")
+ addToDefaultArgs(SurvexFile, "survex_file")
+ addToDefaultArgs(Notes, "notes")
- newCave = models.Cave(**args)
- newCave.save()
-
- if line[Area]:
+ newCave, created=save_carefully(models.Cave, lookupAttribs=args, nonLookupAttribs=defaultArgs)
+ if logfile:
+ logfile.write("Added cave "+str(newCave)+"\n")
+
+ #If we created a new cave, add the area to it. This does mean that if a cave's identifying features have not changed, areas will not be updated from csv.
+ if created and line[Area]:
if line[Area] == "1626":
newCave.area.add(area1626)
else:
@@ -190,16 +207,20 @@ def LoadCaveTab():
newArea = models.Area(short_name = line[Area], parent = area1623)
newArea.save()
newCave.area.add(newArea)
- else:
+ elif created:
newCave.area.add(area1623)
-
+
newCave.save()
+ if logfile:
+ logfile.write("Added area "+line[Area]+" to cave "+str(newCave)+"\n")
- if line[UnofficialName]:
- newUnofficialName = models.OtherCaveName(cave = newCave, name = line[UnofficialName])
- newUnofficialName.save()
-
- if line[MultipleEntrances] == '' or \
+ if created and line[UnofficialName]:
+ newUnofficialName = models.OtherCaveName(cave = newCave, name = line[UnofficialName])
+ newUnofficialName.save()
+ if logfile:
+ logfile.write("Added unofficial name "+str(newUnofficialName)+" to cave "+str(newCave)+"\n")
+
+        if created and (line[MultipleEntrances] == '' or \
           line[MultipleEntrances] == 'entrance' or \
           line[MultipleEntrances] == 'last entrance'):
args = {}
@@ -258,6 +279,8 @@ def LoadCaveTab():
addToArgs(Bearings, 'bearings')
newEntrance = models.Entrance(**args)
newEntrance.save()
+ if logfile:
+ logfile.write("Added entrance "+str(newEntrance)+"\n")
if line[Entrances]:
entrance_letter = line[Entrances]
@@ -266,6 +289,8 @@ def LoadCaveTab():
newCaveAndEntrance = models.CaveAndEntrance(cave = newCave, entrance = newEntrance, entrance_letter = entrance_letter)
newCaveAndEntrance.save()
+ if logfile:
+ logfile.write("Added CaveAndEntrance "+str(newCaveAndEntrance)+"\n")
# lookup function modelled on GetPersonExpeditionNameLookup
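Note: LoadCaveTab now takes an optional logfile and writes one line per object it creates. A possible invocation, assuming the caller opens the log itself (the path here is made up; per the commit message the real path comes from settings):

    from parsers import cavetab
    logfile = open("/tmp/troggle_import.log", "w")   # hypothetical path
    cavetab.LoadCaveTab(logfile=logfile)             # logs added caves/areas/entrances
    logfile.close()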
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 976d71a..7c8364a 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -13,6 +13,7 @@ import re
import datetime
import os
+from troggle.helpers import save_carefully
#
# When we edit logbook entries, allow a "?" after any piece of data to say we've frigged it and
@@ -72,21 +73,23 @@ def GetTripCave(place): #need to be fuzzier about matching h
noncaveplaces = [ "Journey", "Loser Plateau" ]
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground):
+ """ saves a logbook entry and related persontrips """
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground)
# tripCave = GetTripCave(place)
-
- lbo = models.LogbookEntry(date=date, place=place, title=title[:50], text=text, author=author, expedition=expedition)
+ #
lplace = place.lower()
if lplace not in noncaveplaces:
- lbo.cave=GetCaveLookup().get(lplace)
- #print "pppp %s |%s|" % (lplace, str(lbo.cave))
-
- lbo.save()
- #print "ttt", date, place
+ cave=GetCaveLookup().get(lplace)
+
+ #Check for an existing copy of the current entry, and save
+ lookupAttribs={'date':date, 'title':title[:50]}
+ nonLookupAttribs={'place':place, 'text':text, 'author':author, 'expedition':expedition, 'cave':cave}
+ lbo, created=save_carefully(models.LogbookEntry, lookupAttribs, nonLookupAttribs)
+
for tripperson, time_underground in trippersons:
- pto = models.PersonTrip(person_expedition = tripperson, place=place, date=date, time_underground=time_underground,
- logbook_entry=lbo, is_logbook_entry_author=(tripperson == author))
- pto.save()
+ lookupAttribs={'person_expedition':tripperson, 'date':date}
+ nonLookupAttribs={'place':place,'time_underground':time_underground,'logbook_entry':lbo,'is_logbook_entry_author':(tripperson == author)}
+ save_carefully(models.PersonTrip, lookupAttribs, nonLookupAttribs)
def ParseDate(tripdate, year):
@@ -235,7 +238,7 @@ def Parseloghtml03(year, expedition, txt):
yearlinks = [
("2008", "2008/2008logbook.txt", Parselogwikitxt),
- ("2007", "2007/2007logbook.txt", Parselogwikitxt),
+ #("2007", "2007/2007logbook.txt", Parselogwikitxt),
("2006", "2006/logbook/logbook_06.txt", Parselogwikitxt),
("2005", "2005/logbook.html", Parseloghtmltxt),
("2004", "2004/logbook.html", Parseloghtmltxt),
@@ -299,15 +302,17 @@ def SetDatesFromLogbookEntries(expedition):
# logbookentry.href = "%s" % logbookentry.date
# logbookentry.save()
# lprevlogbookentry = logbookentry
- for logbookentry in expedition.logbookentry_set.all():
- logbookentry.slug = slugify(logbookentry.title)
- logbookentry.save()
+
def LoadLogbookForExpedition(expedition):
- print "deleting logbooks for", expedition
- expedition.logbookentry_set.all().delete()
- models.PersonTrip.objects.filter(person_expedition__expedition=expedition).delete()
+ """ Parses all logbook entries for one expedition """
+
+ #We're checking for stuff that's changed in admin before deleting it now.
+ #print "deleting logbooks for", expedition
+ #expedition.logbookentry_set.all().delete()
+ #models.PersonTrip.objects.filter(person_expedition__expedition=expedition).delete()
+
expowebbase = os.path.join(settings.EXPOWEB, "years")
year = str(expedition.year)
for lyear, lloc, parsefunc in yearlinks:
@@ -322,7 +327,10 @@ def LoadLogbookForExpedition(expedition):
def LoadLogbooks():
- models.LogbookEntry.objects.all().delete()
+ """ This is the master function for parsing all logbooks into the Troggle database. Requires yearlinks, which is a list of tuples for each expedition with expedition year, logbook path, and parsing function. """
+
+	#Deletion has been moved to a separate function to enable the non-destructive importing
+ #models.LogbookEntry.objects.all().delete()
expowebbase = os.path.join(settings.EXPOWEB, "years")
#yearlinks = [ ("2001", "2001/log.htm", Parseloghtml01), ] #overwrite
#yearlinks = [ ("1996", "1996/log.htm", Parseloghtml01),] # overwrite
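Note: EnterLogIntoDbase now dedupes LogbookEntry on (date, title[:50]) and PersonTrip on (person_expedition, date). One consequence worth flagging: a person who does two separate trips on the same day hits the same lookup key, so the second trip updates rather than adds a row. Illustration with invented values:

    import datetime
    # A second import pass with the same lookup pair finds the existing row;
    # save_carefully then refreshes the non-lookup fields instead of duplicating.
    lookupAttribs = {'date': datetime.date(2008, 7, 15), 'title': "Pushing the far sump"[:50]}
    nonLookupAttribs = {'place': "204", 'text': "trip write-up ...", 'author': None,
                        'expedition': None, 'cave': None}
    # lbo, created = save_carefully(models.LogbookEntry, lookupAttribs, nonLookupAttribs)
    # -> created is False the second time around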
diff --git a/parsers/people.py b/parsers/people.py
index 269f13b..23654d2 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -7,6 +7,7 @@ import re
import datetime
import os
import shutil
+from helpers import save_carefully
# Julian: the below code was causing errors and it seems like a duplication of the above. Hope I haven't broken anything by commenting it. -Aaron
#
@@ -72,47 +73,45 @@ def LoadPersonsExpos():
print "Loading personexpeditions"
models.Person.objects.all().delete()
models.PersonExpedition.objects.all().delete()
- expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
- expomissing = set(expoers2008)
+ #expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
+ #expomissing = set(expoers2008)
for personline in personreader:
name = personline[header["Name"]]
name = re.sub("<.*?>", "", name)
mname = re.match("(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name)
nickname = mname.group(3) or ""
-
- person = models.Person(first_name=mname.group(1), last_name=(mname.group(2) or ""))
- person.is_vfho = personline[header["VfHO member"]]
- #person.Sethref()
- #print "NNNN", person.href
- is_guest = (personline[header["Guest"]] == "1") # this is really a per-expo catagory; not a permanent state
- person.save()
+
+ lookupAttribs={'first_name':mname.group(1), 'last_name':(mname.group(2) or "")}
+ nonLookupAttribs={'is_vfho':personline[header["VfHO member"]],}
+ person, created = save_carefully(models.Person, lookupAttribs=lookupAttribs, nonLookupAttribs=nonLookupAttribs)
+
parseMugShotAndBlurb(personline=personline, header=header, person=person)
# make person expedition from table
for year, attended in zip(headers, personline)[5:]:
expedition = models.Expedition.objects.get(year=year)
if attended == "1" or attended == "-1":
- personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname=nickname, is_guest=is_guest)
+ personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname=nickname, is_guest=(personline[header["Guest"]] == "1"))
personexpedition.save()
# this fills in those people for whom 2008 was their first expo
- print "Loading personexpeditions 2008"
- for name in expomissing:
- firstname, lastname = name.split()
- is_guest = name in ["Eeva Makiranta", "Keith Curtis"]
- print "2008:", name
- persons = list(models.Person.objects.filter(first_name=firstname, last_name=lastname))
- if not persons:
- person = models.Person(first_name=firstname, last_name = lastname, is_vfho = False, mug_shot = "")
- #person.Sethref()
- person.save()
- else:
- person = persons[0]
- expedition = models.Expedition.objects.get(year="2008")
- personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname="", is_guest=is_guest)
- personexpedition.save()
+ #print "Loading personexpeditions 2008"
+ #for name in expomissing:
+ # firstname, lastname = name.split()
+ # is_guest = name in ["Eeva Makiranta", "Keith Curtis"]
+ # print "2008:", name
+ # persons = list(models.Person.objects.filter(first_name=firstname, last_name=lastname))
+ # if not persons:
+ # person = models.Person(first_name=firstname, last_name = lastname, is_vfho = False, mug_shot = "")
+ # #person.Sethref()
+ # person.save()
+ # else:
+ # person = persons[0]
+ # expedition = models.Expedition.objects.get(year="2008")
+ # personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname="", is_guest=is_guest)
+ # personexpedition.save()
#Notability is now a method of person. Makes no sense to store it in the database; it would need to be recalculated every time something changes. - AC 16 Feb 09
# could rank according to surveying as well
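Note: the databaseReset.py rework referred to in the commit message is outside the 'parsers' path, so it does not appear above. Based only on the function names visible in these diffs, the separated import steps might compose along these lines (a sketch, not the real databaseReset.py):

    # Sketch: ordering matters because QMs and logbook entries refer to caves.
    # parsers/QMs.py also runs parseCaveQMs at module level, so merely importing
    # it re-parses the QM lists as a side effect.
    from parsers import cavetab, people, logbooks

    def import_all(logfile=None):
        cavetab.LoadCaveTab(logfile=logfile)   # caves and entrances first
        people.LoadPersonsExpos()              # people, then their expeditions
        logbooks.LoadLogbooks()                # logbook entries and person-trips
        import parsers.QMs                     # triggers the parseCaveQMs calls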