author     substantialnoninfringinguser <substantialnoninfringinguser@gmail.com>   2009-05-13 06:15:48 +0100
committer  substantialnoninfringinguser <substantialnoninfringinguser@gmail.com>   2009-05-13 06:15:48 +0100
commit     625b2156e388a92d57fa446c931bbf410f5a4e29 (patch)
tree       8866421569ba567be82b58a45d90e142c46e5c0a /parsers
parent     1a36856b407684a9d48f04e170a160b3c04aa706 (diff)
[svn] Weeks of local changes.
- Import is now non-destructive (see the save_carefully sketch below)
- Parsers write output to a log file (the path can be specified in settings)
- databaseReset.py content has been divided into separate functions, which can be called for varying levels of deletion and importing
- Control panel (view, template, urlpattern) added for deleting and importing
- Logins and signup fixed
- CaveArea model updated; view, hierarchical URL patterns, and the beginning of a template added
- New site style
Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8324 by cucc @ 5/3/2009 5:56 AM
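The save_carefully helper that this commit leans on everywhere lives in helpers.py, which is outside this diff. A minimal sketch of the pattern, inferred from the call sites below — the exact body and the new_since_parsing semantics are assumptions, not the committed implementation:

    # Hypothetical reconstruction of helpers.save_carefully (not shown in this diff).
    # lookupAttribs identify an existing row; nonLookupAttribs hold freshly
    # parsed values that are only safe to overwrite on unedited objects.
    def save_carefully(objectType, lookupAttribs={}, nonLookupAttribs={}):
        instance, created = objectType.objects.get_or_create(defaults=nonLookupAttribs, **lookupAttribs)
        if not created and not instance.new_since_parsing:
            # Object came from an earlier parse run and was never hand-edited
            # in the admin, so refresh it with the newly parsed values.
            for k, v in nonLookupAttribs.items():
                setattr(instance, k, v)
            instance.save()
        return (instance, created)

This matches how the parsers below unpack its return value, e.g. person, created = save_carefully(...).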
Diffstat (limited to 'parsers')
-rw-r--r--  parsers/QMs.py       | 79
-rw-r--r--  parsers/cavetab.py   | 81
-rw-r--r--  parsers/logbooks.py  | 44
-rw-r--r--  parsers/people.py    | 49
4 files changed, 169 insertions, 84 deletions
diff --git a/parsers/QMs.py b/parsers/QMs.py
index c7e6789..6505ddd 100644
--- a/parsers/QMs.py
+++ b/parsers/QMs.py
@@ -1,28 +1,40 @@
+# -*- coding: UTF-8 -*-
+
 import csv
 import settings
 from expo.models import QM, LogbookEntry, Cave
 from datetime import *
+from helpers import save_carefully
 import re
 
-#sorry that the below code is ugly. I'll fix it sometime, really! - AC
-
-QM.objects.all().delete()
+def deleteQMs():
+    QM.objects.all().delete()
 
-def parseCaveQMs(cave,pathToCSV):
+def parseCaveQMs(cave,inputFile):
+    """Runs through the CSV file at inputFile (which is a relative path from expoweb) and saves each QM as a QM instance."""
+
     if cave=='stein':
         try:
-            steinBr=Cave.objects.get(official_name="Steinbrückenhöhle")
+            steinBr=Cave.objects.get(official_name="Steinbrückenhöhle")
         except Cave.DoesNotExist:
             print "Steinbruckenhoehle is not in the database. Please run parsers.cavetab first."
             return
     elif cave=='hauch':
         try:
-            hauchHl=Cave.objects.get(official_name="Hauchhöhle")
+            hauchHl=Cave.objects.get(official_name="Hauchhöhle")
+        except Cave.DoesNotExist:
+            print "Steinbruckenhoehle is not in the database. Please run parsers.cavetab first."
+            return
+    elif cave =='kh':
+        try:
+            kh=Cave.objects.get(official_name="Kaninchenhöhle")
         except Cave.DoesNotExist:
             print "Steinbruckenhoehle is not in the database. Please run parsers.cavetab first."
-            return
-
-    qmPath = settings.EXPOWEB+pathToCSV
+        for file in inputFile:
+            parse_KH_QMs(kh, inputFile=file)
+        return
+
+    qmPath = settings.EXPOWEB+inputFile
     qmCSVContents = open(qmPath,'r')
     dialect=csv.Sniffer().sniff(qmCSVContents.read())
     qmCSVContents.seek(0,0)
@@ -55,13 +67,54 @@ def parseCaveQMs(cave,pathToCSV):
                 newQM.ticked_off_by=placeholder
                 newQM.comment=line[6]
-                newQM.save()
-                print "QM "+str(newQM) + ' added to database\r',
-            except KeyError:
+                try:
+                    preexistingQM=QM.objects.get(number=QMnum, found_by__date__year=year) #if we don't have this one in the DB, save it
+                    if preexistingQM.new_since_parsing==False: #if the pre-existing QM has not been modified, overwrite it
+                        preexistingQM.delete()
+                        newQM.save()
+                        print "overwriting " + str(preexistingQM) +"\r",
+
+                    else: # otherwise, print that it was ignored
+                        print "preserving "+ str(preexistingQM) + ", which was edited in admin \r",
+
+                except QM.DoesNotExist: #if there is no pre-existing QM, save the new one
+                    newQM.save()
+                    print "QM "+str(newQM) + ' added to database\r',
+
+            except KeyError: #check on this one
                 continue
             # except IndexError:
             #     print "Index error in " + str(line)
             #     continue
 
-parseCaveQMs(cave='stein',pathToCSV=r"smkridge/204/qm.csv")
-parseCaveQMs(cave='hauch',pathToCSV=r"smkridge/234/qm.csv")
+def parse_KH_QMs(kh, inputFile):
+    """import QMs from the 1623-161 (Kaninchenhöhle) html pages
+    """
+    khQMs=open(settings.EXPOWEB+inputFile,'r')
+    khQMs=khQMs.readlines()
+    for line in khQMs:
+        res=re.search('name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]',line)
+        if res:
+            res=res.groupdict()
+            year=int(res['year'])
+            #check if placeholder exists for given year, create it if not
+            placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, title="placeholder for QMs in 161", text="QMs temporarily attached to this should be re-attached to their actual trips", defaults={"date": date((year), 1, 1),"cave":kh})
+            lookupArgs={
+                'found_by':placeholder,
+                'number':res['number']
+                }
+            nonLookupArgs={
+                'grade':res['grade'],
+                'nearest_station':res['nearest_station'],
+                'location_description':res['description']
+                }
+
+            save_carefully(QM,lookupArgs,nonLookupArgs)
+
+
+parseCaveQMs(cave='kh', inputFile=r"smkridge/161/qmtodo.htm")
+parseCaveQMs(cave='stein',inputFile=r"smkridge/204/qm.csv")
+parseCaveQMs(cave='hauch',inputFile=r"smkridge/234/qm.csv")
+
diff --git a/parsers/cavetab.py b/parsers/cavetab.py
index 8fc34d8..940d867 100644
--- a/parsers/cavetab.py
+++ b/parsers/cavetab.py
@@ -9,6 +9,8 @@ import time
 import re
import os
+from troggle.helpers import save_carefully
+
##format of CAVETAB2.CSV is
KatasterNumber = 0
KatStatusCode = 1
@@ -136,15 +138,20 @@ def html_to_wiki(text):
     text2 = ""
return out
-def LoadCaveTab():
+def LoadCaveTab(logfile=None):
cavetab = open(os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV"))
caveReader = csv.reader(cavetab)
caveReader.next() # Strip out column headers
-
+
+ if logfile:
+ logfile.write("Beginning to import caves from "+str(cavetab)+"\n"+"-"*60+"\n")
+
for katArea in ['1623', '1626']:
if not models.Area.objects.filter(short_name = katArea):
newArea = models.Area(short_name = katArea)
newArea.save()
+ if logfile:
+ logfile.write("Added area "+str(newArea.short_name)+"\n")
area1626 = models.Area.objects.filter(short_name = '1626')[0]
area1623 = models.Area.objects.filter(short_name = '1623')[0]
@@ -153,33 +160,43 @@ def LoadCaveTab():
         if line[Area] == 'nonexistent':
continue
entranceLetters=[] #Used in caves that have mulitlple entrances, which are not described on seperate lines
- if line[MultipleEntrances] == 'yes' or line[MultipleEntrances]=='':
+ if line[MultipleEntrances] == 'yes' or line[MultipleEntrances]=='': #When true, this line contains an actual cave, otherwise it is an extra entrance.
args = {}
+ defaultArgs = {}
+
def addToArgs(CSVname, modelName):
if line[CSVname]:
args[modelName] = html_to_wiki(line[CSVname])
+
+ def addToDefaultArgs(CSVname, modelName): #This has to do with the non-destructive import. These arguments will be passed as the "default" dictionary in a get_or_create
+ if line[CSVname]:
+ defaultArgs[modelName] = html_to_wiki(line[CSVname])
+
+ # The attributes added using "addToArgs" will be used to look up an existing cave. Those added using "addToDefaultArgs" will not.
addToArgs(KatasterNumber, "kataster_number")
- addToArgs(KatStatusCode, "kataster_code")
+ addToDefaultArgs(KatStatusCode, "kataster_code")
addToArgs(UnofficialNumber, "unofficial_number")
addToArgs(Name, "official_name")
- addToArgs(Comment, "notes")
- addToArgs(Explorers, "explorers")
- addToArgs(UndergroundDescription, "underground_description")
- addToArgs(Equipment, "equipment")
- addToArgs(KatasterStatus, "kataster_status")
- addToArgs(References, "references")
- addToArgs(UndergroundCentreLine, "underground_centre_line")
- addToArgs(UndergroundDrawnSurvey, "survey")
- addToArgs(Length, "length")
- addToArgs(Depth, "depth")
- addToArgs(Extent, "extent")
- addToArgs(SurvexFile, "survex_file")
- addToArgs(Notes, "notes")
+ addToDefaultArgs(Comment, "notes")
+ addToDefaultArgs(Explorers, "explorers")
+ addToDefaultArgs(UndergroundDescription, "underground_description")
+ addToDefaultArgs(Equipment, "equipment")
+ addToDefaultArgs(KatasterStatus, "kataster_status")
+ addToDefaultArgs(References, "references")
+ addToDefaultArgs(UndergroundCentreLine, "underground_centre_line")
+ addToDefaultArgs(UndergroundDrawnSurvey, "survey")
+ addToDefaultArgs(Length, "length")
+ addToDefaultArgs(Depth, "depth")
+ addToDefaultArgs(Extent, "extent")
+ addToDefaultArgs(SurvexFile, "survex_file")
+ addToDefaultArgs(Notes, "notes")
- newCave = models.Cave(**args)
- newCave.save()
-
- if line[Area]:
+ newCave, created=save_carefully(models.Cave, lookupAttribs=args, nonLookupAttribs=defaultArgs)
+ if logfile:
+ logfile.write("Added cave "+str(newCave)+"\n")
+
+ #If we created a new cave, add the area to it. This does mean that if a cave's identifying features have not changed, areas will not be updated from csv.
+ if created and line[Area]:
if line[Area] == "1626":
newCave.area.add(area1626)
else:
@@ -190,16 +207,20 @@ def LoadCaveTab():
                     newArea = models.Area(short_name = line[Area], parent = area1623)
newArea.save()
newCave.area.add(newArea)
- else:
+ elif created:
newCave.area.add(area1623)
-
+
newCave.save()
+ if logfile:
+ logfile.write("Added area "+line[Area]+" to cave "+str(newCave)+"\n")
- if line[UnofficialName]:
- newUnofficialName = models.OtherCaveName(cave = newCave, name = line[UnofficialName])
- newUnofficialName.save()
-
- if line[MultipleEntrances] == '' or \
+ if created and line[UnofficialName]:
+ newUnofficialName = models.OtherCaveName(cave = newCave, name = line[UnofficialName])
+ newUnofficialName.save()
+ if logfile:
+ logfile.write("Added unofficial name "+str(newUnofficialName)+" to cave "+str(newCave)+"\n")
+
+ if created and line[MultipleEntrances] == '' or \
line[MultipleEntrances] == 'entrance' or \
line[MultipleEntrances] == 'last entrance':
args = {}
@@ -258,6 +279,8 @@ def LoadCaveTab():
             addToArgs(Bearings, 'bearings')
newEntrance = models.Entrance(**args)
newEntrance.save()
+ if logfile:
+ logfile.write("Added entrance "+str(newEntrance)+"\n")
if line[Entrances]:
entrance_letter = line[Entrances]
@@ -266,6 +289,8 @@ def LoadCaveTab():
             newCaveAndEntrance = models.CaveAndEntrance(cave = newCave, entrance = newEntrance, entrance_letter = entrance_letter)
newCaveAndEntrance.save()
+ if logfile:
+ logfile.write("Added CaveAndEntrance "+str(newCaveAndEntrance)+"\n")
# lookup function modelled on GetPersonExpeditionNameLookup
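LoadCaveTab now takes an optional logfile handle but never opens one itself; the caller is expected to supply it, with the path configured in settings per the commit message. A possible calling sketch — the settings.LOGFILE name and the wrapper function are assumptions:

    # Hypothetical caller, e.g. from the new databaseReset.py control functions.
    import settings
    from parsers import cavetab

    def reset_caves():
        logfile = open(settings.LOGFILE, "a")   # append so earlier import logs survive; name assumed
        cavetab.LoadCaveTab(logfile=logfile)    # parser appends one line per object saved
        logfile.close()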
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 976d71a..7c8364a 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -13,6 +13,7 @@ import re
 import datetime
import os
+from troggle.helpers import save_carefully
#
# When we edit logbook entries, allow a "?" after any piece of data to say we've frigged it and
@@ -72,21 +73,23 @@ def GetTripCave(place): #need to be fuzzier about matching h
 noncaveplaces = [ "Journey", "Loser Plateau" ]
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground):
+ """ saves a logbook entry and related persontrips """
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground)
# tripCave = GetTripCave(place)
-
- lbo = models.LogbookEntry(date=date, place=place, title=title[:50], text=text, author=author, expedition=expedition)
+ #
lplace = place.lower()
if lplace not in noncaveplaces:
- lbo.cave=GetCaveLookup().get(lplace)
- #print "pppp %s |%s|" % (lplace, str(lbo.cave))
-
- lbo.save()
- #print "ttt", date, place
+ cave=GetCaveLookup().get(lplace)
+
+ #Check for an existing copy of the current entry, and save
+ lookupAttribs={'date':date, 'title':title[:50]}
+ nonLookupAttribs={'place':place, 'text':text, 'author':author, 'expedition':expedition, 'cave':cave}
+ lbo, created=save_carefully(models.LogbookEntry, lookupAttribs, nonLookupAttribs)
+
for tripperson, time_underground in trippersons:
- pto = models.PersonTrip(person_expedition = tripperson, place=place, date=date, time_underground=time_underground,
- logbook_entry=lbo, is_logbook_entry_author=(tripperson == author))
- pto.save()
+ lookupAttribs={'person_expedition':tripperson, 'date':date}
+ nonLookupAttribs={'place':place,'time_underground':time_underground,'logbook_entry':lbo,'is_logbook_entry_author':(tripperson == author)}
+ save_carefully(models.PersonTrip, lookupAttribs, nonLookupAttribs)
def ParseDate(tripdate, year):
@@ -235,7 +238,7 @@ def Parseloghtml03(year, expedition, txt):
 yearlinks = [
("2008", "2008/2008logbook.txt", Parselogwikitxt),
- ("2007", "2007/2007logbook.txt", Parselogwikitxt),
+ #("2007", "2007/2007logbook.txt", Parselogwikitxt),
("2006", "2006/logbook/logbook_06.txt", Parselogwikitxt),
("2005", "2005/logbook.html", Parseloghtmltxt),
("2004", "2004/logbook.html", Parseloghtmltxt),
@@ -299,15 +302,17 @@ def SetDatesFromLogbookEntries(expedition):
 #        logbookentry.href = "%s" % logbookentry.date
# logbookentry.save()
# lprevlogbookentry = logbookentry
- for logbookentry in expedition.logbookentry_set.all():
- logbookentry.slug = slugify(logbookentry.title)
- logbookentry.save()
+
def LoadLogbookForExpedition(expedition):
- print "deleting logbooks for", expedition
- expedition.logbookentry_set.all().delete()
- models.PersonTrip.objects.filter(person_expedition__expedition=expedition).delete()
+ """ Parses all logbook entries for one expedition """
+
+ #We're checking for stuff that's changed in admin before deleting it now.
+ #print "deleting logbooks for", expedition
+ #expedition.logbookentry_set.all().delete()
+ #models.PersonTrip.objects.filter(person_expedition__expedition=expedition).delete()
+
expowebbase = os.path.join(settings.EXPOWEB, "years")
year = str(expedition.year)
for lyear, lloc, parsefunc in yearlinks:
@@ -322,7 +327,10 @@ def LoadLogbookForExpedition(expedition):
 def LoadLogbooks():
- models.LogbookEntry.objects.all().delete()
+ """ This is the master function for parsing all logbooks into the Troggle database. Requires yearlinks, which is a list of tuples for each expedition with expedition year, logbook path, and parsing function. """
+
+ #Deletion has been moved to a seperate function to enable the non-destructive importing
+ #models.LogbookEntry.objects.all().delete()
expowebbase = os.path.join(settings.EXPOWEB, "years")
#yearlinks = [ ("2001", "2001/log.htm", Parseloghtml01), ] #overwrite
#yearlinks = [ ("1996", "1996/log.htm", Parseloghtml01),] # overwrite
diff --git a/parsers/people.py b/parsers/people.py
index 269f13b..23654d2 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -7,6 +7,7 @@ import re
 import datetime
 import os
 import shutil
+from helpers import save_carefully
 
 # Julian: the below code was causing errors and it seems like a duplication of the above. Hope I haven't broken anything by commenting it. -Aaron
 #
@@ -72,47 +73,45 @@ def LoadPersonsExpos():
     print "Loading personexpeditions"
     models.Person.objects.all().delete()
     models.PersonExpedition.objects.all().delete()
-    expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
-    expomissing = set(expoers2008)
+    #expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
+    #expomissing = set(expoers2008)
 
     for personline in personreader:
         name = personline[header["Name"]]
         name = re.sub("<.*?>", "", name)
         mname = re.match("(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name)
         nickname = mname.group(3) or ""
-
-        person = models.Person(first_name=mname.group(1), last_name=(mname.group(2) or ""))
-        person.is_vfho = personline[header["VfHO member"]]
-        #person.Sethref()
-        #print "NNNN", person.href
-        is_guest = (personline[header["Guest"]] == "1") # this is really a per-expo catagory; not a permanent state
-        person.save()
+
+        lookupAttribs={'first_name':mname.group(1), 'last_name':(mname.group(2) or "")}
+        nonLookupAttribs={'is_vfho':personline[header["VfHO member"]],}
+        person, created = save_carefully(models.Person, lookupAttribs=lookupAttribs, nonLookupAttribs=nonLookupAttribs)
+
         parseMugShotAndBlurb(personline=personline, header=header, person=person)
 
         # make person expedition from table
         for year, attended in zip(headers, personline)[5:]:
             expedition = models.Expedition.objects.get(year=year)
             if attended == "1" or attended == "-1":
-                personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname=nickname, is_guest=is_guest)
+                personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname=nickname, is_guest=(personline[header["Guest"]] == "1"))
                 personexpedition.save()
 
     # this fills in those people for whom 2008 was their first expo
-    print "Loading personexpeditions 2008"
-    for name in expomissing:
-        firstname, lastname = name.split()
-        is_guest = name in ["Eeva Makiranta", "Keith Curtis"]
-        print "2008:", name
-        persons = list(models.Person.objects.filter(first_name=firstname, last_name=lastname))
-        if not persons:
-            person = models.Person(first_name=firstname, last_name = lastname, is_vfho = False, mug_shot = "")
-            #person.Sethref()
-            person.save()
-        else:
-            person = persons[0]
-        expedition = models.Expedition.objects.get(year="2008")
-        personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname="", is_guest=is_guest)
-        personexpedition.save()
+    #print "Loading personexpeditions 2008"
+    #for name in expomissing:
+    #    firstname, lastname = name.split()
+    #    is_guest = name in ["Eeva Makiranta", "Keith Curtis"]
+    #    print "2008:", name
+    #    persons = list(models.Person.objects.filter(first_name=firstname, last_name=lastname))
+    #    if not persons:
+    #        person = models.Person(first_name=firstname, last_name = lastname, is_vfho = False, mug_shot = "")
+    #        #person.Sethref()
+    #        person.save()
+    #    else:
+    #        person = persons[0]
+    #    expedition = models.Expedition.objects.get(year="2008")
+    #    personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname="", is_guest=is_guest)
+    #    personexpedition.save()
 
     #Notability is now a method of person. Makes no sense to store it in the database; it would need to be recalculated every time something changes. - AC 16 Feb 09
     # could rank according to surveying as well