author    substantialnoninfringinguser <substantialnoninfringinguser@gmail.com>  2009-05-13 06:15:48 +0100
committer substantialnoninfringinguser <substantialnoninfringinguser@gmail.com>  2009-05-13 06:15:48 +0100
commit    625b2156e388a92d57fa446c931bbf410f5a4e29 (patch)
tree      8866421569ba567be82b58a45d90e142c46e5c0a /parsers
parent    1a36856b407684a9d48f04e170a160b3c04aa706 (diff)
[svn] Weeks of local changes.
- Import is now non-destructive
- Parsers write output to a log file (the path can be specified in settings)
- databaseReset.py content has been divided into separate functions which can be called for varying levels of deletion and importing
- Control panel (view, template, urlpattern) added for deleting and importing
- Logins and signup fixed
- CaveArea model updated; view, hierarchical url patterns, and beginning of template added
- New site style

Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8324 by cucc @ 5/3/2009 5:56 AM
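Note: the parser diffs below lean on a new helpers.save_carefully(model, lookupAttribs, nonLookupAttribs) call and a new_since_parsing flag, but helpers.py itself is outside the 'parsers' path and is not shown here. A minimal sketch of what such a helper could look like, inferred purely from the call sites (the name and signature appear in the diffs; the body below is an assumption):

    # Sketch only: inferred from call sites in QMs.py, cavetab.py, logbooks.py
    # and people.py, which expect an (instance, created) pair back and rely on
    # new_since_parsing to protect admin edits from being clobbered on re-import.
    def save_carefully(objectType, lookupAttribs, nonLookupAttribs):
        try:
            instance = objectType.objects.get(**lookupAttribs)
            created = False
            if not instance.new_since_parsing:      # untouched since last parse
                for key, value in nonLookupAttribs.items():
                    setattr(instance, key, value)   # safe to refresh from source
                instance.save()
        except objectType.DoesNotExist:
            instance = objectType(**dict(lookupAttribs, **nonLookupAttribs))
            created = True
            instance.save()
        return (instance, created)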
Diffstat (limited to 'parsers')
-rw-r--r--  parsers/QMs.py       79
-rw-r--r--  parsers/cavetab.py   81
-rw-r--r--  parsers/logbooks.py  44
-rw-r--r--  parsers/people.py    49
4 files changed, 169 insertions, 84 deletions
diff --git a/parsers/QMs.py b/parsers/QMs.py
index c7e6789..6505ddd 100644
--- a/parsers/QMs.py
+++ b/parsers/QMs.py
@@ -1,28 +1,40 @@
+# -*- coding: UTF-8 -*-
+
import csv
import settings
from expo.models import QM, LogbookEntry, Cave
from datetime import *
+from helpers import save_carefully
import re
-#sorry that the below code is ugly. I'll fix it sometime, really! - AC
+def deleteQMs():
+ QM.objects.all().delete()
-QM.objects.all().delete()
+def parseCaveQMs(cave,inputFile):
+ """Runs through the CSV file at inputFile (which is a relative path from expoweb) and saves each QM as a QM instance."""
-def parseCaveQMs(cave,pathToCSV):
if cave=='stein':
try:
- steinBr=Cave.objects.get(official_name="Steinbr&uuml;ckenh&ouml;hle")
+ steinBr=Cave.objects.get(official_name="Steinbr&uuml;ckenh&ouml;hle")
except Cave.DoesNotExist:
print "Steinbruckenhoehle is not in the database. Please run parsers.cavetab first."
return
elif cave=='hauch':
try:
- hauchHl=Cave.objects.get(official_name="Hauchh&ouml;hle")
+ hauchHl=Cave.objects.get(official_name="Hauchh&ouml;hle")
+ except Cave.DoesNotExist:
+			print "Hauchhoehle is not in the database. Please run parsers.cavetab first."
+ return
+ elif cave =='kh':
+ try:
+ kh=Cave.objects.get(official_name="Kaninchenh&ouml;hle")
except Cave.DoesNotExist:
print "Steinbruckenhoehle is not in the database. Please run parsers.cavetab first."
- return
-
- qmPath = settings.EXPOWEB+pathToCSV
+ for file in inputFile:
+ parse_KH_QMs(kh, inputFile=file)
+ return
+
+ qmPath = settings.EXPOWEB+inputFile
qmCSVContents = open(qmPath,'r')
dialect=csv.Sniffer().sniff(qmCSVContents.read())
qmCSVContents.seek(0,0)
@@ -55,13 +67,54 @@ def parseCaveQMs(cave,pathToCSV):
newQM.ticked_off_by=placeholder
newQM.comment=line[6]
- newQM.save()
- print "QM "+str(newQM) + ' added to database\r',
- except KeyError:
+ try:
+ preexistingQM=QM.objects.get(number=QMnum, found_by__date__year=year) #if we don't have this one in the DB, save it
+ if preexistingQM.new_since_parsing==False: #if the pre-existing QM has not been modified, overwrite it
+ preexistingQM.delete()
+ newQM.save()
+ print "overwriting " + str(preexistingQM) +"\r",
+
+ else: # otherwise, print that it was ignored
+ print "preserving "+ str(preexistingQM) + ", which was edited in admin \r",
+
+ except QM.DoesNotExist: #if there is no pre-existing QM, save the new one
+ newQM.save()
+ print "QM "+str(newQM) + ' added to database\r',
+
+ except KeyError: #check on this one
continue
# except IndexError:
# print "Index error in " + str(line)
# continue
-parseCaveQMs(cave='stein',pathToCSV=r"smkridge/204/qm.csv")
-parseCaveQMs(cave='hauch',pathToCSV=r"smkridge/234/qm.csv")
+def parse_KH_QMs(kh, inputFile):
+ """import QMs from the 1623-161 (Kaninchenhöhle) html pages
+ """
+ khQMs=open(settings.EXPOWEB+inputFile,'r')
+ khQMs=khQMs.readlines()
+ for line in khQMs:
+ res=re.search('name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]',line)
+ if res:
+ res=res.groupdict()
+ year=int(res['year'])
+ #check if placeholder exists for given year, create it if not
+ placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, title="placeholder for QMs in 161", text="QMs temporarily attached to this should be re-attached to their actual trips", defaults={"date": date((year), 1, 1),"cave":kh})
+ lookupArgs={
+ 'found_by':placeholder,
+ 'number':res['number']
+ }
+ nonLookupArgs={
+ 'grade':res['grade'],
+ 'nearest_station':res['nearest_station'],
+ 'location_description':res['description']
+ }
+
+ save_carefully(QM,lookupArgs,nonLookupArgs)
+
+
+parseCaveQMs(cave='kh', inputFile=r"smkridge/161/qmtodo.htm")
+parseCaveQMs(cave='stein',inputFile=r"smkridge/204/qm.csv")
+parseCaveQMs(cave='hauch',inputFile=r"smkridge/234/qm.csv")
+
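Note: the regex in parse_KH_QMs assumes each 161 QM sits on a single line of the html page. A quick check of the pattern against an invented (hypothetical) input line of that shape:

    import re
    pattern = ('name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> '
               '(?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]')
    # Hypothetical line, shaped like the 1623-161 QM pages this parser targets:
    line = '<a name="C1999-161-27"></a> B<dd>Continuing rift at stream level [p27a]'
    res = re.search(pattern, line)
    if res:
        print res.groupdict()
        # -> year '1999', cave '161', number '27', grade 'B',
        #    description 'Continuing rift at stream level ', nearest_station 'p27a'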
diff --git a/parsers/cavetab.py b/parsers/cavetab.py
index 8fc34d8..940d867 100644
--- a/parsers/cavetab.py
+++ b/parsers/cavetab.py
@@ -9,6 +9,8 @@ import time
import re
import os
+from troggle.helpers import save_carefully
+
##format of CAVETAB2.CSV is
KatasterNumber = 0
KatStatusCode = 1
@@ -136,15 +138,20 @@ def html_to_wiki(text):
text2 = ""
return out
-def LoadCaveTab():
+def LoadCaveTab(logfile=None):
cavetab = open(os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV"))
caveReader = csv.reader(cavetab)
caveReader.next() # Strip out column headers
-
+
+ if logfile:
+ logfile.write("Beginning to import caves from "+str(cavetab)+"\n"+"-"*60+"\n")
+
for katArea in ['1623', '1626']:
if not models.Area.objects.filter(short_name = katArea):
newArea = models.Area(short_name = katArea)
newArea.save()
+ if logfile:
+ logfile.write("Added area "+str(newArea.short_name)+"\n")
area1626 = models.Area.objects.filter(short_name = '1626')[0]
area1623 = models.Area.objects.filter(short_name = '1623')[0]
@@ -153,33 +160,43 @@ def LoadCaveTab():
if line[Area] == 'nonexistent':
continue
            entranceLetters=[] #Used in caves that have multiple entrances, which are not described on separate lines
- if line[MultipleEntrances] == 'yes' or line[MultipleEntrances]=='':
+ if line[MultipleEntrances] == 'yes' or line[MultipleEntrances]=='': #When true, this line contains an actual cave, otherwise it is an extra entrance.
args = {}
+ defaultArgs = {}
+
def addToArgs(CSVname, modelName):
if line[CSVname]:
args[modelName] = html_to_wiki(line[CSVname])
+
+ def addToDefaultArgs(CSVname, modelName): #This has to do with the non-destructive import. These arguments will be passed as the "default" dictionary in a get_or_create
+ if line[CSVname]:
+ defaultArgs[modelName] = html_to_wiki(line[CSVname])
+
+ # The attributes added using "addToArgs" will be used to look up an existing cave. Those added using "addToDefaultArgs" will not.
addToArgs(KatasterNumber, "kataster_number")
- addToArgs(KatStatusCode, "kataster_code")
+ addToDefaultArgs(KatStatusCode, "kataster_code")
addToArgs(UnofficialNumber, "unofficial_number")
addToArgs(Name, "official_name")
- addToArgs(Comment, "notes")
- addToArgs(Explorers, "explorers")
- addToArgs(UndergroundDescription, "underground_description")
- addToArgs(Equipment, "equipment")
- addToArgs(KatasterStatus, "kataster_status")
- addToArgs(References, "references")
- addToArgs(UndergroundCentreLine, "underground_centre_line")
- addToArgs(UndergroundDrawnSurvey, "survey")
- addToArgs(Length, "length")
- addToArgs(Depth, "depth")
- addToArgs(Extent, "extent")
- addToArgs(SurvexFile, "survex_file")
- addToArgs(Notes, "notes")
+ addToDefaultArgs(Comment, "notes")
+ addToDefaultArgs(Explorers, "explorers")
+ addToDefaultArgs(UndergroundDescription, "underground_description")
+ addToDefaultArgs(Equipment, "equipment")
+ addToDefaultArgs(KatasterStatus, "kataster_status")
+ addToDefaultArgs(References, "references")
+ addToDefaultArgs(UndergroundCentreLine, "underground_centre_line")
+ addToDefaultArgs(UndergroundDrawnSurvey, "survey")
+ addToDefaultArgs(Length, "length")
+ addToDefaultArgs(Depth, "depth")
+ addToDefaultArgs(Extent, "extent")
+ addToDefaultArgs(SurvexFile, "survex_file")
+ addToDefaultArgs(Notes, "notes")
- newCave = models.Cave(**args)
- newCave.save()
-
- if line[Area]:
+ newCave, created=save_carefully(models.Cave, lookupAttribs=args, nonLookupAttribs=defaultArgs)
+ if logfile:
+ logfile.write("Added cave "+str(newCave)+"\n")
+
+ #If we created a new cave, add the area to it. This does mean that if a cave's identifying features have not changed, areas will not be updated from csv.
+ if created and line[Area]:
if line[Area] == "1626":
newCave.area.add(area1626)
else:
@@ -190,16 +207,20 @@ def LoadCaveTab():
newArea = models.Area(short_name = line[Area], parent = area1623)
newArea.save()
newCave.area.add(newArea)
- else:
+ elif created:
newCave.area.add(area1623)
-
+
newCave.save()
+ if logfile:
+ logfile.write("Added area "+line[Area]+" to cave "+str(newCave)+"\n")
- if line[UnofficialName]:
- newUnofficialName = models.OtherCaveName(cave = newCave, name = line[UnofficialName])
- newUnofficialName.save()
-
- if line[MultipleEntrances] == '' or \
+ if created and line[UnofficialName]:
+ newUnofficialName = models.OtherCaveName(cave = newCave, name = line[UnofficialName])
+ newUnofficialName.save()
+ if logfile:
+ logfile.write("Added unofficial name "+str(newUnofficialName)+" to cave "+str(newCave)+"\n")
+
+        if created and (line[MultipleEntrances] == '' or \
           line[MultipleEntrances] == 'entrance' or \
           line[MultipleEntrances] == 'last entrance'):
args = {}
@@ -258,6 +279,8 @@ def LoadCaveTab():
addToArgs(Bearings, 'bearings')
newEntrance = models.Entrance(**args)
newEntrance.save()
+ if logfile:
+ logfile.write("Added entrance "+str(newEntrance)+"\n")
if line[Entrances]:
entrance_letter = line[Entrances]
@@ -266,6 +289,8 @@ def LoadCaveTab():
newCaveAndEntrance = models.CaveAndEntrance(cave = newCave, entrance = newEntrance, entrance_letter = entrance_letter)
newCaveAndEntrance.save()
+ if logfile:
+ logfile.write("Added CaveAndEntrance "+str(newCaveAndEntrance)+"\n")
# lookup function modelled on GetPersonExpeditionNameLookup
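Note: LoadCaveTab now takes an optional logfile and writes one line per object it creates. A possible invocation, assuming the caller opens the log itself (the path here is made up; per the commit message the real path comes from settings):

    from parsers import cavetab
    logfile = open("/tmp/troggle_import.log", "w")   # hypothetical path
    cavetab.LoadCaveTab(logfile=logfile)             # logs added caves/areas/entrances
    logfile.close()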
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 976d71a..7c8364a 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -13,6 +13,7 @@ import re
import datetime
import os
+from troggle.helpers import save_carefully
#
# When we edit logbook entries, allow a "?" after any piece of data to say we've frigged it and
@@ -72,21 +73,23 @@ def GetTripCave(place): #need to be fuzzier about matching h
noncaveplaces = [ "Journey", "Loser Plateau" ]
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground):
+ """ saves a logbook entry and related persontrips """
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground)
# tripCave = GetTripCave(place)
-
- lbo = models.LogbookEntry(date=date, place=place, title=title[:50], text=text, author=author, expedition=expedition)
+ #
lplace = place.lower()
if lplace not in noncaveplaces:
- lbo.cave=GetCaveLookup().get(lplace)
- #print "pppp %s |%s|" % (lplace, str(lbo.cave))
-
- lbo.save()
- #print "ttt", date, place
+ cave=GetCaveLookup().get(lplace)
+
+ #Check for an existing copy of the current entry, and save
+ lookupAttribs={'date':date, 'title':title[:50]}
+ nonLookupAttribs={'place':place, 'text':text, 'author':author, 'expedition':expedition, 'cave':cave}
+ lbo, created=save_carefully(models.LogbookEntry, lookupAttribs, nonLookupAttribs)
+
for tripperson, time_underground in trippersons:
- pto = models.PersonTrip(person_expedition = tripperson, place=place, date=date, time_underground=time_underground,
- logbook_entry=lbo, is_logbook_entry_author=(tripperson == author))
- pto.save()
+ lookupAttribs={'person_expedition':tripperson, 'date':date}
+ nonLookupAttribs={'place':place,'time_underground':time_underground,'logbook_entry':lbo,'is_logbook_entry_author':(tripperson == author)}
+ save_carefully(models.PersonTrip, lookupAttribs, nonLookupAttribs)
def ParseDate(tripdate, year):
@@ -235,7 +238,7 @@ def Parseloghtml03(year, expedition, txt):
yearlinks = [
("2008", "2008/2008logbook.txt", Parselogwikitxt),
- ("2007", "2007/2007logbook.txt", Parselogwikitxt),
+ #("2007", "2007/2007logbook.txt", Parselogwikitxt),
("2006", "2006/logbook/logbook_06.txt", Parselogwikitxt),
("2005", "2005/logbook.html", Parseloghtmltxt),
("2004", "2004/logbook.html", Parseloghtmltxt),
@@ -299,15 +302,17 @@ def SetDatesFromLogbookEntries(expedition):
# logbookentry.href = "%s" % logbookentry.date
# logbookentry.save()
# lprevlogbookentry = logbookentry
- for logbookentry in expedition.logbookentry_set.all():
- logbookentry.slug = slugify(logbookentry.title)
- logbookentry.save()
+
def LoadLogbookForExpedition(expedition):
- print "deleting logbooks for", expedition
- expedition.logbookentry_set.all().delete()
- models.PersonTrip.objects.filter(person_expedition__expedition=expedition).delete()
+ """ Parses all logbook entries for one expedition """
+
+ #We're checking for stuff that's changed in admin before deleting it now.
+ #print "deleting logbooks for", expedition
+ #expedition.logbookentry_set.all().delete()
+ #models.PersonTrip.objects.filter(person_expedition__expedition=expedition).delete()
+
expowebbase = os.path.join(settings.EXPOWEB, "years")
year = str(expedition.year)
for lyear, lloc, parsefunc in yearlinks:
@@ -322,7 +327,10 @@ def LoadLogbookForExpedition(expedition):
def LoadLogbooks():
- models.LogbookEntry.objects.all().delete()
+ """ This is the master function for parsing all logbooks into the Troggle database. Requires yearlinks, which is a list of tuples for each expedition with expedition year, logbook path, and parsing function. """
+
+	#Deletion has been moved to a separate function to enable the non-destructive importing
+ #models.LogbookEntry.objects.all().delete()
expowebbase = os.path.join(settings.EXPOWEB, "years")
#yearlinks = [ ("2001", "2001/log.htm", Parseloghtml01), ] #overwrite
#yearlinks = [ ("1996", "1996/log.htm", Parseloghtml01),] # overwrite
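Note: EnterLogIntoDbase now dedupes LogbookEntry on (date, title[:50]) and PersonTrip on (person_expedition, date). One consequence worth flagging: a person who does two separate trips on the same day hits the same lookup key, so the second trip updates rather than adds a row. Illustration with invented values:

    import datetime
    # A second import pass with the same lookup pair finds the existing row;
    # save_carefully then refreshes the non-lookup fields instead of duplicating.
    lookupAttribs = {'date': datetime.date(2008, 7, 15), 'title': "Pushing the far sump"[:50]}
    nonLookupAttribs = {'place': "204", 'text': "trip write-up ...", 'author': None,
                        'expedition': None, 'cave': None}
    # lbo, created = save_carefully(models.LogbookEntry, lookupAttribs, nonLookupAttribs)
    # -> created is False the second time around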
diff --git a/parsers/people.py b/parsers/people.py
index 269f13b..23654d2 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -7,6 +7,7 @@ import re
import datetime
import os
import shutil
+from helpers import save_carefully
# Julian: the below code was causing errors and it seems like a duplication of the above. Hope I haven't broken anything by commenting it. -Aaron
#
@@ -72,47 +73,45 @@ def LoadPersonsExpos():
print "Loading personexpeditions"
models.Person.objects.all().delete()
models.PersonExpedition.objects.all().delete()
- expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
- expomissing = set(expoers2008)
+ #expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
+ #expomissing = set(expoers2008)
for personline in personreader:
name = personline[header["Name"]]
name = re.sub("<.*?>", "", name)
mname = re.match("(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name)
nickname = mname.group(3) or ""
-
- person = models.Person(first_name=mname.group(1), last_name=(mname.group(2) or ""))
- person.is_vfho = personline[header["VfHO member"]]
- #person.Sethref()
- #print "NNNN", person.href
- is_guest = (personline[header["Guest"]] == "1") # this is really a per-expo catagory; not a permanent state
- person.save()
+
+ lookupAttribs={'first_name':mname.group(1), 'last_name':(mname.group(2) or "")}
+ nonLookupAttribs={'is_vfho':personline[header["VfHO member"]],}
+ person, created = save_carefully(models.Person, lookupAttribs=lookupAttribs, nonLookupAttribs=nonLookupAttribs)
+
parseMugShotAndBlurb(personline=personline, header=header, person=person)
# make person expedition from table
for year, attended in zip(headers, personline)[5:]:
expedition = models.Expedition.objects.get(year=year)
if attended == "1" or attended == "-1":
- personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname=nickname, is_guest=is_guest)
+ personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname=nickname, is_guest=(personline[header["Guest"]] == "1"))
personexpedition.save()
# this fills in those people for whom 2008 was their first expo
- print "Loading personexpeditions 2008"
- for name in expomissing:
- firstname, lastname = name.split()
- is_guest = name in ["Eeva Makiranta", "Keith Curtis"]
- print "2008:", name
- persons = list(models.Person.objects.filter(first_name=firstname, last_name=lastname))
- if not persons:
- person = models.Person(first_name=firstname, last_name = lastname, is_vfho = False, mug_shot = "")
- #person.Sethref()
- person.save()
- else:
- person = persons[0]
- expedition = models.Expedition.objects.get(year="2008")
- personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname="", is_guest=is_guest)
- personexpedition.save()
+ #print "Loading personexpeditions 2008"
+ #for name in expomissing:
+ # firstname, lastname = name.split()
+ # is_guest = name in ["Eeva Makiranta", "Keith Curtis"]
+ # print "2008:", name
+ # persons = list(models.Person.objects.filter(first_name=firstname, last_name=lastname))
+ # if not persons:
+ # person = models.Person(first_name=firstname, last_name = lastname, is_vfho = False, mug_shot = "")
+ # #person.Sethref()
+ # person.save()
+ # else:
+ # person = persons[0]
+ # expedition = models.Expedition.objects.get(year="2008")
+ # personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname="", is_guest=is_guest)
+ # personexpedition.save()
#Notability is now a method of person. Makes no sense to store it in the database; it would need to be recalculated every time something changes. - AC 16 Feb 09
# could rank according to surveying as well
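Note: the databaseReset.py rework referred to in the commit message is outside the 'parsers' path, so it does not appear above. Based only on the function names visible in these diffs, the separated import steps might compose along these lines (a sketch, not the real databaseReset.py):

    # Sketch: ordering matters because QMs and logbook entries refer to caves.
    # parsers/QMs.py also runs parseCaveQMs at module level, so merely importing
    # it re-parses the QM lists as a side effect.
    from parsers import cavetab, people, logbooks

    def import_all(logfile=None):
        cavetab.LoadCaveTab(logfile=logfile)   # caves and entrances first
        people.LoadPersonsExpos()              # people, then their expeditions
        logbooks.LoadLogbooks()                # logbook entries and person-trips
        import parsers.QMs                     # triggers the parseCaveQMs calls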