diff options
author | substantialnoninfringinguser <substantialnoninfringinguser@gmail.com> | 2009-05-13 05:25:17 +0100 |
---|---|---|
committer | substantialnoninfringinguser <substantialnoninfringinguser@gmail.com> | 2009-05-13 05:25:17 +0100 |
commit | 7aee3fb920a1477332d78c8f3fb546da428be6e8 (patch) | |
tree | cdfacfc1ef181881d2ac1c7f4c8d4bce2ec917eb /parsers | |
parent | 8c818906b5c1228a6fb411cb96d1bd5f1663b49a (diff) | |
download | troggle-7aee3fb920a1477332d78c8f3fb546da428be6e8.tar.gz troggle-7aee3fb920a1477332d78c8f3fb546da428be6e8.tar.bz2 troggle-7aee3fb920a1477332d78c8f3fb546da428be6e8.zip |
[svn] QM parser now parses Hauchhoehle QMs.py
Photo model added.
Logbook parser now puts mugshots in as photo models, and descriptions from the old folk html pages in as "blurbs" on the person model.
Experimented with eye candy and a random logbook quote generator.
Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8094 by aaron @ 12/31/2008 2:59 AM
Diffstat (limited to 'parsers')
-rw-r--r-- | parsers/QMs.py | 71 | ||||
-rw-r--r-- | parsers/logbooks.py | 31 |
2 files changed, 78 insertions, 24 deletions
diff --git a/parsers/QMs.py b/parsers/QMs.py index 44c38c7..96b91fa 100644 --- a/parsers/QMs.py +++ b/parsers/QMs.py @@ -8,29 +8,56 @@ import re QM.objects.all().delete() - - -def parseSteinbrQMs(): - try: - steinBr=Cave.objects.get(official_name="Steinbrückenhöhle") - except Cave.DoesNotExist: - print "Steinbruckenhoehle is not in the database. Please run parsers.cavetab first." - return - +def parseCaveQMs(cave,pathToCSV): + if cave=='stein': + try: + steinBr=Cave.objects.get(official_name="Steinbrückenhöhle") + except Cave.DoesNotExist: + print "Steinbruckenhoehle is not in the database. Please run parsers.cavetab first." + return + elif cave=='hauch': + try: + hauchHl=Cave.objects.get(official_name="Hauchhöhle") + except Cave.DoesNotExist: + print "Steinbruckenhoehle is not in the database. Please run parsers.cavetab first." + return - qmPath = settings.EXPOWEB+r"smkridge/204/qm.csv" - qmReader = csv.reader(open(qmPath,'r'),dialect="excel-tab") + qmPath = settings.EXPOWEB+pathToCSV + qmCSVContents = open(qmPath,'r') + dialect=csv.Sniffer().sniff(qmCSVContents.read()) + qmCSVContents.seek(0,0) + qmReader = csv.reader(qmCSVContents,dialect=dialect) qmReader.next() # Skip header row for line in qmReader: - year=int(line[0][1:5]) - - #check if placeholder exists for given year, create it if not - placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, text="placeholder for QMs in 204", defaults={"date": date(year, 1, 1),"cave":steinBr}) - if hadToCreate: - print "204 placeholder logbook entry for " + str(year) + " added to database" - QMnum=re.match(r".*?-\d*?-X?(?P<numb>\d*)",line[0]).group("numb") - newQM = QM(found_by=placeholder,number=QMnum,grade=line[1],area=line[2],location_description=line[3],nearest_station_description=line[4],completion_description=line[5],comment=line[6]) - newQM.save() - print "QM "+str(newQM) + " added to database" + try: + year=int(line[0][1:5]) + #check if placeholder exists for given year, create it if not + 
if cave=='stein': + placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, text="placeholder for QMs in 204", defaults={"date": date(year, 1, 1),"cave":steinBr}) + elif cave=='hauch': + placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, text="placeholder for QMs in 234", defaults={"date": date(year, 1, 1),"cave":hauchHl}) + if hadToCreate: + print cave+" placeholder logbook entry for " + str(year) + " added to database" + QMnum=re.match(r".*?-\d*?-X?(?P<numb>\d*)",line[0]).group("numb") + newQM = QM() + newQM.found_by=placeholder + newQM.number=QMnum + if line[1]=="Dig": + newQM.grade="D" + else: + newQM.grade=line[1] + newQM.area=line[2] + newQM.location_description=line[3] + newQM.nearest_station_description=line[4] + newQM.completion_description=line[5] + newQM.comment=line[6] + newQM.save() + print "QM "+str(newQM) + " added to database" + except KeyError: + continue +# except IndexError: +# print "Index error in " + str(line) +# continue -parseSteinbrQMs()
\ No newline at end of file +parseCaveQMs(cave='stein',pathToCSV=r"smkridge/204/qm.csv") +parseCaveQMs(cave='hauch',pathToCSV=r"smkridge/234/qm.csv") diff --git a/parsers/logbooks.py b/parsers/logbooks.py index d0d4f4c..75caeaf 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -45,10 +45,37 @@ def LoadPersons(): pObject = models.Person(first_name = firstname,
last_name = lastname,
is_vfho = person[header["VfHO member"]],
- mug_shot = person[header["Mugshot"]])
- pObject.save()
+ )
+
is_guest = person[header["Guest"]] == "1" # this is really a per-expo category; not a permanent state
+ pObject.save()
+ #create mugshot Photo instance
+ mugShotPath = settings.EXPOWEB+"folk/"+person[header["Mugshot"]]
+ if mugShotPath[-3:]=='jpg': #if person just has an image, add it
+ mugShotObj = models.Photo(
+ caption="Mugshot for "+firstname+" "+lastname,
+ is_mugshot=True,
+ file=mugShotPath,
+ )
+ mugShotObj.save()
+ mugShotObj.contains_person.add(pObject)
+ mugShotObj.save()
+ elif mugShotPath[-3:]=='htm': #if person has an html page, find the image(s) and add it. Also, add the text from the html page to the "blurb" field in his model instance.
+ personPageOld=open(mugShotPath,'r').read()
+ pObject.blurb=re.search('<body>.*<hr',personPageOld,re.DOTALL).group() #this needs to be refined, take care of the HTML and make sure it doesn't match beyond the blurb
+ for photoFilename in re.findall('i/.*?jpg',personPageOld,re.DOTALL):
+ mugShotPath=settings.EXPOWEB+"folk/"+photoFilename
+ mugShotObj = models.Photo(
+ caption="Mugshot for "+firstname+" "+lastname,
+ is_mugshot=True,
+ file=mugShotPath,
+ )
+ mugShotObj.save()
+ mugShotObj.contains_person.add(pObject)
+ mugShotObj.save()
+ pObject.save()
+
for year, attended in zip(headers, person)[5:]:
yo = models.Expedition.objects.filter(year = year)[0]
if attended == "1" or attended == "-1":
|