diff options
Diffstat (limited to 'parsers')
-rw-r--r-- | parsers/cavesM.py | 129 | ||||
-rw-r--r-- | parsers/people.py | 33 |
2 files changed, 32 insertions, 130 deletions
diff --git a/parsers/cavesM.py b/parsers/cavesM.py deleted file mode 100644 index 13cd5d5..0000000 --- a/parsers/cavesM.py +++ /dev/null @@ -1,129 +0,0 @@ - -import troggle.core.models as models #import models for various objects -from django.conf import settings -import xml.etree.ElementTree as ET #this is used to parse XML's -import subprocess -import re - -# -# This parser has to find several things: -# There are files of .html format in expoweb area - they contain some of the important information -# There is a similar number of .svx files in loser are - they contain all the measurements -# -# Previous version was incredibly slow due to various shitty ideas about finding things -# and overelayance on python when handling regular expressions, new version delegates heavy lifting to shell -# and handles more sophisticated bits only -# - -def load(): - print('Hi! I\'m caves parser. Ready to work') - - print('Loading caves of 1623 area') - loadarea('caves-1623/') - - -def loadarea(areacode): - - - print('Searching all cave dirs files') - basedir = settings.SURVEX_DATA+areacode - - bash('cavern -o'+settings.SURVEX_DATA+' '+settings.SURVEX_DATA+'1623-and-1626.svx') - - cavedirs = bash("find "+basedir+" -maxdepth 1 -type d").splitlines() #this command finds all directories - print('Obtained list of directories! (#dirs='+str(len(cavedirs))+')') - ndirs = len(cavedirs) #remember number of dirs for nice debug output - - for cavedir in cavedirs: - if cavedir==basedir: - continue #skip the basedir - a non-proper subdirectory - cavename = bash('echo '+cavedir+' | rev | cut -f1 -d \'/\' | rev').splitlines()[0] #get final bit of the directory - - test = bash('if [ ! -f '+cavedir+'/'+cavename+'.svx ] ; then echo MISSING; fi')#test for file exisence - if 'MISSING' in test: #send error message to the database - msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' MISSING!',message_type='warn') - print('Cave missing'+cavename+' :(') - msg.save() - continue - fullname=cavedir+'/'+cavename+'.svx' - print('Found cave:'+cavename) - cavernout = bash('cavern -q '+fullname) #make cavern process the thing - if 'cavern: error:' in cavernout: - msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' Survex file messed up!',message_type='warn') - print('Fucked svx'+cavename+' :(') - msg.save() - continue - - cavernout = cavernout.splitlines() - depth = float(([x for x in cavernout if ('Total vertical length' in x)][0].split()[-1])[:-2]) - length = float(([x for x in cavernout if ('Total length' in x)][0].split()[6])[:-1]) - surveyname = bash('cat '+fullname+' | grep \'\*begin\' | head -n1 | cut -f2 -d \' \' ').splitlines().pop() - title = (bash('cat '+fullname+' | grep \'\*title\' | head -n1 | cut -f2 -d \' \' ').splitlines() or ["Not found"])[0] - print((('depth','length','surv name'),(depth,length,surveyname))) - print('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[\\.'+surveyname+'.*\\]\'') - nodes = bash('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[.*\\.'+surveyname+'.*\\]\'').splitlines() - entran = [x for x in nodes if ('ENTRANCE' in x) ] - print(nodes) - - - newcave = models.CaveM(survex_file = fullname, total_length = length, name=title, total_depth = depth) - newcave.save() - #end of reading survex masterfiles - - print ("Reading cave descriptions") - cavefiles = bash('find '+settings.CAVEDESCRIPTIONS+' -name \'*.html\'').splitlines() - for fn in cavefiles: - f = open(fn, "r") - print(fn) - contents = f.read() - - desc = extractXML(contents,'underground_description') - name = re.search(r'>.*<',extractXML(contents,'caveslug')).group()[6:-1] - - if desc==None or name==None: - msg = models.Parser_messageM(parsername='caves',content=fn+' Description meesed up!',message_type='warn') - print('Fucked description '+fn+' :(') - msg.save() - continue - - - updatecave = models.CaveM.objects.filter(survex_file__icontains='/'+name+'.svx') - if len(updatecave)>1: - print('Non unique solution - skipping. Name:'+name) - elif len(updatecave)==0: - print('Cave with no survex data'+name) - newcave = models.CaveM(description = desc, name = name) - newcave.save() - else: #exaclty one match - updatecave = updatecave[0] - updatecave.description = desc - if updatecave.name=="Not found": - updatecave.name=name - updatecave.title=name - updatecave.save() - - - #end of reading cave descriptions - - - -def extractXML(contents,tag): - #find correct lines - lines = contents.splitlines() - beg = [x for x in lines if ('<'+tag+'>' in x)] - end = [x for x in lines if ('</'+tag+'>' in x)] - if (not beg) or (not end): - return None - begi = lines.index(beg[0]) - endi = lines.index(end[0]) - if endi!=begi: - segment = '\n'.join(lines[begi:endi+1]) - else: - segment = lines[begi:endi+1] - return segment[0] - - -def bash(cmd): #calls command in bash shell, returns output - process = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE) - output, error = process.communicate() - return output diff --git a/parsers/people.py b/parsers/people.py index 4dba3a8..bc18472 100644 --- a/parsers/people.py +++ b/parsers/people.py @@ -62,6 +62,8 @@ def LoadPersonsExpos(): # make persons print "Loading personexpeditions" + #expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",") + #expomissing = set(expoers2008) for personline in personreader: name = personline[header["Name"]] @@ -83,7 +85,36 @@ def LoadPersonsExpos(): nonLookupAttribs = {'nickname':nickname, 'is_guest':(personline[header["Guest"]] == "1")} save_carefully(models.PersonExpedition, lookupAttribs, nonLookupAttribs) - + + # this fills in those people for whom 2008 was their first expo + #print "Loading personexpeditions 2008" + #for name in expomissing: + # firstname, lastname = name.split() + # is_guest = name in ["Eeva Makiranta", "Keith Curtis"] + # print "2008:", name + # persons = list(models.Person.objects.filter(first_name=firstname, last_name=lastname)) + # if not persons: + # person = models.Person(first_name=firstname, last_name = lastname, is_vfho = False, mug_shot = "") + # #person.Sethref() + # person.save() + # else: + # person = persons[0] + # expedition = models.Expedition.objects.get(year="2008") + # personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname="", is_guest=is_guest) + # personexpedition.save() + + #Notability is now a method of person. Makes no sense to store it in the database; it would need to be recalculated every time something changes. - AC 16 Feb 09 + # could rank according to surveying as well + #print "Setting person notability" + #for person in models.Person.objects.all(): + #person.notability = 0.0 + #for personexpedition in person.personexpedition_set.all(): + #if not personexpedition.is_guest: + #person.notability += 1.0 / (2012 - int(personexpedition.expedition.year)) + #person.bisnotable = person.notability > 0.3 # I don't know how to filter by this + #person.save() + + # used in other referencing parser functions # expedition name lookup cached for speed (it's a very big list) Gpersonexpeditionnamelookup = { } |