diff options
Diffstat (limited to 'parsers/cavesM.py')
-rw-r--r-- | parsers/cavesM.py | 129 |
1 files changed, 129 insertions, 0 deletions
# diff --git a/parsers/cavesM.py b/parsers/cavesM.py (new file mode 100644, index 0000000..13cd5d5)

import os
import re
import subprocess
import xml.etree.ElementTree as ET  # this is used to parse XML's (not referenced in this module's visible code)

from django.conf import settings

import troggle.core.models as models  # import models for various objects

#
# This parser has to find several things:
# There are files of .html format in the expoweb area - they contain some of the important information
# There is a similar number of .svx files in the loser area - they contain all the measurements
#
# The previous version was very slow because of the way it searched for things and because it
# over-relied on Python for regular-expression handling; this version delegates the heavy lifting
# (cavern, dump3d, find, grep) to the shell and handles only the more sophisticated bits in Python.
#

# Leading decimal number of a cavern statistics token such as '123.45m'.
_NUMBER_RE = re.compile(r'-?\d+(?:\.\d+)?')


def load():
    """Entry point of the caves parser: import every cave of the 1623 area."""
    print('Hi! I\'m caves parser. Ready to work')

    print('Loading caves of 1623 area')
    loadarea('caves-1623/')


def loadarea(areacode):
    """Import the caves of one area into the database.

    First creates a ``CaveM`` row for every cave directory below
    ``settings.SURVEX_DATA + areacode`` that has a readable master survex
    file, then attaches the HTML descriptions found below
    ``settings.CAVEDESCRIPTIONS``.

    Args:
        areacode: directory name of the area relative to
            ``settings.SURVEX_DATA``, including the trailing slash
            (e.g. ``'caves-1623/'``).
    """
    _load_survex_caves(areacode)
    _load_descriptions()


def _warn(content):
    """Store a 'warn' message of the caves parser in the database."""
    models.Parser_messageM(parsername='caves', content=content, message_type='warn').save()


def _survey_stat(cavern_lines, label, field):
    """Return the number in whitespace-separated column *field* of the first
    line containing *label*, or None when the line or the number is missing.

    Only the leading decimal number of the token is used, so unit suffixes
    such as 'm' do not eat into the digits.
    """
    for line in cavern_lines:
        if label not in line:
            continue
        tokens = line.split()
        try:
            token = tokens[field]
        except IndexError:
            return None
        found = _NUMBER_RE.search(token)
        return float(found.group()) if found else None
    return None


def _load_survex_caves(areacode):
    """Create one CaveM per cave directory that has a valid master .svx file."""
    print('Searching all cave dirs files')
    basedir = settings.SURVEX_DATA + areacode

    # Process the combined survey; presumably this produces the
    # 1623-and-1626.3d file that dump3d reads further down - TODO confirm.
    bash('cavern -o' + settings.SURVEX_DATA + ' ' + settings.SURVEX_DATA + '1623-and-1626.svx')

    cavedirs = bash("find " + basedir + " -maxdepth 1 -type d").splitlines()  # this command finds all directories
    print('Obtained list of directories! (#dirs=' + str(len(cavedirs)) + ')')

    for cavedir in cavedirs:
        if cavedir == basedir:
            continue  # skip the basedir - a non-proper subdirectory
        cavename = os.path.basename(cavedir)  # final bit of the directory
        fullname = cavedir + '/' + cavename + '.svx'

        if not os.path.isfile(fullname):  # send error message to the database
            print('Cave missing' + cavename + ' :(')
            _warn(cavedir + '/' + cavename + ' MISSING!')
            continue

        print('Found cave:' + cavename)
        cavernout = bash('cavern -q ' + fullname)  # make cavern process the thing
        if 'cavern: error:' in cavernout:
            print('Fucked svx' + cavename + ' :(')
            _warn(cavedir + '/' + cavename + ' Survex file messed up!')
            continue

        cavernout = cavernout.splitlines()
        depth = _survey_stat(cavernout, 'Total vertical length', -1)
        length = _survey_stat(cavernout, 'Total length', 6)
        if depth is None or length is None:
            # cavern reported no error but the statistics are not where we
            # expect them: log it and carry on with the other caves instead
            # of aborting the whole import.
            print('Fucked svx' + cavename + ' :(')
            _warn(cavedir + '/' + cavename + ' Survex statistics missing!')
            continue

        # First '*begin' / '*title' line of the master file, second
        # space-separated field.
        begins = bash("cat " + fullname + " | grep '\\*begin' | head -n1 | cut -f2 -d ' ' ").splitlines()
        surveyname = begins[-1] if begins else None
        title = (bash("cat " + fullname + " | grep '\\*title' | head -n1 | cut -f2 -d ' ' ").splitlines() or ["Not found"])[0]
        print((('depth', 'length', 'surv name'), (depth, length, surveyname)))

        if surveyname is not None:
            # Debug output only: list the nodes of this survey in the combined .3d file.
            print('dump3d ' + settings.SURVEX_DATA + "1623-and-1626.3d | grep NODE | grep '\\[\\." + surveyname + ".*\\]'")
            nodes = bash('dump3d ' + settings.SURVEX_DATA + "1623-and-1626.3d | grep NODE | grep '\\[.*\\." + surveyname + ".*\\]'").splitlines()
            print(nodes)

        newcave = models.CaveM(survex_file=fullname, total_length=length, name=title, total_depth=depth)
        newcave.save()
    # end of reading survex masterfiles


def _cave_slug(contents):
    """Return the cave name taken from the <caveslug> element, or None.

    The text between the first '>' and the last '<' of the element's line is
    taken and its first five characters are dropped (presumably the area
    prefix such as '1623-' - TODO confirm against the description files).
    """
    element = extractXML(contents, 'caveslug')
    if element is None:
        return None
    found = re.search(r'>.*<', element)
    if found is None:
        return None
    return found.group()[6:-1]


def _load_descriptions():
    """Attach the HTML cave descriptions to the CaveM rows (creating rows for
    caves that have a description but no survex data)."""
    print("Reading cave descriptions")
    cavefiles = bash("find " + settings.CAVEDESCRIPTIONS + " -name '*.html'").splitlines()
    for fn in cavefiles:
        print(fn)
        with open(fn, "r") as f:  # close the handle even if reading fails
            contents = f.read()

        desc = extractXML(contents, 'underground_description')
        name = _cave_slug(contents)

        if desc is None or not name:
            print('Fucked description ' + fn + ' :(')
            _warn(fn + ' Description meesed up!')
            continue

        updatecave = models.CaveM.objects.filter(survex_file__icontains='/' + name + '.svx')
        nmatches = len(updatecave)
        if nmatches > 1:
            print('Non unique solution - skipping. Name:' + name)
        elif nmatches == 0:
            print('Cave with no survex data' + name)
            newcave = models.CaveM(description=desc, name=name)
            newcave.save()
        else:  # exactly one match
            updatecave = updatecave[0]
            updatecave.description = desc
            if updatecave.name == "Not found":
                updatecave.name = name
            updatecave.title = name
            updatecave.save()
    # end of reading cave descriptions


def extractXML(contents, tag):
    """Return the lines of *contents* spanning the element *tag*.

    The result runs from the first line containing ``<tag>`` up to and
    including the first following line containing ``</tag>``, joined with
    newlines; both tags (and anything else on those lines) are included.

    Args:
        contents: whole text of the file to search.
        tag: element name without angle brackets.

    Returns:
        The matching lines as one string, or None when either tag is missing.
    """
    lines = contents.splitlines()
    opening = '<' + tag + '>'
    closing = '</' + tag + '>'

    begi = next((i for i, line in enumerate(lines) if opening in line), None)
    if begi is None:
        return None
    # Look for the closing tag from the opening line onward so that a stray
    # earlier closing tag cannot produce an empty or inverted span.
    endi = next((i for i in range(begi, len(lines)) if closing in lines[i]), None)
    if endi is None:
        return None
    return '\n'.join(lines[begi:endi + 1])


def bash(cmd):  # calls command in bash shell, returns output
    """Run *cmd* through the shell and return its standard output as text.

    Standard error is not captured. The command string is passed to the shell
    unquoted, so it must only be built from trusted paths.
    """
    process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
    output, _ = process.communicate()
    # On Python 3 the pipe yields bytes; callers do substring tests with str,
    # so decode here (undecodable bytes are replaced rather than raising).
    if not isinstance(output, str):
        output = output.decode('utf-8', 'replace')
    return output