diff options
Diffstat (limited to 'parsers/cavesM.py')
-rw-r--r-- | parsers/cavesM.py | 143 |
1 files changed, 31 insertions, 112 deletions
diff --git a/parsers/cavesM.py b/parsers/cavesM.py index 71bda0d..13cd5d5 100644 --- a/parsers/cavesM.py +++ b/parsers/cavesM.py @@ -19,23 +19,16 @@ def load(): print('Hi! I\'m caves parser. Ready to work') print('Loading caves of 1623 area') - loadarea('1623') + loadarea('caves-1623/') def loadarea(areacode): - if not file_exists(settings.SURVEX_DATA+'1623-and-1626.3d'): - print('Computing master .3d file') - bash('cavern -o'+settings.SURVEX_DATA+' '+settings.SURVEX_DATA+'1623-and-1626.svx') - else: - print('Loading from existing master .3d file') - - master3d = bash('dump3d -d '+settings.SURVEX_DATA+'1623-and-1626.3d').splitlines() - master3dN = [x for x in master3d if ('NODE' in x)] #list of nodes of master survex file - master3dL = [x for x in master3d if ('LINE' in x)] #list of nodes of master survex file print('Searching all cave dirs files') - basedir = settings.SURVEX_DATA+'caves-'+areacode+'/' + basedir = settings.SURVEX_DATA+areacode + + bash('cavern -o'+settings.SURVEX_DATA+' '+settings.SURVEX_DATA+'1623-and-1626.svx') cavedirs = bash("find "+basedir+" -maxdepth 1 -type d").splitlines() #this command finds all directories print('Obtained list of directories! (#dirs='+str(len(cavedirs))+')') @@ -47,14 +40,14 @@ def loadarea(areacode): cavename = bash('echo '+cavedir+' | rev | cut -f1 -d \'/\' | rev').splitlines()[0] #get final bit of the directory test = bash('if [ ! -f '+cavedir+'/'+cavename+'.svx ] ; then echo MISSING; fi')#test for file exisence - if not file_exists(cavedir+'/'+cavename+'.svx'): + if 'MISSING' in test: #send error message to the database msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' MISSING!',message_type='warn') print('Cave missing'+cavename+' :(') msg.save() continue fullname=cavedir+'/'+cavename+'.svx' print('Found cave:'+cavename) - cavernout = bash('cavern -o '+cavedir+' '+fullname) #make cavern process the thing + cavernout = bash('cavern -q '+fullname) #make cavern process the thing if 'cavern: error:' in cavernout: msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' Survex file messed up!',message_type='warn') print('Fucked svx'+cavename+' :(') @@ -64,71 +57,28 @@ def loadarea(areacode): cavernout = cavernout.splitlines() depth = float(([x for x in cavernout if ('Total vertical length' in x)][0].split()[-1])[:-2]) length = float(([x for x in cavernout if ('Total length' in x)][0].split()[6])[:-1]) - cavefile = open(fullname,'r') - cavefilecontents = cavefile.read().splitlines() - surveyname = [x for x in cavefilecontents if ('*begin ') in x][0].split()[1].lower() - try: - title = [x for x in cavefilecontents if ('*title ') in x][0].split()[1] - except: - syrveyname = "Untitled" - - relevant_nodes = [x for x in master3dN if (('['+areacode+'.'+surveyname+'.' in x) or ('['+areacode+'.'+surveyname+']' in x))] - entrance_nodes = [x for x in relevant_nodes if 'ENTRANCE' in x] - surface_nodes = [x for x in relevant_nodes if 'SURFACE' in x] - location_nodes = [] - print('rel_nodes'+str(len(relevant_nodes))) - if len(entrance_nodes) > 0: - location_nodes = entrance_nodes - elif len(surface_nodes) > 0: - location_nodes = surface_nodes - elif len(relevant_nodes) > 0: - location_nodes = relevant_nodes - - try: - location = sorted(location_nodes, key = lambda y : float(y.split()[3])).pop() - except: - print(location_nodes) - location = 'Not found' - - relevant_lines = [x for x in master3dL if (('['+areacode+'.'+surveyname+'.' in x) or ('['+areacode+'.'+surveyname+']' in x))] - try: - lastleg = sorted(relevant_lines, key = lambda y : y.split().pop()).pop() - except: - lastleg = ['LINE 1900.01.01'] - try: - lastdate = lastleg.split().pop() - if 'STYLE' in lastdate: - lastdate = lastleg.split().pop().pop() - except: - lastdate = '1900.01.01' - - entrance = ' '.join(location.split()[1:3]) - print((('depth','length','surv name','entr','date'),(depth,length,surveyname,entrance,lastdate))) #sanity check print - - - newcave = models.CaveM( - survex_file = fullname, - total_length = length, - name=areacode+'.'+surveyname, - total_depth = depth, - date = lastdate, - entrance = entrance) + surveyname = bash('cat '+fullname+' | grep \'\*begin\' | head -n1 | cut -f2 -d \' \' ').splitlines().pop() + title = (bash('cat '+fullname+' | grep \'\*title\' | head -n1 | cut -f2 -d \' \' ').splitlines() or ["Not found"])[0] + print((('depth','length','surv name'),(depth,length,surveyname))) + print('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[\\.'+surveyname+'.*\\]\'') + nodes = bash('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[.*\\.'+surveyname+'.*\\]\'').splitlines() + entran = [x for x in nodes if ('ENTRANCE' in x) ] + print(nodes) + + + newcave = models.CaveM(survex_file = fullname, total_length = length, name=title, total_depth = depth) newcave.save() #end of reading survex masterfiles - + print ("Reading cave descriptions") cavefiles = bash('find '+settings.CAVEDESCRIPTIONS+' -name \'*.html\'').splitlines() for fn in cavefiles: f = open(fn, "r") print(fn) contents = f.read() - - slug = re.sub(r"\s+", "", extractXML(contents,'caveslug')) + desc = extractXML(contents,'underground_description') - name = slug[5:] #get survex compatible name - area = slug[0:4] - - print([area,name]) + name = re.search(r'>.*<',extractXML(contents,'caveslug')).group()[6:-1] if desc==None or name==None: msg = models.Parser_messageM(parsername='caves',content=fn+' Description meesed up!',message_type='warn') @@ -136,56 +86,26 @@ def loadarea(areacode): msg.save() continue - print(area+'/'+name+'/'+name+'.svx') - updatecave = models.CaveM.objects.filter(survex_file__icontains=area+'/'+name+'/'+name+'.svx') + updatecave = models.CaveM.objects.filter(survex_file__icontains='/'+name+'.svx') if len(updatecave)>1: print('Non unique solution - skipping. Name:'+name) elif len(updatecave)==0: - print('Cave with no survex data:'+name) - continue + print('Cave with no survex data'+name) + newcave = models.CaveM(description = desc, name = name) + newcave.save() else: #exaclty one match - print('Adding desc:'+name) updatecave = updatecave[0] - updatecave.description = '/cave/descriptionM/'+slug #area-name + updatecave.description = desc + if updatecave.name=="Not found": + updatecave.name=name updatecave.title=name updatecave.save() + - slugS = slug - explorersS = extractXML(contents,'explorers') - underground_descriptionS = extractXML(contents,'underground_description') - equipmentS = extractXML(contents,'equipment') - referencesS = extractXML(contents,'references') - surveyS = extractXML(contents,'survey') - kataster_statusS = extractXML(contents,'kataster_status') - underground_centre_lineS = extractXML(contents,'underground_centre_line') - survex_fileS = extractXML(contents,'survex_file') - notesS = extractXML(contents,'notes') - - - newcavedesc = models.Cave_descriptionM( - slug = slugS, - explorers = explorersS, - underground_description = underground_descriptionS, - equipment = equipmentS, - references = referencesS, - survey = surveyS, - kataster_status = kataster_statusS, - underground_centre_line = underground_centre_lineS, - survex_file = survex_fileS, - notes = notesS) - newcavedesc.save() - - - - #end of reading cave descriptions -def file_exists(filename): - test = bash('if [ ! -f '+filename+' ] ; then echo MISSING; fi')#test for file exisence - if 'MISSING' in test: #send error message to the database - return False - return True + def extractXML(contents,tag): #find correct lines @@ -199,10 +119,9 @@ def extractXML(contents,tag): if endi!=begi: segment = '\n'.join(lines[begi:endi+1]) else: - segment = lines[begi:endi+1][0] - - hit = re.findall('<'+tag+'>(.*)</'+tag+'>', segment, re.S)[0] - return hit + segment = lines[begi:endi+1] + return segment[0] + def bash(cmd): #calls command in bash shell, returns output process = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE) |