diff options
Diffstat (limited to 'parsers')
-rw-r--r-- | parsers/cavesM.py | 143 |
1 files changed, 112 insertions, 31 deletions
diff --git a/parsers/cavesM.py b/parsers/cavesM.py index 13cd5d5..71bda0d 100644 --- a/parsers/cavesM.py +++ b/parsers/cavesM.py @@ -19,16 +19,23 @@ def load(): print('Hi! I\'m caves parser. Ready to work') print('Loading caves of 1623 area') - loadarea('caves-1623/') + loadarea('1623') def loadarea(areacode): + if not file_exists(settings.SURVEX_DATA+'1623-and-1626.3d'): + print('Computing master .3d file') + bash('cavern -o'+settings.SURVEX_DATA+' '+settings.SURVEX_DATA+'1623-and-1626.svx') + else: + print('Loading from existing master .3d file') - print('Searching all cave dirs files') - basedir = settings.SURVEX_DATA+areacode + master3d = bash('dump3d -d '+settings.SURVEX_DATA+'1623-and-1626.3d').splitlines() + master3dN = [x for x in master3d if ('NODE' in x)] #list of nodes of master survex file + master3dL = [x for x in master3d if ('LINE' in x)] #list of nodes of master survex file - bash('cavern -o'+settings.SURVEX_DATA+' '+settings.SURVEX_DATA+'1623-and-1626.svx') + print('Searching all cave dirs files') + basedir = settings.SURVEX_DATA+'caves-'+areacode+'/' cavedirs = bash("find "+basedir+" -maxdepth 1 -type d").splitlines() #this command finds all directories print('Obtained list of directories! (#dirs='+str(len(cavedirs))+')') @@ -40,14 +47,14 @@ def loadarea(areacode): cavename = bash('echo '+cavedir+' | rev | cut -f1 -d \'/\' | rev').splitlines()[0] #get final bit of the directory test = bash('if [ ! -f '+cavedir+'/'+cavename+'.svx ] ; then echo MISSING; fi')#test for file exisence - if 'MISSING' in test: #send error message to the database + if not file_exists(cavedir+'/'+cavename+'.svx'): msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' MISSING!',message_type='warn') print('Cave missing'+cavename+' :(') msg.save() continue fullname=cavedir+'/'+cavename+'.svx' print('Found cave:'+cavename) - cavernout = bash('cavern -q '+fullname) #make cavern process the thing + cavernout = bash('cavern -o '+cavedir+' '+fullname) #make cavern process the thing if 'cavern: error:' in cavernout: msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' Survex file messed up!',message_type='warn') print('Fucked svx'+cavename+' :(') @@ -57,28 +64,71 @@ def loadarea(areacode): cavernout = cavernout.splitlines() depth = float(([x for x in cavernout if ('Total vertical length' in x)][0].split()[-1])[:-2]) length = float(([x for x in cavernout if ('Total length' in x)][0].split()[6])[:-1]) - surveyname = bash('cat '+fullname+' | grep \'\*begin\' | head -n1 | cut -f2 -d \' \' ').splitlines().pop() - title = (bash('cat '+fullname+' | grep \'\*title\' | head -n1 | cut -f2 -d \' \' ').splitlines() or ["Not found"])[0] - print((('depth','length','surv name'),(depth,length,surveyname))) - print('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[\\.'+surveyname+'.*\\]\'') - nodes = bash('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[.*\\.'+surveyname+'.*\\]\'').splitlines() - entran = [x for x in nodes if ('ENTRANCE' in x) ] - print(nodes) - - - newcave = models.CaveM(survex_file = fullname, total_length = length, name=title, total_depth = depth) + cavefile = open(fullname,'r') + cavefilecontents = cavefile.read().splitlines() + surveyname = [x for x in cavefilecontents if ('*begin ') in x][0].split()[1].lower() + try: + title = [x for x in cavefilecontents if ('*title ') in x][0].split()[1] + except: + syrveyname = "Untitled" + + relevant_nodes = [x for x in master3dN if (('['+areacode+'.'+surveyname+'.' in x) or ('['+areacode+'.'+surveyname+']' in x))] + entrance_nodes = [x for x in relevant_nodes if 'ENTRANCE' in x] + surface_nodes = [x for x in relevant_nodes if 'SURFACE' in x] + location_nodes = [] + print('rel_nodes'+str(len(relevant_nodes))) + if len(entrance_nodes) > 0: + location_nodes = entrance_nodes + elif len(surface_nodes) > 0: + location_nodes = surface_nodes + elif len(relevant_nodes) > 0: + location_nodes = relevant_nodes + + try: + location = sorted(location_nodes, key = lambda y : float(y.split()[3])).pop() + except: + print(location_nodes) + location = 'Not found' + + relevant_lines = [x for x in master3dL if (('['+areacode+'.'+surveyname+'.' in x) or ('['+areacode+'.'+surveyname+']' in x))] + try: + lastleg = sorted(relevant_lines, key = lambda y : y.split().pop()).pop() + except: + lastleg = ['LINE 1900.01.01'] + try: + lastdate = lastleg.split().pop() + if 'STYLE' in lastdate: + lastdate = lastleg.split().pop().pop() + except: + lastdate = '1900.01.01' + + entrance = ' '.join(location.split()[1:3]) + print((('depth','length','surv name','entr','date'),(depth,length,surveyname,entrance,lastdate))) #sanity check print + + + newcave = models.CaveM( + survex_file = fullname, + total_length = length, + name=areacode+'.'+surveyname, + total_depth = depth, + date = lastdate, + entrance = entrance) newcave.save() #end of reading survex masterfiles - + print ("Reading cave descriptions") cavefiles = bash('find '+settings.CAVEDESCRIPTIONS+' -name \'*.html\'').splitlines() for fn in cavefiles: f = open(fn, "r") print(fn) contents = f.read() - + + slug = re.sub(r"\s+", "", extractXML(contents,'caveslug')) desc = extractXML(contents,'underground_description') - name = re.search(r'>.*<',extractXML(contents,'caveslug')).group()[6:-1] + name = slug[5:] #get survex compatible name + area = slug[0:4] + + print([area,name]) if desc==None or name==None: msg = models.Parser_messageM(parsername='caves',content=fn+' Description meesed up!',message_type='warn') @@ -86,26 +136,56 @@ def loadarea(areacode): msg.save() continue + print(area+'/'+name+'/'+name+'.svx') - updatecave = models.CaveM.objects.filter(survex_file__icontains='/'+name+'.svx') + updatecave = models.CaveM.objects.filter(survex_file__icontains=area+'/'+name+'/'+name+'.svx') if len(updatecave)>1: print('Non unique solution - skipping. Name:'+name) elif len(updatecave)==0: - print('Cave with no survex data'+name) - newcave = models.CaveM(description = desc, name = name) - newcave.save() + print('Cave with no survex data:'+name) + continue else: #exaclty one match + print('Adding desc:'+name) updatecave = updatecave[0] - updatecave.description = desc - if updatecave.name=="Not found": - updatecave.name=name + updatecave.description = '/cave/descriptionM/'+slug #area-name updatecave.title=name updatecave.save() - + slugS = slug + explorersS = extractXML(contents,'explorers') + underground_descriptionS = extractXML(contents,'underground_description') + equipmentS = extractXML(contents,'equipment') + referencesS = extractXML(contents,'references') + surveyS = extractXML(contents,'survey') + kataster_statusS = extractXML(contents,'kataster_status') + underground_centre_lineS = extractXML(contents,'underground_centre_line') + survex_fileS = extractXML(contents,'survex_file') + notesS = extractXML(contents,'notes') + + + newcavedesc = models.Cave_descriptionM( + slug = slugS, + explorers = explorersS, + underground_description = underground_descriptionS, + equipment = equipmentS, + references = referencesS, + survey = surveyS, + kataster_status = kataster_statusS, + underground_centre_line = underground_centre_lineS, + survex_file = survex_fileS, + notes = notesS) + newcavedesc.save() + + + + #end of reading cave descriptions - +def file_exists(filename): + test = bash('if [ ! -f '+filename+' ] ; then echo MISSING; fi')#test for file exisence + if 'MISSING' in test: #send error message to the database + return False + return True def extractXML(contents,tag): #find correct lines @@ -119,9 +199,10 @@ def extractXML(contents,tag): if endi!=begi: segment = '\n'.join(lines[begi:endi+1]) else: - segment = lines[begi:endi+1] - return segment[0] - + segment = lines[begi:endi+1][0] + + hit = re.findall('<'+tag+'>(.*)</'+tag+'>', segment, re.S)[0] + return hit def bash(cmd): #calls command in bash shell, returns output process = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE) |