summaryrefslogtreecommitdiffstats
path: root/parsers/cavesM.py
diff options
context:
space:
mode:
Diffstat (limited to 'parsers/cavesM.py')
-rw-r--r--parsers/cavesM.py129
1 files changed, 0 insertions, 129 deletions
diff --git a/parsers/cavesM.py b/parsers/cavesM.py
deleted file mode 100644
index 13cd5d5..0000000
--- a/parsers/cavesM.py
+++ /dev/null
@@ -1,129 +0,0 @@
-
-import troggle.core.models as models #import models for various objects
-from django.conf import settings
-import xml.etree.ElementTree as ET #this is used to parse XML's
-import subprocess
-import re
-
-#
-# This parser has to find several things:
-# There are files of .html format in the expoweb area - they contain some of the important information
-# There is a similar number of .svx files in the loser area - they contain all the measurements
-#
-# The previous version was very slow due to inefficient strategies for finding things
-# and overreliance on Python for regular-expression handling; this version delegates the
-# heavy lifting to the shell and handles only the more sophisticated parts itself
-#
-
def load():
    """Top-level entry point: announce startup, then parse the 1623 area."""
    print('Hi! I\'m caves parser. Ready to work')

    area = 'caves-1623/'
    print('Loading caves of 1623 area')
    loadarea(area)
-
-
def loadarea(areacode):
    """Load cave survey data and HTML descriptions for one area.

    areacode: path fragment appended to settings.SURVEX_DATA
    (e.g. 'caves-1623/').

    Side effects: runs cavern/dump3d/find/grep through the shell via bash(),
    creates models.CaveM rows and models.Parser_messageM warning rows in the
    database, and prints progress to stdout.  Returns nothing.
    """


    print('Searching all cave dirs files')
    basedir = settings.SURVEX_DATA+areacode

    # Process the whole dataset once up front so the .3d file exists for the
    # per-cave dump3d queries below.
    bash('cavern -o'+settings.SURVEX_DATA+' '+settings.SURVEX_DATA+'1623-and-1626.svx')

    cavedirs = bash("find "+basedir+" -maxdepth 1 -type d").splitlines() #this command finds all directories
    print('Obtained list of directories! (#dirs='+str(len(cavedirs))+')')
    ndirs = len(cavedirs) #remember number of dirs for nice debug output

    for cavedir in cavedirs:
        if cavedir==basedir:
            continue #skip the basedir - a non-proper subdirectory
        # The trailing path component is assumed to be the cave name, and the
        # cave's master survex file is assumed to be <dir>/<name>.svx.
        cavename = bash('echo '+cavedir+' | rev | cut -f1 -d \'/\' | rev').splitlines()[0] #get final bit of the directory

        test = bash('if [ ! -f '+cavedir+'/'+cavename+'.svx ] ; then echo MISSING; fi')#test for file existence
        if 'MISSING' in test: #send error message to the database
            msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' MISSING!',message_type='warn')
            print('Cave missing'+cavename+' :(')
            msg.save()
            continue
        fullname=cavedir+'/'+cavename+'.svx'
        print('Found cave:'+cavename)
        cavernout = bash('cavern -q '+fullname) #make cavern process the thing
        if 'cavern: error:' in cavernout:
            msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' Survex file messed up!',message_type='warn')
            print('Fucked svx'+cavename+' :(')
            msg.save()
            continue

        cavernout = cavernout.splitlines()
        # Scrape depth/length from cavern's summary output; the slice strips
        # the unit suffix.  NOTE(review): assumes cavern's English output
        # format ('Total vertical length', 'Total length') — verify against
        # the installed cavern version.
        depth = float(([x for x in cavernout if ('Total vertical length' in x)][0].split()[-1])[:-2])
        length = float(([x for x in cavernout if ('Total length' in x)][0].split()[6])[:-1])
        # First *begin name is taken as the survey name; first *title (if any)
        # becomes the cave title, defaulting to "Not found".
        surveyname = bash('cat '+fullname+' | grep \'\*begin\' | head -n1 | cut -f2 -d \' \' ').splitlines().pop()
        title = (bash('cat '+fullname+' | grep \'\*title\' | head -n1 | cut -f2 -d \' \' ').splitlines() or ["Not found"])[0]
        print((('depth','length','surv name'),(depth,length,surveyname)))
        print('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[\\.'+surveyname+'.*\\]\'')
        # Pull this survey's stations out of the combined .3d file.
        nodes = bash('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[.*\\.'+surveyname+'.*\\]\'').splitlines()
        entran = [x for x in nodes if ('ENTRANCE' in x) ]  # NOTE(review): computed but unused below
        print(nodes)


        newcave = models.CaveM(survex_file = fullname, total_length = length, name=title, total_depth = depth)
        newcave.save()
    #end of reading survex masterfiles

    print ("Reading cave descriptions")
    cavefiles = bash('find '+settings.CAVEDESCRIPTIONS+' -name \'*.html\'').splitlines()
    for fn in cavefiles:
        f = open(fn, "r")  # NOTE(review): file handle is never closed
        print(fn)
        contents = f.read()

        desc = extractXML(contents,'underground_description')
        # NOTE(review): re.search(...).group() raises AttributeError if
        # <caveslug> is absent — assumes every description file has one.
        name = re.search(r'>.*<',extractXML(contents,'caveslug')).group()[6:-1]

        if desc==None or name==None:
            msg = models.Parser_messageM(parsername='caves',content=fn+' Description meesed up!',message_type='warn')
            print('Fucked description '+fn+' :(')
            msg.save()
            continue


        # Match the description to the cave created from survex data above.
        updatecave = models.CaveM.objects.filter(survex_file__icontains='/'+name+'.svx')
        if len(updatecave)>1:
            print('Non unique solution - skipping. Name:'+name)
        elif len(updatecave)==0:
            print('Cave with no survex data'+name)
            newcave = models.CaveM(description = desc, name = name)
            newcave.save()
        else: #exactly one match
            updatecave = updatecave[0]
            updatecave.description = desc
            if updatecave.name=="Not found":
                updatecave.name=name
                updatecave.title=name
            updatecave.save()


    #end of reading cave descriptions
-
-
def extractXML(contents,tag):
    """Return the first <tag>...</tag> span of *contents*, or None.

    Works line-wise: finds the first line containing the opening tag and the
    first line containing the closing tag, and returns those lines (inclusive)
    joined with newlines.  Returns None when either tag is missing.
    """
    #find correct lines
    lines = contents.splitlines()
    beg = [x for x in lines if ('<'+tag+'>' in x)]
    end = [x for x in lines if ('</'+tag+'>' in x)]
    if (not beg) or (not end):
        return None
    begi = lines.index(beg[0])
    endi = lines.index(end[0])
    # BUG FIX: the old code joined multi-line segments into one string and
    # then returned segment[0], i.e. only the first *character*.  Return the
    # whole segment text in both the single- and multi-line cases.
    return '\n'.join(lines[begi:endi+1])
-
-
def bash(cmd): #calls command in bash shell, returns output
    """Run *cmd* through the shell and return its captured stdout as text.

    BUG FIX: on Python 3, communicate() returns bytes by default, so callers'
    substring tests like `'MISSING' in test` raised TypeError; with
    universal_newlines=True stdout is decoded to str.

    NOTE(review): shell=True with string-interpolated paths is injection-prone;
    inputs come from settings/filesystem here, but consider subprocess.run
    with an argument list where no shell pipeline is needed.
    """
    process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                               universal_newlines=True)
    output, error = process.communicate()
    return output