1 files changed, 129 insertions, 0 deletions
diff --git a/parsers/cavesM.py b/parsers/cavesM.py
new file mode 100644
index 0000000..13cd5d5
--- /dev/null
+++ b/parsers/cavesM.py
@@ -0,0 +1,129 @@
+
+import troggle.core.models as models #import models for various objects
+from django.conf import settings
+import xml.etree.ElementTree as ET #this is used to parse XML's
+import subprocess
+import re
+
+#
+#    This parser has to find several things:
+#    There are files of .html format in the expoweb area - they contain some of the important information
+#    There is a similar number of .svx files in the loser area - they contain all the measurements
+#
+#    The previous version was incredibly slow because of poor ideas about how to find things
+#    and over-reliance on Python for handling regular expressions; the new version delegates the heavy lifting to the shell
+#    and handles only the more sophisticated bits itself
+#
+
+def load():
+    """Entry point of the caves parser: announce start-up, then import the 1623 area."""
+    for notice in ("Hi! I'm caves parser. Ready to work", 'Loading caves of 1623 area'):
+        print(notice)
+    loadarea('caves-1623/')
+
+
+def loadarea(areacode):
+    """Import the caves of one survex area into the database.
+
+    areacode is a directory below settings.SURVEX_DATA, written with a trailing slash ('caves-1623/').
+    Pass 1: each subdirectory <cave> holding <cave>/<cave>.svx is run through cavern and saved as a CaveM.
+    Pass 2: each .html file below settings.CAVEDESCRIPTIONS is matched to a CaveM through its
+    caveslug; the description is attached, or a new CaveM is created when nothing matches.
+    Unusable survex or description files are stored as Parser_messageM warnings and skipped.
+    """
+    print('Searching all cave dirs files')
+    basedir = settings.SURVEX_DATA+areacode
+    #process the master file once; presumably this writes the 1623-and-1626.3d that dump3d reads below
+    bash('cavern -o'+settings.SURVEX_DATA+' '+settings.SURVEX_DATA+'1623-and-1626.svx')
+
+    cavedirs = bash("find "+basedir+" -maxdepth 1 -type d").splitlines() #this command finds all directories
+    print('Obtained list of directories! (#dirs='+str(len(cavedirs))+')')
+
+    for cavedir in cavedirs:
+        if cavedir==basedir:
+            continue #skip the basedir - a non-proper subdirectory
+        cavename = bash('echo '+cavedir+' | rev | cut -f1 -d \'/\' | rev').splitlines()[0] #get final bit of the directory
+
+        test = bash('if [ ! -f '+cavedir+'/'+cavename+'.svx ] ; then echo MISSING; fi')#test for file existence
+        if 'MISSING' in test: #send error message to the database
+            print('Cave missing'+cavename+' :(')
+            models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' MISSING!',message_type='warn').save()
+            continue
+        fullname=cavedir+'/'+cavename+'.svx'
+        print('Found cave:'+cavename)
+        cavernout = bash('cavern -q '+fullname) #make cavern process the thing
+        if 'cavern: error:' in cavernout:
+            print('Fucked svx'+cavename+' :(')
+            models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' Survex file messed up!',message_type='warn').save()
+            continue
+
+        cavernout = cavernout.splitlines()
+        #NOTE(review): [:-2] and [:-1] cut the unit suffix off cavern's figures; [:-2] may also cut a digit - confirm against real cavern output
+        depth = float(([x for x in cavernout if ('Total vertical length' in x)][0].split()[-1])[:-2])
+        length = float(([x for x in cavernout if ('Total length' in x)][0].split()[6])[:-1])
+        surveyname = bash('cat '+fullname+' | grep \'\*begin\' | head -n1 | cut -f2 -d \' \' ').splitlines().pop()
+        title = (bash('cat '+fullname+' | grep \'\*title\' | head -n1 | cut -f2 -d \' \' ').splitlines() or ["Not found"])[0]
+        print((('depth','length','surv name'),(depth,length,surveyname)))
+        print('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[\\.'+surveyname+'.*\\]\'')
+        nodes = bash('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[.*\\.'+surveyname+'.*\\]\'').splitlines()
+        #TODO: the nodes containing 'ENTRANCE' are not stored anywhere yet; the node list is only printed
+        print(nodes)
+
+        newcave =  models.CaveM(survex_file = fullname, total_length = length, name=title, total_depth = depth)
+        newcave.save()
+    #end of reading survex masterfiles
+
+    print ("Reading cave descriptions")
+    cavefiles = bash('find '+settings.CAVEDESCRIPTIONS+' -name \'*.html\'').splitlines()
+    for fn in cavefiles:
+        with open(fn, "r") as f: #the with-block closes the file once it has been read
+            print(fn)
+            contents = f.read()
+        desc = extractXML(contents,'underground_description')
+        slug = extractXML(contents,'caveslug')
+        #extractXML returns None for a missing tag, so check before running the regex on it
+        found = re.search(r'>.*<',slug) if slug is not None else None
+        if desc is None or found is None:
+            print('Fucked description '+fn+' :(')
+            models.Parser_messageM(parsername='caves',content=fn+' Description meesed up!',message_type='warn').save()
+            continue
+        name = found.group()[6:-1] #drops the leading '>' plus five more characters (presumably an area prefix such as '1623-') and the final '<'
+
+        updatecave = models.CaveM.objects.filter(survex_file__icontains='/'+name+'.svx')
+        if len(updatecave)>1:
+            print('Non unique solution - skipping. Name:'+name)
+        elif len(updatecave)==0:
+            print('Cave with no survex data'+name)
+            newcave =  models.CaveM(description = desc, name = name)
+            newcave.save()
+        else: #exactly one match
+            updatecave = updatecave[0]
+            updatecave.description = desc
+            if updatecave.name=="Not found":
+                updatecave.name=name
+            updatecave.title=name
+            updatecave.save()
+    #end of reading cave descriptions
+    
+        
+
+def extractXML(contents,tag):
+    """Return the text of the first <tag>...</tag> element in contents, or None.
+
+    The result runs from the line holding the opening tag to the line holding
+    the closing tag (both lines whole, tags included), joined with newlines.
+    None is returned when the opening tag is missing or no closing tag follows it.
+    """
+    lines = contents.splitlines()
+    begi = next((i for i, x in enumerate(lines) if '<'+tag+'>' in x), None)
+    if begi is None:
+        return None
+    #only look for the closing tag from the opening line onwards
+    endi = next((i for i in range(begi, len(lines)) if '</'+tag+'>' in lines[i]), None)
+    return None if endi is None else '\n'.join(lines[begi:endi+1])
+    
+
+def bash(cmd): #runs cmd through the system shell (shell=True), returns its standard output as text
+    #universal_newlines=True makes the pipe deliver str instead of bytes on Python 3, so callers can mix the result with str
+    process = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE,universal_newlines=True)
+    return process.communicate()[0] #stderr is not piped, so only stdout is captured and returned