Backed out changeset: e80a936faab6

author: Sam Wenham <sam@wenhams.co.uk> 2019-03-04 19:39:57 +0000
committer: Sam Wenham <sam@wenhams.co.uk> 2019-03-04 19:39:57 +0000
commit: 384b0438b49de3db6aafae7bc986ffa22ddf4795 (patch)
tree: d063f0fddafe26abf2dda6454bc0046e50a48dd1 /parsers
parent: e01507d54112cfc61748eeff27a3a264e1ad7516 (diff)
download: troggle-384b0438b49de3db6aafae7bc986ffa22ddf4795.tar.gz
troggle-384b0438b49de3db6aafae7bc986ffa22ddf4795.tar.bz2
troggle-384b0438b49de3db6aafae7bc986ffa22ddf4795.zip
2 files changed, 32 insertions, 130 deletions
diff --git a/parsers/cavesM.py b/parsers/cavesM.py
deleted file mode 100644
index 13cd5d5..0000000
--- a/parsers/cavesM.py
+++ /dev/null
@@ -1,129 +0,0 @@
-
-import troggle.core.models as models #import models for various objects
-from django.conf import settings
-import xml.etree.ElementTree as ET #this is used to parse XML's
-import subprocess
-import re
-
-#
-#    This parser has to find several things:
-#    There are files of .html format in expoweb area - they contain some of the important information
-#    There is a similar number of .svx files in loser are - they contain all the measurements
-#
-#    Previous version was incredibly slow due to various shitty ideas about finding things 
-#    and overelayance on python when handling regular expressions, new version delegates heavy lifting to shell
-#    and handles more sophisticated bits only
-#
-
-def load():
-    print('Hi! I\'m caves parser. Ready to work')
-    
-    print('Loading caves of 1623 area')
-    loadarea('caves-1623/')
-
-
-def loadarea(areacode):
-
-
-    print('Searching all cave dirs files')
-    basedir = settings.SURVEX_DATA+areacode
-
-    bash('cavern -o'+settings.SURVEX_DATA+' '+settings.SURVEX_DATA+'1623-and-1626.svx')
-
-    cavedirs = bash("find "+basedir+" -maxdepth 1 -type d").splitlines() #this command finds all directories
-    print('Obtained list of directories! (#dirs='+str(len(cavedirs))+')')
-    ndirs = len(cavedirs) #remember number of dirs for nice debug output
-
-    for cavedir in cavedirs:
-        if cavedir==basedir:
-            continue #skip the basedir - a non-proper subdirectory
-        cavename = bash('echo '+cavedir+' | rev | cut -f1 -d \'/\' | rev').splitlines()[0] #get final bit of the directory
-        
-        test = bash('if [ ! -f '+cavedir+'/'+cavename+'.svx ] ; then echo MISSING; fi')#test for file exisence
-        if 'MISSING' in test: #send error message to the database
-            msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' MISSING!',message_type='warn')
-            print('Cave missing'+cavename+' :(')
-            msg.save()
-            continue
-        fullname=cavedir+'/'+cavename+'.svx'        
-        print('Found cave:'+cavename)
-        cavernout = bash('cavern -q '+fullname) #make cavern process the thing
-        if 'cavern: error:' in cavernout:
-            msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' Survex file messed up!',message_type='warn')
-            print('Fucked svx'+cavename+' :(')
-            msg.save()
-            continue
-        
-        cavernout = cavernout.splitlines()
-        depth = float(([x for x in cavernout if ('Total vertical length' in x)][0].split()[-1])[:-2])
-        length = float(([x for x in cavernout if ('Total length' in x)][0].split()[6])[:-1])
-        surveyname = bash('cat '+fullname+' | grep \'\*begin\' | head -n1 | cut -f2 -d \' \' ').splitlines().pop()
-        title = (bash('cat '+fullname+' | grep \'\*title\' | head -n1 | cut -f2 -d \' \' ').splitlines() or ["Not found"])[0]      
-        print((('depth','length','surv name'),(depth,length,surveyname)))
-        print('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[\\.'+surveyname+'.*\\]\'')        
-        nodes = bash('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[.*\\.'+surveyname+'.*\\]\'').splitlines()
-        entran = [x for x in nodes if ('ENTRANCE' in x) ]
-        print(nodes)
-
-
-        newcave =  models.CaveM(survex_file = fullname, total_length = length, name=title, total_depth = depth)
-        newcave.save()
-    #end of reading survex masterfiles
-
-    print ("Reading cave descriptions")
-    cavefiles = bash('find '+settings.CAVEDESCRIPTIONS+' -name \'*.html\'').splitlines()
-    for fn in cavefiles:
-        f = open(fn, "r")
-        print(fn)
-        contents = f.read()    
-
-        desc = extractXML(contents,'underground_description')
-        name = re.search(r'>.*<',extractXML(contents,'caveslug')).group()[6:-1]
-        
-        if desc==None or name==None:
-            msg = models.Parser_messageM(parsername='caves',content=fn+' Description meesed up!',message_type='warn')
-            print('Fucked description '+fn+' :(')
-            msg.save()
-            continue
-
-        
-        updatecave = models.CaveM.objects.filter(survex_file__icontains='/'+name+'.svx')
-        if len(updatecave)>1:
-            print('Non unique solution - skipping. Name:'+name)
-        elif len(updatecave)==0:
-            print('Cave with no survex data'+name)
-            newcave =  models.CaveM(description = desc, name = name)
-            newcave.save()
-        else: #exaclty one match
-            updatecave = updatecave[0]
-            updatecave.description = desc
-            if updatecave.name=="Not found":
-                updatecave.name=name
-            updatecave.title=name
-            updatecave.save()
-        
-
-    #end of reading cave descriptions
-    
-        
-
-def extractXML(contents,tag):
-    #find correct lines
-    lines = contents.splitlines()
-    beg = [x for x in lines if ('<'+tag+'>' in x)]
-    end = [x for x in lines if ('</'+tag+'>' in x)]
-    if (not beg) or (not end):
-        return None       
-    begi = lines.index(beg[0])
-    endi = lines.index(end[0])
-    if endi!=begi:
-        segment = '\n'.join(lines[begi:endi+1])
-    else:
-        segment = lines[begi:endi+1]
-    return segment[0]
-    
-
-def bash(cmd): #calls command in bash shell, returns output
-    process = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE)
-    output, error = process.communicate()
-    return output
diff --git a/parsers/people.py b/parsers/people.py
index 4dba3a8..bc18472 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -62,6 +62,8 @@ def LoadPersonsExpos():
     
     # make persons
     print "Loading personexpeditions"
+    #expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
+    #expomissing = set(expoers2008)
 
     for personline in personreader:
         name = personline[header["Name"]]
@@ -83,7 +85,36 @@ def LoadPersonsExpos():
                 nonLookupAttribs = {'nickname':nickname, 'is_guest':(personline[header["Guest"]] == "1")}
                 save_carefully(models.PersonExpedition, lookupAttribs, nonLookupAttribs)
 
-                
+
+    # this fills in those people for whom 2008 was their first expo
+    #print "Loading personexpeditions 2008"
+    #for name in expomissing:
+        # firstname, lastname = name.split()
+        # is_guest = name in ["Eeva Makiranta", "Keith Curtis"]
+        # print "2008:", name
+        # persons = list(models.Person.objects.filter(first_name=firstname, last_name=lastname))
+        # if not persons:
+            # person = models.Person(first_name=firstname, last_name = lastname, is_vfho = False, mug_shot = "")
+            # #person.Sethref()
+            # person.save()
+        # else:
+            # person = persons[0]
+        # expedition = models.Expedition.objects.get(year="2008")
+        # personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname="", is_guest=is_guest)
+        # personexpedition.save()
+
+    #Notability is now a method of person. Makes no sense to store it in the database; it would need to be recalculated every time something changes. - AC 16 Feb 09
+    # could rank according to surveying as well
+    #print "Setting person notability"
+    #for person in models.Person.objects.all():
+        #person.notability = 0.0
+        #for personexpedition in person.personexpedition_set.all():
+            #if not personexpedition.is_guest:
+                #person.notability += 1.0 / (2012 - int(personexpedition.expedition.year))
+        #person.bisnotable = person.notability > 0.3 # I don't know how to filter by this
+        #person.save()
+        
+        
 # used in other referencing parser functions
 # expedition name lookup cached for speed (it's a very big list)
 Gpersonexpeditionnamelookup = { }
author	Sam Wenham <sam@wenhams.co.uk>	2019-03-04 19:39:57 +0000
committer	Sam Wenham <sam@wenhams.co.uk>	2019-03-04 19:39:57 +0000
commit	384b0438b49de3db6aafae7bc986ffa22ddf4795 (patch)
tree	d063f0fddafe26abf2dda6454bc0046e50a48dd1 /parsers
parent	e01507d54112cfc61748eeff27a3a264e1ad7516 (diff)
download	troggle-384b0438b49de3db6aafae7bc986ffa22ddf4795.tar.gz troggle-384b0438b49de3db6aafae7bc986ffa22ddf4795.tar.bz2 troggle-384b0438b49de3db6aafae7bc986ffa22ddf4795.zip