Diffstat (limited to 'parsers')
-rw-r--r--  parsers/cavesM.py  129
-rw-r--r--  parsers/people.py   33
2 files changed, 130 insertions, 32 deletions
diff --git a/parsers/cavesM.py b/parsers/cavesM.py
new file mode 100644
index 0000000..13cd5d5
--- /dev/null
+++ b/parsers/cavesM.py
@@ -0,0 +1,129 @@
+
+import troggle.core.models as models #database models for caves and parser messages
+from django.conf import settings
+import subprocess #used to delegate the heavy lifting to a bash shell
+import re
+
+#
+# This parser has to find several things:
+# There are files of .html format in the expoweb area - they contain some of the important information
+# There is a similar number of .svx files in the loser area - they contain all the measurements
+#
+# The previous version was incredibly slow, due to inefficient file searching and an
+# over-reliance on Python for regular expression handling; this version delegates the
+# heavy lifting to the shell and handles only the more sophisticated bits itself
+#
+
+def load():
+    print('Hi! I\'m the caves parser. Ready to work.')
+
+    print('Loading caves of 1623 area')
+    loadarea('caves-1623/')
+
+
+def loadarea(areacode):
+
+    print('Searching for all cave directories')
+    basedir = settings.SURVEX_DATA+areacode
+
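+    # run cavern once over the master file so a combined .3d file exists for the
+    # dump3d queries further down (assumes 1623-and-1626.svx includes all the caves)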
+    bash('cavern -o'+settings.SURVEX_DATA+' '+settings.SURVEX_DATA+'1623-and-1626.svx')
+
+    cavedirs = bash("find "+basedir+" -maxdepth 1 -type d").splitlines() #find all immediate subdirectories
+    print('Obtained list of directories! (#dirs='+str(len(cavedirs))+')')
+    ndirs = len(cavedirs) #remember the number of dirs for nice debug output
+
+    for cavedir in cavedirs:
+        if cavedir==basedir:
+            continue #skip the basedir itself - not a proper subdirectory
+        cavename = bash('echo '+cavedir+' | rev | cut -f1 -d \'/\' | rev').splitlines()[0] #final path component, i.e. the shell's basename
+
+        test = bash('if [ ! -f '+cavedir+'/'+cavename+'.svx ] ; then echo MISSING; fi') #test for file existence
+        if 'MISSING' in test: #send error message to the database
+            msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' MISSING!',message_type='warn')
+            print('Cave missing: '+cavename+' :(')
+            msg.save()
+            continue
+        fullname = cavedir+'/'+cavename+'.svx'
+        print('Found cave: '+cavename)
+        cavernout = bash('cavern -q '+fullname) #make cavern process the file
+        if 'cavern: error:' in cavernout:
+            msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' Survex file messed up!',message_type='warn')
+            print('Fucked svx: '+cavename+' :(')
+            msg.save()
+            continue
+
+        cavernout = cavernout.splitlines()
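+        # pull the totals out of cavern's summary; the relevant lines look roughly like
+        # 'Total length of survey legs = 123.45m (...)' and
+        # 'Total vertical length of survey legs = 61.90m.' (exact format assumed)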
+        depth = float(([x for x in cavernout if ('Total vertical length' in x)][0].split()[-1])[:-2])
+        length = float(([x for x in cavernout if ('Total length' in x)][0].split()[6])[:-1])
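+        # the first *begin gives the survey name used in the .3d station labels, and
+        # *title a human-readable title; caves without a *title fall back to
+        # "Not found" and are renamed from the html descriptions later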
+        surveyname = bash('cat '+fullname+' | grep \'\*begin\' | head -n1 | cut -f2 -d \' \' ').splitlines().pop()
+        title = (bash('cat '+fullname+' | grep \'\*title\' | head -n1 | cut -f2 -d \' \' ').splitlines() or ["Not found"])[0]
+        print((('depth','length','surv name'),(depth,length,surveyname)))
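+        # query the combined .3d file for this survey's stations: dump3d prints one
+        # NODE line per station, flagged ENTRANCE where applicable (format assumed
+        # from dump3d output); entrances are collected here but not stored yet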
+        print('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[.*\\.'+surveyname+'.*\\]\'')
+        nodes = bash('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[.*\\.'+surveyname+'.*\\]\'').splitlines()
+        entran = [x for x in nodes if ('ENTRANCE' in x)] #stations flagged as entrances
+        print(nodes)
+
+
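+        # store what survex told us; the description and a proper name are filled
+        # in by the html pass below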
+        newcave = models.CaveM(survex_file = fullname, total_length = length, name=title, total_depth = depth)
+        newcave.save()
+    #end of reading survex masterfiles
+
+    print("Reading cave descriptions")
+    cavefiles = bash('find '+settings.CAVEDESCRIPTIONS+' -name \'*.html\'').splitlines()
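+    # each html file carries its data in pseudo-XML tags such as <caveslug> and
+    # <underground_description>, pulled out line-wise by extractXML() below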
+    for fn in cavefiles:
+        f = open(fn, "r")
+        print(fn)
+        contents = f.read()
+        f.close()
+
+        desc = extractXML(contents,'underground_description')
+        slugline = extractXML(contents,'caveslug')
+        #strip the leading '>' and the area prefix from e.g. '>1623-101<' to get '101'
+        #(assumes slugs always start with a four-character area code plus a dash)
+        slug = re.search(r'>.*<',slugline) if slugline is not None else None
+        name = slug.group()[6:-1] if slug is not None else None
+
+        if desc is None or name is None:
+            msg = models.Parser_messageM(parsername='caves',content=fn+' Description messed up!',message_type='warn')
+            print('Fucked description '+fn+' :(')
+            msg.save()
+            continue
+
+
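+        # match this description to a cave already created from the survex data,
+        # by looking for a survex file ending in /<name>.svx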
+        updatecave = models.CaveM.objects.filter(survex_file__icontains='/'+name+'.svx')
+        if len(updatecave)>1:
+            print('Non-unique match - skipping. Name: '+name)
+        elif len(updatecave)==0:
+            print('Cave with no survex data: '+name)
+            newcave = models.CaveM(description = desc, name = name)
+            newcave.save()
+        else: #exactly one match
+            updatecave = updatecave[0]
+            updatecave.description = desc
+            if updatecave.name=="Not found":
+                updatecave.name=name
+                updatecave.title=name
+            updatecave.save()
+
+
+    #end of reading cave descriptions
+
+
+
+def extractXML(contents,tag):
+    #return the lines of contents between <tag> and </tag> (inclusive) as one string, or None
+    lines = contents.splitlines()
+    beg = [x for x in lines if ('<'+tag+'>' in x)]
+    end = [x for x in lines if ('</'+tag+'>' in x)]
+    if (not beg) or (not end):
+        return None
+    begi = lines.index(beg[0])
+    endi = lines.index(end[0])
+    return '\n'.join(lines[begi:endi+1])
+
+
+def bash(cmd): #run cmd in a bash shell, return its stdout as a string
+    process = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE)
+    output, error = process.communicate() #error is always None here - stderr is not captured
+    return output
diff --git a/parsers/people.py b/parsers/people.py
index bc18472..4dba3a8 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -62,8 +62,6 @@ def LoadPersonsExpos():
# make persons
print "Loading personexpeditions"
- #expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
- #expomissing = set(expoers2008)
for personline in personreader:
name = personline[header["Name"]]
@@ -85,36 +83,7 @@ def LoadPersonsExpos():
nonLookupAttribs = {'nickname':nickname, 'is_guest':(personline[header["Guest"]] == "1")}
save_carefully(models.PersonExpedition, lookupAttribs, nonLookupAttribs)
-
- # this fills in those people for whom 2008 was their first expo
- #print "Loading personexpeditions 2008"
- #for name in expomissing:
- # firstname, lastname = name.split()
- # is_guest = name in ["Eeva Makiranta", "Keith Curtis"]
- # print "2008:", name
- # persons = list(models.Person.objects.filter(first_name=firstname, last_name=lastname))
- # if not persons:
- # person = models.Person(first_name=firstname, last_name = lastname, is_vfho = False, mug_shot = "")
- # #person.Sethref()
- # person.save()
- # else:
- # person = persons[0]
- # expedition = models.Expedition.objects.get(year="2008")
- # personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname="", is_guest=is_guest)
- # personexpedition.save()
-
- #Notability is now a method of person. Makes no sense to store it in the database; it would need to be recalculated every time something changes. - AC 16 Feb 09
- # could rank according to surveying as well
- #print "Setting person notability"
- #for person in models.Person.objects.all():
- #person.notability = 0.0
- #for personexpedition in person.personexpedition_set.all():
- #if not personexpedition.is_guest:
- #person.notability += 1.0 / (2012 - int(personexpedition.expedition.year))
- #person.bisnotable = person.notability > 0.3 # I don't know how to filter by this
- #person.save()
-
-
+
# used in other referencing parser functions
# expedition name lookup cached for speed (it's a very big list)
Gpersonexpeditionnamelookup = { }