summary refs log tree commit diff stats
path: root/parsers/cavesM.py
diff options
context:
space:
mode:
Diffstat (limited to 'parsers/cavesM.py')
-rw-r--r-- parsers/cavesM.py 143
1 files changed, 112 insertions, 31 deletions
diff --git a/parsers/cavesM.py b/parsers/cavesM.py
index 13cd5d5..71bda0d 100644
--- a/parsers/cavesM.py
+++ b/parsers/cavesM.py
@@ -19,16 +19,23 @@ def load():
print('Hi! I\'m caves parser. Ready to work')
print('Loading caves of 1623 area')
- loadarea('caves-1623/')
+ loadarea('1623')
def loadarea(areacode):
+ if not file_exists(settings.SURVEX_DATA+'1623-and-1626.3d'):
+ print('Computing master .3d file')
+ bash('cavern -o'+settings.SURVEX_DATA+' '+settings.SURVEX_DATA+'1623-and-1626.svx')
+ else:
+ print('Loading from existing master .3d file')
- print('Searching all cave dirs files')
- basedir = settings.SURVEX_DATA+areacode
+ master3d = bash('dump3d -d '+settings.SURVEX_DATA+'1623-and-1626.3d').splitlines()
+ master3dN = [x for x in master3d if ('NODE' in x)] #list of nodes of master survex file
+ master3dL = [x for x in master3d if ('LINE' in x)] #list of LINE entries of master survex file
- bash('cavern -o'+settings.SURVEX_DATA+' '+settings.SURVEX_DATA+'1623-and-1626.svx')
+ print('Searching all cave dirs files')
+ basedir = settings.SURVEX_DATA+'caves-'+areacode+'/'
cavedirs = bash("find "+basedir+" -maxdepth 1 -type d").splitlines() #this command finds all directories
print('Obtained list of directories! (#dirs='+str(len(cavedirs))+')')
@@ -40,14 +47,14 @@ def loadarea(areacode):
cavename = bash('echo '+cavedir+' | rev | cut -f1 -d \'/\' | rev').splitlines()[0] #get final bit of the directory
test = bash('if [ ! -f '+cavedir+'/'+cavename+'.svx ] ; then echo MISSING; fi')#test for file exisence
- if 'MISSING' in test: #send error message to the database
+ if not file_exists(cavedir+'/'+cavename+'.svx'):
msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' MISSING!',message_type='warn')
print('Cave missing'+cavename+' :(')
msg.save()
continue
fullname=cavedir+'/'+cavename+'.svx'
print('Found cave:'+cavename)
- cavernout = bash('cavern -q '+fullname) #make cavern process the thing
+ cavernout = bash('cavern -o '+cavedir+' '+fullname) #make cavern process the thing
if 'cavern: error:' in cavernout:
msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' Survex file messed up!',message_type='warn')
print('Fucked svx'+cavename+' :(')
@@ -57,28 +64,71 @@ def loadarea(areacode):
cavernout = cavernout.splitlines()
depth = float(([x for x in cavernout if ('Total vertical length' in x)][0].split()[-1])[:-2])
length = float(([x for x in cavernout if ('Total length' in x)][0].split()[6])[:-1])
- surveyname = bash('cat '+fullname+' | grep \'\*begin\' | head -n1 | cut -f2 -d \' \' ').splitlines().pop()
- title = (bash('cat '+fullname+' | grep \'\*title\' | head -n1 | cut -f2 -d \' \' ').splitlines() or ["Not found"])[0]
- print((('depth','length','surv name'),(depth,length,surveyname)))
- print('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[\\.'+surveyname+'.*\\]\'')
- nodes = bash('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[.*\\.'+surveyname+'.*\\]\'').splitlines()
- entran = [x for x in nodes if ('ENTRANCE' in x) ]
- print(nodes)
-
-
- newcave = models.CaveM(survex_file = fullname, total_length = length, name=title, total_depth = depth)
+ cavefile = open(fullname,'r')
+ cavefilecontents = cavefile.read().splitlines()
+ surveyname = [x for x in cavefilecontents if ('*begin ') in x][0].split()[1].lower()
+ try:
+ title = [x for x in cavefilecontents if ('*title ') in x][0].split()[1]
+ except:
+ syrveyname = "Untitled"
+
+ relevant_nodes = [x for x in master3dN if (('['+areacode+'.'+surveyname+'.' in x) or ('['+areacode+'.'+surveyname+']' in x))]
+ entrance_nodes = [x for x in relevant_nodes if 'ENTRANCE' in x]
+ surface_nodes = [x for x in relevant_nodes if 'SURFACE' in x]
+ location_nodes = []
+ print('rel_nodes'+str(len(relevant_nodes)))
+ if len(entrance_nodes) > 0:
+ location_nodes = entrance_nodes
+ elif len(surface_nodes) > 0:
+ location_nodes = surface_nodes
+ elif len(relevant_nodes) > 0:
+ location_nodes = relevant_nodes
+
+ try:
+ location = sorted(location_nodes, key = lambda y : float(y.split()[3])).pop()
+ except:
+ print(location_nodes)
+ location = 'Not found'
+
+ relevant_lines = [x for x in master3dL if (('['+areacode+'.'+surveyname+'.' in x) or ('['+areacode+'.'+surveyname+']' in x))]
+ try:
+ lastleg = sorted(relevant_lines, key = lambda y : y.split().pop()).pop()
+ except:
+ lastleg = ['LINE 1900.01.01']
+ try:
+ lastdate = lastleg.split().pop()
+ if 'STYLE' in lastdate:
+ lastdate = lastleg.split().pop().pop()
+ except:
+ lastdate = '1900.01.01'
+
+ entrance = ' '.join(location.split()[1:3])
+ print((('depth','length','surv name','entr','date'),(depth,length,surveyname,entrance,lastdate))) #sanity check print
+
+
+ newcave = models.CaveM(
+ survex_file = fullname,
+ total_length = length,
+ name=areacode+'.'+surveyname,
+ total_depth = depth,
+ date = lastdate,
+ entrance = entrance)
newcave.save()
#end of reading survex masterfiles
-
+
print ("Reading cave descriptions")
cavefiles = bash('find '+settings.CAVEDESCRIPTIONS+' -name \'*.html\'').splitlines()
for fn in cavefiles:
f = open(fn, "r")
print(fn)
contents = f.read()
-
+
+ slug = re.sub(r"\s+", "", extractXML(contents,'caveslug'))
desc = extractXML(contents,'underground_description')
- name = re.search(r'>.*<',extractXML(contents,'caveslug')).group()[6:-1]
+ name = slug[5:] #get survex compatible name
+ area = slug[0:4]
+
+ print([area,name])
if desc==None or name==None:
msg = models.Parser_messageM(parsername='caves',content=fn+' Description meesed up!',message_type='warn')
@@ -86,26 +136,56 @@ def loadarea(areacode):
msg.save()
continue
+ print(area+'/'+name+'/'+name+'.svx')
- updatecave = models.CaveM.objects.filter(survex_file__icontains='/'+name+'.svx')
+ updatecave = models.CaveM.objects.filter(survex_file__icontains=area+'/'+name+'/'+name+'.svx')
if len(updatecave)>1:
print('Non unique solution - skipping. Name:'+name)
elif len(updatecave)==0:
- print('Cave with no survex data'+name)
- newcave = models.CaveM(description = desc, name = name)
- newcave.save()
+ print('Cave with no survex data:'+name)
+ continue
else: #exaclty one match
+ print('Adding desc:'+name)
updatecave = updatecave[0]
- updatecave.description = desc
- if updatecave.name=="Not found":
- updatecave.name=name
+ updatecave.description = '/cave/descriptionM/'+slug #area-name
updatecave.title=name
updatecave.save()
-
+ slugS = slug
+ explorersS = extractXML(contents,'explorers')
+ underground_descriptionS = extractXML(contents,'underground_description')
+ equipmentS = extractXML(contents,'equipment')
+ referencesS = extractXML(contents,'references')
+ surveyS = extractXML(contents,'survey')
+ kataster_statusS = extractXML(contents,'kataster_status')
+ underground_centre_lineS = extractXML(contents,'underground_centre_line')
+ survex_fileS = extractXML(contents,'survex_file')
+ notesS = extractXML(contents,'notes')
+
+
+ newcavedesc = models.Cave_descriptionM(
+ slug = slugS,
+ explorers = explorersS,
+ underground_description = underground_descriptionS,
+ equipment = equipmentS,
+ references = referencesS,
+ survey = surveyS,
+ kataster_status = kataster_statusS,
+ underground_centre_line = underground_centre_lineS,
+ survex_file = survex_fileS,
+ notes = notesS)
+ newcavedesc.save()
+
+
+
+
#end of reading cave descriptions
-
+def file_exists(filename):
+ test = bash('if [ ! -f '+filename+' ] ; then echo MISSING; fi')#test for file existence
+ if 'MISSING' in test: #file does not exist
+ return False
+ return True
def extractXML(contents,tag):
#find correct lines
@@ -119,9 +199,10 @@ def extractXML(contents,tag):
if endi!=begi:
segment = '\n'.join(lines[begi:endi+1])
else:
- segment = lines[begi:endi+1]
- return segment[0]
-
+ segment = lines[begi:endi+1][0]
+
+ hit = re.findall('<'+tag+'>(.*)</'+tag+'>', segment, re.S)[0]
+ return hit
def bash(cmd): #calls command in bash shell, returns output
process = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE)