import troggle.core.models as models #import models for various objects
from django.conf import settings
import xml.etree.ElementTree as ET #used to parse XML files
import subprocess
import re
#
# This parser has to find several things:
# There are files of .html format in the expoweb area - they contain some of the important information
# There is a similar number of .svx files in the loser area - they contain all the measurements
#
# The previous version was incredibly slow due to various poor approaches to finding things
# and over-reliance on Python when handling regular expressions; the new version delegates the heavy lifting to the shell
# and only handles the more sophisticated bits itself
#
def load():
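    """Entry point: currently loads only the caves of the 1623 area."""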
    print('Hi! I\'m the caves parser. Ready to work.')
print('Loading caves of 1623 area')
loadarea('1623')
def loadarea(areacode):
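    """Imports one survex area (e.g. '1623').
    Runs cavern on the master '1623-and-1626.svx' file if no master .3d exists, dumps it with dump3d,
    then walks every cave directory under caves-<areacode>/, running cavern on each cave's .svx file
    to obtain its length and depth, derives an entrance location and a last survey date from the dump3d
    records, and saves a CaveM object per cave. Finally the .html cave descriptions are parsed and
    stored as Cave_descriptionM objects, linked to the matching CaveM where exactly one match exists."""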
if not file_exists(settings.SURVEX_DATA+'1623-and-1626.3d'):
print('Computing master .3d file')
bash('cavern -o'+settings.SURVEX_DATA+' '+settings.SURVEX_DATA+'1623-and-1626.svx')
else:
print('Loading from existing master .3d file')
master3d = bash('dump3d -d '+settings.SURVEX_DATA+'1623-and-1626.3d').splitlines()
master3dN = [x for x in master3d if ('NODE' in x)] #list of nodes of master survex file
    master3dL = [x for x in master3d if ('LINE' in x)] #list of legs (LINE records) of master survex file
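    # Assumed dump3d -d record layout: 'NODE <x> <y> <z> [<station.name>] <FLAGS...>' and
    # 'LINE <coords...> [<survey.name>] <FLAGS...> <date>', i.e. token 3 of a NODE is its altitude,
    # tokens 1-2 its easting/northing, and the last token of a LINE is normally its survey date.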
    print('Searching all cave directories')
basedir = settings.SURVEX_DATA+'caves-'+areacode+'/'
cavedirs = bash("find "+basedir+" -maxdepth 1 -type d").splitlines() #this command finds all directories
print('Obtained list of directories! (#dirs='+str(len(cavedirs))+')')
ndirs = len(cavedirs) #remember number of dirs for nice debug output
for cavedir in cavedirs:
if cavedir==basedir:
continue #skip the basedir - a non-proper subdirectory
cavename = bash('echo '+cavedir+' | rev | cut -f1 -d \'/\' | rev').splitlines()[0] #get final bit of the directory
        if not file_exists(cavedir+'/'+cavename+'.svx'): #test for file existence
msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' MISSING!',message_type='warn')
            print('Cave missing: '+cavename+' :(')
msg.save()
continue
fullname=cavedir+'/'+cavename+'.svx'
print('Found cave:'+cavename)
cavernout = bash('cavern -o '+cavedir+' '+fullname) #make cavern process the thing
if 'cavern: error:' in cavernout:
msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' Survex file messed up!',message_type='warn')
            print('Broken svx: '+cavename+' :(')
msg.save()
continue
cavernout = cavernout.splitlines()
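        # depth and length are scraped from cavern's summary lines ('Total vertical length ...' and
        # 'Total length of survey legs = ...', assumed format); the trailing slices strip the unit suffix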
depth = float(([x for x in cavernout if ('Total vertical length' in x)][0].split()[-1])[:-2])
length = float(([x for x in cavernout if ('Total length' in x)][0].split()[6])[:-1])
cavefile = open(fullname,'r')
cavefilecontents = cavefile.read().splitlines()
surveyname = [x for x in cavefilecontents if ('*begin ') in x][0].split()[1].lower()
        try:
            title = [x for x in cavefilecontents if ('*title ') in x][0].split()[1]
        except:
            title = "Untitled" #fall back when the .svx file has no *title line
relevant_nodes = [x for x in master3dN if (('['+areacode+'.'+surveyname+'.' in x) or ('['+areacode+'.'+surveyname+']' in x))]
entrance_nodes = [x for x in relevant_nodes if 'ENTRANCE' in x]
surface_nodes = [x for x in relevant_nodes if 'SURFACE' in x]
location_nodes = []
print('rel_nodes'+str(len(relevant_nodes)))
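        # prefer stations flagged ENTRANCE, then SURFACE stations, then any station of this cave;
        # the highest of those (by altitude, token 3 of the NODE record) is used as the entrance location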
if len(entrance_nodes) > 0:
location_nodes = entrance_nodes
elif len(surface_nodes) > 0:
location_nodes = surface_nodes
elif len(relevant_nodes) > 0:
location_nodes = relevant_nodes
try:
location = sorted(location_nodes, key = lambda y : float(y.split()[3])).pop()
except:
print(location_nodes)
location = 'Not found'
relevant_lines = [x for x in master3dL if (('['+areacode+'.'+surveyname+'.' in x) or ('['+areacode+'.'+surveyname+']' in x))]
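        # LINE records are sorted by their last token (the survey date when dump3d is run with -d),
        # so the final element is the most recently surveyed leg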
try:
lastleg = sorted(relevant_lines, key = lambda y : y.split().pop()).pop()
except:
            lastleg = 'LINE 1900.01.01' #plain string so the .split() below still works when no legs were found
try:
lastdate = lastleg.split().pop()
if 'STYLE' in lastdate:
                lastdate = lastleg.split()[-2] #the date precedes the trailing STYLE token on these records
except:
lastdate = '1900.01.01'
entrance = ' '.join(location.split()[1:3])
print((('depth','length','surv name','entr','date'),(depth,length,surveyname,entrance,lastdate))) #sanity check print
newcave = models.CaveM(
survex_file = fullname,
total_length = length,
name=areacode+'.'+surveyname,
total_depth = depth,
date = lastdate,
entrance = entrance)
newcave.save()
#end of reading survex masterfiles
print ("Reading cave descriptions")
cavefiles = bash('find '+settings.CAVEDESCRIPTIONS+' -name \'*.html\'').splitlines()
for fn in cavefiles:
f = open(fn, "r")
print(fn)
contents = f.read()
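        # the caveslug is expected to look like '1623-115': the first four characters give the area,
        # everything after the '-' gives the survex-compatible cave name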
slug = re.sub(r"\s+", "", extractXML(contents,'caveslug'))
desc = extractXML(contents,'underground_description')
name = slug[5:] #get survex compatible name
area = slug[0:4]
print([area,name])
        if desc is None or name is None:
            msg = models.Parser_messageM(parsername='caves',content=fn+' Description messed up!',message_type='warn')
            print('Broken description '+fn+' :(')
msg.save()
continue
print(area+'/'+name+'/'+name+'.svx')
updatecave = models.CaveM.objects.filter(survex_file__icontains=area+'/'+name+'/'+name+'.svx')
if len(updatecave)>1:
print('Non unique solution - skipping. Name:'+name)
elif len(updatecave)==0:
print('Cave with no survex data:'+name)
continue
        else: #exactly one match
print('Adding desc:'+name)
updatecave = updatecave[0]
updatecave.description = '/cave/descriptionM/'+slug #area-name
updatecave.title=name
updatecave.save()
slugS = slug
explorersS = extractXML(contents,'explorers')
underground_descriptionS = extractXML(contents,'underground_description')
equipmentS = extractXML(contents,'equipment')
referencesS = extractXML(contents,'references')
surveyS = extractXML(contents,'survey')
kataster_statusS = extractXML(contents,'kataster_status')
underground_centre_lineS = extractXML(contents,'underground_centre_line')
survex_fileS = extractXML(contents,'survex_file')
notesS = extractXML(contents,'notes')
newcavedesc = models.Cave_descriptionM(
slug = slugS,
explorers = explorersS,
underground_description = underground_descriptionS,
equipment = equipmentS,
references = referencesS,
survey = surveyS,
kataster_status = kataster_statusS,
underground_centre_line = underground_centre_lineS,
survex_file = survex_fileS,
notes = notesS)
newcavedesc.save()
#end of reading cave descriptions
def file_exists(filename):
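    """Returns True if filename exists as a regular file (checked via the shell), False otherwise."""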
    test = bash('if [ ! -f '+filename+' ] ; then echo MISSING; fi') #test for file existence
if 'MISSING' in test: #send error message to the database
return False
return True
def extractXML(contents,tag):
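    """Returns the text enclosed by <tag>...</tag> in an .html description file, or None if either tag is missing."""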
#find correct lines
lines = contents.splitlines()
beg = [x for x in lines if ('<'+tag+'>' in x)]
end = [x for x in lines if ('</'+tag+'>' in x)]
if (not beg) or (not end):
return None
begi = lines.index(beg[0])
endi = lines.index(end[0])
if endi!=begi:
segment = '\n'.join(lines[begi:endi+1])
else:
segment = lines[begi:endi+1][0]
hit = re.findall('<'+tag+'>(.*)</'+tag+'>', segment, re.S)[0]
return hit
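# e.g. extractXML('<notes>dig at the far end</notes>', 'notes') returns 'dig at the far end'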
def bash(cmd): #calls a command in a bash shell, returns its stdout as a string
    process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
    output, error = process.communicate()
    if isinstance(output, bytes): #under Python 3 subprocess returns bytes; decode so callers can do string matching
        output = output.decode('utf-8', errors='replace')
    return output