summary refs log tree commit diff stats
path: root/parsers/cavesM.py
diff options
context:
space:
mode:
Diffstat (limited to 'parsers/cavesM.py')
-rw-r--r-- parsers/cavesM.py 143
1 files changed, 31 insertions, 112 deletions
diff --git a/parsers/cavesM.py b/parsers/cavesM.py
index 71bda0d..13cd5d5 100644
--- a/parsers/cavesM.py
+++ b/parsers/cavesM.py
@@ -19,23 +19,16 @@ def load():
print('Hi! I\'m caves parser. Ready to work')
print('Loading caves of 1623 area')
- loadarea('1623')
+ loadarea('caves-1623/')
def loadarea(areacode):
- if not file_exists(settings.SURVEX_DATA+'1623-and-1626.3d'):
- print('Computing master .3d file')
- bash('cavern -o'+settings.SURVEX_DATA+' '+settings.SURVEX_DATA+'1623-and-1626.svx')
- else:
- print('Loading from existing master .3d file')
-
- master3d = bash('dump3d -d '+settings.SURVEX_DATA+'1623-and-1626.3d').splitlines()
- master3dN = [x for x in master3d if ('NODE' in x)] #list of nodes of master survex file
- master3dL = [x for x in master3d if ('LINE' in x)] #list of nodes of master survex file
print('Searching all cave dirs files')
- basedir = settings.SURVEX_DATA+'caves-'+areacode+'/'
+ basedir = settings.SURVEX_DATA+areacode
+
+ bash('cavern -o'+settings.SURVEX_DATA+' '+settings.SURVEX_DATA+'1623-and-1626.svx')
cavedirs = bash("find "+basedir+" -maxdepth 1 -type d").splitlines() #this command finds all directories
print('Obtained list of directories! (#dirs='+str(len(cavedirs))+')')
@@ -47,14 +40,14 @@ def loadarea(areacode):
cavename = bash('echo '+cavedir+' | rev | cut -f1 -d \'/\' | rev').splitlines()[0] #get final bit of the directory
test = bash('if [ ! -f '+cavedir+'/'+cavename+'.svx ] ; then echo MISSING; fi')#test for file exisence
- if not file_exists(cavedir+'/'+cavename+'.svx'):
+ if 'MISSING' in test: #send error message to the database
msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' MISSING!',message_type='warn')
print('Cave missing'+cavename+' :(')
msg.save()
continue
fullname=cavedir+'/'+cavename+'.svx'
print('Found cave:'+cavename)
- cavernout = bash('cavern -o '+cavedir+' '+fullname) #make cavern process the thing
+ cavernout = bash('cavern -q '+fullname) #make cavern process the thing
if 'cavern: error:' in cavernout:
msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' Survex file messed up!',message_type='warn')
print('Fucked svx'+cavename+' :(')
@@ -64,71 +57,28 @@ def loadarea(areacode):
cavernout = cavernout.splitlines()
depth = float(([x for x in cavernout if ('Total vertical length' in x)][0].split()[-1])[:-2])
length = float(([x for x in cavernout if ('Total length' in x)][0].split()[6])[:-1])
- cavefile = open(fullname,'r')
- cavefilecontents = cavefile.read().splitlines()
- surveyname = [x for x in cavefilecontents if ('*begin ') in x][0].split()[1].lower()
- try:
- title = [x for x in cavefilecontents if ('*title ') in x][0].split()[1]
- except:
- syrveyname = "Untitled"
-
- relevant_nodes = [x for x in master3dN if (('['+areacode+'.'+surveyname+'.' in x) or ('['+areacode+'.'+surveyname+']' in x))]
- entrance_nodes = [x for x in relevant_nodes if 'ENTRANCE' in x]
- surface_nodes = [x for x in relevant_nodes if 'SURFACE' in x]
- location_nodes = []
- print('rel_nodes'+str(len(relevant_nodes)))
- if len(entrance_nodes) > 0:
- location_nodes = entrance_nodes
- elif len(surface_nodes) > 0:
- location_nodes = surface_nodes
- elif len(relevant_nodes) > 0:
- location_nodes = relevant_nodes
-
- try:
- location = sorted(location_nodes, key = lambda y : float(y.split()[3])).pop()
- except:
- print(location_nodes)
- location = 'Not found'
-
- relevant_lines = [x for x in master3dL if (('['+areacode+'.'+surveyname+'.' in x) or ('['+areacode+'.'+surveyname+']' in x))]
- try:
- lastleg = sorted(relevant_lines, key = lambda y : y.split().pop()).pop()
- except:
- lastleg = ['LINE 1900.01.01']
- try:
- lastdate = lastleg.split().pop()
- if 'STYLE' in lastdate:
- lastdate = lastleg.split().pop().pop()
- except:
- lastdate = '1900.01.01'
-
- entrance = ' '.join(location.split()[1:3])
- print((('depth','length','surv name','entr','date'),(depth,length,surveyname,entrance,lastdate))) #sanity check print
-
-
- newcave = models.CaveM(
- survex_file = fullname,
- total_length = length,
- name=areacode+'.'+surveyname,
- total_depth = depth,
- date = lastdate,
- entrance = entrance)
+ surveyname = bash('cat '+fullname+' | grep \'\*begin\' | head -n1 | cut -f2 -d \' \' ').splitlines().pop()
+ title = (bash('cat '+fullname+' | grep \'\*title\' | head -n1 | cut -f2 -d \' \' ').splitlines() or ["Not found"])[0]
+ print((('depth','length','surv name'),(depth,length,surveyname)))
+ print('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[\\.'+surveyname+'.*\\]\'')
+ nodes = bash('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[.*\\.'+surveyname+'.*\\]\'').splitlines()
+ entran = [x for x in nodes if ('ENTRANCE' in x) ]
+ print(nodes)
+
+
+ newcave = models.CaveM(survex_file = fullname, total_length = length, name=title, total_depth = depth)
newcave.save()
#end of reading survex masterfiles
-
+
print ("Reading cave descriptions")
cavefiles = bash('find '+settings.CAVEDESCRIPTIONS+' -name \'*.html\'').splitlines()
for fn in cavefiles:
f = open(fn, "r")
print(fn)
contents = f.read()
-
- slug = re.sub(r"\s+", "", extractXML(contents,'caveslug'))
+
desc = extractXML(contents,'underground_description')
- name = slug[5:] #get survex compatible name
- area = slug[0:4]
-
- print([area,name])
+ name = re.search(r'>.*<',extractXML(contents,'caveslug')).group()[6:-1]
if desc==None or name==None:
msg = models.Parser_messageM(parsername='caves',content=fn+' Description meesed up!',message_type='warn')
@@ -136,56 +86,26 @@ def loadarea(areacode):
msg.save()
continue
- print(area+'/'+name+'/'+name+'.svx')
- updatecave = models.CaveM.objects.filter(survex_file__icontains=area+'/'+name+'/'+name+'.svx')
+ updatecave = models.CaveM.objects.filter(survex_file__icontains='/'+name+'.svx')
if len(updatecave)>1:
print('Non unique solution - skipping. Name:'+name)
elif len(updatecave)==0:
- print('Cave with no survex data:'+name)
- continue
+ print('Cave with no survex data'+name)
+ newcave = models.CaveM(description = desc, name = name)
+ newcave.save()
else: #exaclty one match
- print('Adding desc:'+name)
updatecave = updatecave[0]
- updatecave.description = '/cave/descriptionM/'+slug #area-name
+ updatecave.description = desc
+ if updatecave.name=="Not found":
+ updatecave.name=name
updatecave.title=name
updatecave.save()
+
- slugS = slug
- explorersS = extractXML(contents,'explorers')
- underground_descriptionS = extractXML(contents,'underground_description')
- equipmentS = extractXML(contents,'equipment')
- referencesS = extractXML(contents,'references')
- surveyS = extractXML(contents,'survey')
- kataster_statusS = extractXML(contents,'kataster_status')
- underground_centre_lineS = extractXML(contents,'underground_centre_line')
- survex_fileS = extractXML(contents,'survex_file')
- notesS = extractXML(contents,'notes')
-
-
- newcavedesc = models.Cave_descriptionM(
- slug = slugS,
- explorers = explorersS,
- underground_description = underground_descriptionS,
- equipment = equipmentS,
- references = referencesS,
- survey = surveyS,
- kataster_status = kataster_statusS,
- underground_centre_line = underground_centre_lineS,
- survex_file = survex_fileS,
- notes = notesS)
- newcavedesc.save()
-
-
-
-
#end of reading cave descriptions
-def file_exists(filename):
- test = bash('if [ ! -f '+filename+' ] ; then echo MISSING; fi')#test for file exisence
- if 'MISSING' in test: #send error message to the database
- return False
- return True
+
def extractXML(contents,tag):
#find correct lines
@@ -199,10 +119,9 @@ def extractXML(contents,tag):
if endi!=begi:
segment = '\n'.join(lines[begi:endi+1])
else:
- segment = lines[begi:endi+1][0]
-
- hit = re.findall('<'+tag+'>(.*)</'+tag+'>', segment, re.S)[0]
- return hit
+ segment = lines[begi:endi+1]
+ return segment[0]
+
def bash(cmd): #calls command in bash shell, returns output
process = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE)