summaryrefslogtreecommitdiffstats
path: root/parsers/surveysM.py
blob: 2b94b02019c250581c01b22cd7f5bb9b08bb1f10 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from django.conf import settings
import subprocess, re
import troggle.core.models as models

def load():
    """Parser entry point: import survex files and relations for area 1623."""
    areacode = '1623'
    print('Load survex files and relations')
    load_area(areacode)

def _svx_files_under(root):
    """Return the sorted paths of every ``*.svx`` file found below ``root``."""
    import os

    found = []
    for dirpath, _dirnames, filenames in os.walk(root):
        for filename in filenames:
            if filename.endswith('.svx'):
                found.append(os.path.join(dirpath, filename))
    # Sorted so repeated imports create the surveys in the same order.
    return sorted(found)


def load_area(areacode):
    """Create a SurveyM for every dated survex file of each cave in an area.

    Scans the immediate subdirectories of
    ``settings.SURVEX_DATA + 'caves-<areacode>/'``. For each one it looks up
    the CaveM objects whose ``survex_file`` contains that directory path
    (case-insensitive). When exactly one cave matches, every ``*.svx`` file
    below the directory that contains a ``*date`` line is saved as a SurveyM
    and added to the cave's ``surveys``. Directories with no match or with
    several matches are reported on stdout and skipped.

    Args:
        areacode: area identifier used to build the ``caves-<areacode>``
            directory name, e.g. ``'1623'``.
    """
    import os

    print('Searching all cave dirs files')
    basedir = settings.SURVEX_DATA+'caves-'+areacode+'/'

    try:
        entries = sorted(os.listdir(basedir))
    except OSError:
        # Missing or unreadable area directory: there is nothing to load.
        entries = []
    # Only real directories count as cave dirs; symlinks are not followed.
    cavenames = [
        name for name in entries
        if os.path.isdir(basedir+name) and not os.path.islink(basedir+name)
    ]
    print('Obtained list of directories! (#dirs='+str(len(cavenames))+')')

    for parentname in cavenames:
        cavedir = basedir+parentname
        candidates = models.CaveM.objects.filter(survex_file__icontains=cavedir)
        matches = len(candidates)
        if matches > 1:
            print('Non unique parent - skipping. Name:'+parentname)
            continue
        if matches == 0:
            print('Error! parent not created:'+parentname)
            continue

        # Exactly one matching cave.
        print('Adding relations of:'+parentname)
        parentcave = candidates[0]

        for fn in _svx_files_under(cavedir):
            print(fn)
            with open(fn, 'r') as svxfile:
                svxcontents = svxfile.read().splitlines()

            datelines = [line for line in svxcontents if '*date' in line]
            if not datelines:
                continue  # skip dateless files

            # Only the first *date line is used; expected form is YYYY.MM.DD.
            found = re.search(r'\d\d\d\d\.\d\d\.\d\d', datelines[0])
            if found:
                date = found.group(0)
            else:
                print('Date format error in '+fn)
                print('Dateline = '+ '"'.join(datelines))
                date = '1900.01.01'  # placeholder for an unparseable date

            newsurvex = models.SurveyM(survex_file=fn, date=date)
            newsurvex.save()
            parentcave.surveys.add(newsurvex)
            parentcave.save()
                        

def file_exists(filename):
    """Return True if ``filename`` names an existing regular file.

    The check is done with ``os.path.isfile`` rather than a shell ``[ -f ]``
    test built by string concatenation, so names that are empty or contain
    spaces or shell metacharacters are handled correctly.

    Args:
        filename: path to test.

    Returns:
        True when the path exists and is a regular file (symlinks are
        followed), False otherwise - including for directories.
    """
    import os

    return os.path.isfile(filename)

def bash(cmd):
    """Run ``cmd`` through the shell and return its standard output as text.

    Standard error is not captured. The exit status is not checked, so a
    failing command simply yields whatever it wrote to stdout (often an
    empty string).

    NOTE: ``cmd`` is handed to the shell unescaped (``shell=True``); callers
    must quote any path or other value they interpolate into it.

    Args:
        cmd: the complete shell command line.

    Returns:
        The command's stdout as a text string.
    """
    # universal_newlines=True makes Popen return text instead of bytes, so
    # callers can concatenate the result with ordinary strings.
    process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                               universal_newlines=True)
    output, _ = process.communicate()
    return output