parsers/surveys.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298

import sys
import os
import types
import logging
import stat
import csv
import re
import datetime

from PIL import Image
from functools import reduce

import settings
from troggle.core.models_survex import SingleScan, ScansFolder, TunnelFile
from troggle.core.models import DataIssue
from troggle.core.utils import save_carefully


def get_or_create_placeholder(year):
    """Return the per-year placeholder logbook entry used to hold surveys.

    Every survey has to be attached to a logbook entry, but there is no
    automatic way to tell which survey belongs to which entry. One
    placeholder entry per year is therefore used instead; it is looked up
    (and, per the original author's note, created when missing) through
    ``save_carefully``.

    ``year`` may be anything ``int()`` accepts. The first element of the pair
    returned by ``save_carefully`` is handed back to the caller.
    """
    lookup = {
        'date__year': int(year),
        'title': "placeholder for surveys",
    }
    defaults = {
        'text': "surveys temporarily attached to this should be re-attached to their actual trips",
        'date': datetime.date(int(year), 1, 1),
    }
    # NOTE(review): LogbookEntry is not imported in the visible part of this
    # module - confirm it is in scope before calling this function.
    entry, _was_created = save_carefully(LogbookEntry, lookup, defaults)
    return entry

def listdir(*directories):
    """List a directory below ``settings.SURVEYS``, locally or over HTTP.

    The path components in ``directories`` are joined onto
    ``settings.SURVEYS`` and listed with ``os.listdir``. If that fails with an
    ``OSError`` (for example because ``settings.SURVEYS`` is a URL rather than
    a local path), the listing is fetched from ``<SURVEYS>listdir/<a>/<b>...``
    instead, one entry per line of the response.

    Returns a list of ``str`` names. Errors from the remote fetch propagate.
    """
    try:
        return os.listdir(os.path.join(settings.SURVEYS, *directories))
    except OSError:
        # Only filesystem failures trigger the remote fallback; anything else
        # is a programming error and should surface.
        import urllib.request

        url = settings.SURVEYS + "/".join(["listdir", *directories])
        # '#' occurs in wallet names and would otherwise start a URL fragment.
        with urllib.request.urlopen(url.replace("#", "%23")) as response:
            raw_lines = response.readlines()
        # urlopen yields bytes: decode before doing str operations, and drop
        # the line terminator so the trailing "/" can actually be stripped.
        # NOTE(review): assumes the listing endpoint replies in UTF-8 - confirm.
        return [raw.decode().strip().rstrip("/") for raw in raw_lines]


# Intended to handle a URL or a local path, so that a set of scans on another
# server can be referred to; only the local-path case is implemented.
def GetListDir(sdir):
    """List the non-hidden entries of the directory ``sdir``.

    Returns a list of ``(name, full_path, is_directory)`` tuples, one per
    entry of ``sdir`` whose name does not start with ".". The order is
    whatever ``os.listdir`` yields.

    Raises ``NotImplementedError`` when ``sdir`` starts with ``"http://"``:
    remote listing has never been written.
    """
    if sdir[:7] == "http://":
        # An explicit raise, unlike an assert, survives ``python -O``.
        raise NotImplementedError("Not written")

    entries = []
    for name in os.listdir(sdir):
        if name[0] == ".":
            continue
        full_path = os.path.join(sdir, name)
        entries.append((name, full_path, os.path.isdir(full_path)))
    return entries


def LoadListScansFile(scansfolder):
    """Create a ``SingleScan`` record for every image file in a wallet folder.

    ``scansfolder`` is the (already saved) folder object whose ``fpath`` is
    listed. Sub-directories are flattened by one level: their entries are
    treated as if they sat directly in the wallet. Only names ending in
    png/jpg/jpeg/pdf/svg/gif (any letter case) are recorded.

    A "." is printed for every tenth scan saved, as a progress indicator.
    """
    entries = []
    # Flatten out any directories in these wallet folders - there should not
    # be any, but one level is tolerated.
    for name, full_path, is_dir in GetListDir(scansfolder.fpath):
        if is_dir:
            entries.extend(GetListDir(full_path))
        else:
            entries.append((name, full_path, is_dir))

    since_last_dot = 0
    for name, full_path, _is_dir in entries:
        # The case-insensitive flag is passed as an argument: an inline
        # "(?i)" anywhere but the start of the pattern is an error on
        # Python 3.11 and later.
        if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif)$", name, re.IGNORECASE):
            singlescan = SingleScan(ffile=full_path, name=name, scansfolder=scansfolder)
            singlescan.save()
            since_last_dot += 1
            if since_last_dot >= 10:
                print(".", end='')
                since_last_dot = 0

        
# Walks the scans directories and builds up the ScansFolder / SingleScan
# records that the rest of the system reads later.
def LoadListScans():
    """Rebuild all ``ScansFolder`` and ``SingleScan`` records from disk.

    Existing records of both kinds are deleted first. Then, in order:

    * the ``smkhs`` directory (reached via ``../surveys/smkhs`` relative to
      ``settings.SURVEY_SCANS``) is loaded if it exists;
    * every directory directly under ``settings.SURVEY_SCANS`` is visited:
      a four-digit (year) directory contributes one folder record per
      sub-directory, any other directory except ``thumbs`` contributes one
      folder record itself.

    Progress is written to stdout.
    """
    print(' - Loading Survey Scans')

    SingleScan.objects.all().delete()
    ScansFolder.objects.all().delete()
    print(' - deleting all scansFolder and scansSingle objects')

    def register(folder_path, wallet):
        # Save a folder record, then record the scans found inside it.
        folder = ScansFolder(fpath=folder_path, walletname=wallet)
        folder.save()
        LoadListScansFile(folder)

    # The large kh survey scans live outside the main scans tree.
    smkhs_folder = ScansFolder(
        fpath=os.path.join(settings.SURVEY_SCANS, "../surveys/smkhs"),
        walletname="smkhs",
    )
    print("smkhs", end=' ')
    if os.path.isdir(smkhs_folder.fpath):
        smkhs_folder.save()
        LoadListScansFile(smkhs_folder)

    # Now the main surveyscans directory.
    print(' - ', end=' ')
    for name, full_path, is_dir in GetListDir(settings.SURVEY_SCANS):
        if not is_dir:
            continue

        if re.match(r"\d\d\d\d$", name):
            # A year folder: each sub-directory is a wallet.
            print(name, end=' ')
            for wallet_name, wallet_path, wallet_is_dir in GetListDir(full_path):
                if wallet_is_dir:
                    register(wallet_path, wallet_name)
        elif name != "thumbs":
            # Any other top-level directory is treated as a wallet itself.
            register(full_path, name)
            

def find_tunnel_scan(tunnelfile, path):
    '''Link ``tunnelfile`` to whatever the text ``path`` refers to.

    ``path`` is a line of text which may or may not contain a recognisable
    name of a scanned file that was already seen when all the files in the
    surveyscans directories were imported.

    * If ``path`` ends in ``<wallet>/<image file>`` for a recognised wallet
      name, the matching ScansFolder (and the SingleScan inside it, when
      found) are added to ``tunnelfile``. More than one matching scan is
      reported as a DataIssue and the first one is used.
    * Otherwise, if ``path`` is non-empty and does not end in an image
      extension (any letter case), it is looked up by base name among the
      TunnelFile records; on a match a DataIssue is recorded and the first
      match is added to ``tunnelfile.tunnelcontains``.

    ``tunnelfile`` is saved in every case.
    '''
    scansfolder, scansfile = None, None
    mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg))$", path)
    if mscansdir:
        scansfolderl = ScansFolder.objects.filter(walletname=mscansdir.group(1))
        if len(scansfolderl):
            assert len(scansfolderl) == 1
            scansfolder = scansfolderl[0]
        if scansfolder:
            scansfilel = scansfolder.singlescan_set.filter(name=mscansdir.group(2))
            if len(scansfilel):
                if len(scansfilel) > 1:
                    message = "! More than one image filename matches filter query. [{}]: {} {} {} {}".format(scansfilel[0], mscansdir.group(1), mscansdir.group(2), tunnelfile.tunnelpath, path)
                    print(message)
                    DataIssue.objects.create(parser='Tunnel', message=message)
                scansfile = scansfilel[0]

        if scansfolder:
            tunnelfile.manyscansfolders.add(scansfolder)
        if scansfile:
            tunnelfile.scans.add(scansfile)

    # The case-insensitive flag is passed as an argument: a trailing inline
    # "(?i)" is an error on Python 3.11 and later.
    elif path and not re.search(r"\.(?:png|jpg|pdf|jpeg|gif)$", path, re.IGNORECASE):
        name = os.path.split(path)[1]
        rtunnelfilel = TunnelFile.objects.filter(tunnelname=name)
        if len(rtunnelfilel):
            message = "! Two paths with same name [{}]: {}".format(path, name)
            print(message)
            DataIssue.objects.create(parser='Tunnel', message=message)
            rtunnelfile = rtunnelfilel[0]
            tunnelfile.tunnelcontains.add(rtunnelfile)

    tunnelfile.save()

def findimageinsert(therionfile, xth_me):
    '''Placeholder for linking a Therion drawing file to the image (scan)
    file it references. Not implemented yet: does nothing and returns None.
    '''
    return None

def findimportinsert(therionfile, imp):
    '''Placeholder for linking a Therion scrap to the Therion scrap it
    references. Not implemented yet: does nothing and returns None.
    '''
    return None

# Patterns applied to the text of Therion drawing files, all in MULTILINE
# mode so that ^ and $ anchor at each line.
# From 'xth_me_image_insert' through to a '}' at the end of the same line.
rx_xth_me = re.compile(r'xth_me_image_insert.*{.*}$', re.MULTILINE)
# A line starting with 'survey '; captures the word characters that follow.
# NOTE(review): not used in the visible part of this file, and despite the
# name it matches 'survey', not 'scrap' - confirm the intent.
rx_scrap  = re.compile(r'^survey (\w*).*$', re.MULTILINE)
# A line starting with 'input '; captures only the leading word characters of
# the argument (so it stops at the first '/', '.' or '-').
rx_input  = re.compile(r'^input (\w*).*$', re.MULTILINE)

def settherionfileinfo(filetuple):
    '''Read a Therion drawing file and set values on its tunnelfile object.

    ``filetuple`` is ``(thtype, therionfile)`` where ``thtype`` is ``'th'`` or
    ``'th2'`` and ``therionfile`` is the record whose ``tunnelpath`` is
    relative to ``settings.TUNNEL_DATA``.

    Sets ``filesize`` and ``npaths`` (the count of ``input`` lines for .th,
    of ``line`` lines for .th2) and saves the record. A zero-length file is
    reported as a DataIssue and the function returns without saving. Every
    image insert and every ``input`` line found is currently reported as a
    DataIssue, because linking them is not implemented yet.
    '''
    thtype, therionfile = filetuple

    ff = os.path.join(settings.TUNNEL_DATA, therionfile.tunnelpath)
    therionfile.filesize = os.stat(ff)[stat.ST_SIZE]
    if therionfile.filesize <= 0:
        message = "! Zero length therion file {}".format(ff)
        print(message)
        DataIssue.objects.create(parser='Therion', message=message)
        return
    # The context manager closes the file even if read() raises (for example
    # on a decoding error).
    with open(ff, 'r') as fin:
        ttext = fin.read()

    # The equivalent for a tunnel 'path' would be a .th2 'line wall' or 'scrap'
    if thtype == 'th':
        therionfile.npaths = len(re.findall(r"^input ", ttext, re.MULTILINE))
    elif thtype == 'th2':
        therionfile.npaths = len(re.findall(r"^line ", ttext, re.MULTILINE))
    therionfile.save()

    # Scan and look for survex blocks that might have been included, and image
    # scans (as for tunnel drawings) which would populate tunnelfile.survexfile
    #
    # In .th2 files:
    # ##XTHERION## xth_me_image_insert {500 1 1.0} {1700 {}} ../../../expofiles/surveyscans/2014/01popped_elev1.jpeg 0 {}
    # scrap blownout -projection plan -scale [-81.0 -42.0 216.0 -42.0 0.0 0.0 7.5438 0.0 m]

    for xth_me in rx_xth_me.findall(ttext):
        # The third whitespace-separated token from the end is the image path
        # in the example line above.
        message = f'! Un-parsed image filename: {therionfile.tunnelname} : {xth_me.split()[-3]} - {therionfile.tunnelpath}'
        DataIssue.objects.create(parser='Therion', message=message)
        findimageinsert(therionfile, xth_me)

    for inp in rx_input.findall(ttext):
        # If this 'input' is a .th2 file we have already seen, then we can
        # assign this as a sub-file, but we would need to disentangle to get
        # the current path properly.
        message = f'! Un-set Therion .th2 input: - {therionfile.tunnelname} : {inp} - {therionfile.tunnelpath}'
        DataIssue.objects.create(parser='Therion', message=message)
        findimportinsert(therionfile, inp)

    therionfile.save()
    
# Bytes patterns applied to the raw contents of Tunnel .xml drawing files.
# Each occurrence of an opening <skpath tag; occurrences are only counted.
rx_skpath = re.compile(rb'<skpath')
# A <pcarea area_signal="frame" ...> element; captures the values of its
# sfsketch and sfstyle attributes (which must appear in that order, adjacent).
rx_pcpath = re.compile(rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"')

def settunnelfileinfo(tunnelfile):
    '''Read a Tunnel .xml drawing file and set values on its tunnelfile object.

    ``tunnelfile.tunnelpath`` is relative to ``settings.TUNNEL_DATA``. Sets
    ``filesize`` and ``npaths`` (the number of ``<skpath`` occurrences), saves
    the record, then passes every frame sketch path found in the file to
    ``find_tunnel_scan``. A zero-length file is reported as a DataIssue and
    the function returns without saving.

    Should try to read the date too e.g. tunneldate="2010-08-16 22:51:57
    then we could display on the master calendar per expo.
    '''
    ff = os.path.join(settings.TUNNEL_DATA, tunnelfile.tunnelpath)
    tunnelfile.filesize = os.stat(ff)[stat.ST_SIZE]
    if tunnelfile.filesize <= 0:
        message = "! Zero length xml file {}".format(ff)
        print(message)
        DataIssue.objects.create(parser='Drawings', message=message)
        return
    # Read as bytes (the patterns are bytes patterns); the context manager
    # closes the file even if read() raises.
    with open(ff, 'rb') as fin:
        ttext = fin.read()

    tunnelfile.npaths = len(rx_skpath.findall(ttext))
    tunnelfile.save()

    # Example drawing file in Tunnel format:
    # <tunnelxml tunnelversion="version2009-06-21 Matienzo" tunnelproject="ireby" tunneluser="goatchurch" tunneldate="2009-06-29 23:22:17">
    # <pcarea area_signal="frame" sfscaledown="12.282584" sfrotatedeg="-90.76982" sfxtrans="11.676667377221136" sfytrans="-15.677173422877454" sfsketch="204description/scans/plan(38).png" sfstyle="" nodeconnzsetrelative="0.0">

    for path, _style in rx_pcpath.findall(ttext):
        find_tunnel_scan(tunnelfile, path.decode())

    # Should also scan and look for survex blocks that might have been
    # included, and image scans which would populate tunnelfile.survexfile

    tunnelfile.save()


def load_drawings_files():
    '''Rebuild the TunnelFile records from the drawings directory tree.

    All TunnelFile records and all DataIssue records of the 'Drawings',
    'Therion' and 'Tunnel' parsers are deleted first. The tree under
    ``settings.TUNNEL_DATA`` is then walked (most recently found directory
    first); names starting with "." or ending with "~" are skipped. A new
    record is created for every ``.xml``, ``.th`` and ``.th2`` file, with
    ``tunnelpath`` relative to the drawings directory and ``tunnelname`` the
    file name without its extension.

    Once every record exists, the file contents are read: ``.xml`` and
    ``.th2`` files first, ``.th`` files last.
    '''
    all_xml = []
    drawdatadir = settings.TUNNEL_DATA
    TunnelFile.objects.all().delete()
    for parser in ('Drawings', 'Therion', 'Tunnel'):
        DataIssue.objects.filter(parser=parser).delete()

    # Explicit stack of directories (relative to drawdatadir) still to visit.
    drawingsdirs = [""]
    while drawingsdirs:
        drawdir = drawingsdirs.pop()
        for f in os.listdir(os.path.join(drawdatadir, drawdir)):
            if f[0] == "." or f[-1] == "~":
                continue
            lf = os.path.join(drawdir, f)
            ff = os.path.join(drawdatadir, lf)
            if os.path.isdir(ff):
                drawingsdirs.append(lf)
                continue
            # splitext removes exactly the extension, whatever its length, so
            # "foo.th" is named "foo" (slicing a fixed 4 characters off gave
            # "fo").
            stem, ext = os.path.splitext(f)
            if ext in (".xml", ".th", ".th2"):
                # Always creates new
                tunnelfile = TunnelFile(tunnelpath=lf, tunnelname=stem)
                tunnelfile.save()
                all_xml.append((ext[1:], tunnelfile))

    print(f' - {len(all_xml)} Drawings files found')

    # Important to import .th2 files before .th so that we can assign them
    # when found in .th files: handle everything else first, .th files last.
    for d in all_xml:
        if d[0] == 'xml':
            settunnelfileinfo(d[1])
        elif d[0] == 'th2':
            settherionfileinfo(d)
    for d in all_xml:
        if d[0] == 'th':
            settherionfileinfo(d)