# parsers/surveys.py

from __future__ import (absolute_import, division,
                        print_function, unicode_literals)

import sys
import os
import types
import logging
import stat
import csv
import re
import datetime

from PIL import Image
from utils import save_carefully
from functools import reduce

import settings
from troggle.core.models import *

def get_or_create_placeholder(year):
    """Return the placeholder logbook entry used for surveys of ``year``.

    Every survey has to be related to a logbook entry, but there is no
    automatic way to work out which entry a given survey belongs to.
    Each year therefore gets a single placeholder entry. It is looked up
    by year and title through ``save_carefully``, which also receives the
    text and a 1 January date to use for the non-lookup fields.
    """
    year_number = int(year)
    lookup = {
        'date__year': year_number,
        'title': "placeholder for surveys",
    }
    extra_fields = {
        'text': "surveys temporarily attached to this should be re-attached to their actual trips",
        'date': datetime.date(year_number, 1, 1),
    }
    # save_carefully returns (instance, created_flag); only the instance is needed.
    entry, _created = save_carefully(LogbookEntry, lookup, extra_fields)
    return entry

# obsolete surveys.csv does not exist.
# def readSurveysFromCSV():
    # try:   # could probably combine these two
        # surveytab = open(os.path.join(settings.SURVEY_SCANS, "Surveys.csv"))
    # except IOError:
        # import io, urllib.request, urllib.parse, urllib.error  
        # surveytab = io.StringIO(urllib.request.urlopen(settings.SURVEY_SCANS + "/Surveys.csv").read())
    # dialect=csv.Sniffer().sniff(surveytab.read())
    # surveytab.seek(0,0)
    # surveyreader = csv.reader(surveytab,dialect=dialect)
    # headers = next(surveyreader)
    # header = dict(list(zip(headers, list(range(len(headers)))))) #set up a dictionary where the indexes are header names and the values are column numbers

    # # test if the expeditions have been added yet
    # if Expedition.objects.count()==0:
        # print("There are no expeditions in the database. Please run the logbook parser.")
        # sys.exit()

    
    # logging.info("Deleting all scanned images")
    # ScannedImage.objects.all().delete()
    
    
    # logging.info("Deleting all survey objects")
    # Survey.objects.all().delete()
    
    
    # logging.info("Beginning to import surveys from "+str(os.path.join(settings.SURVEYS, "Surveys.csv"))+"\n"+"-"*60+"\n")
    
    # for survey in surveyreader:
        # #I hate this, but some surveys have a letter eg 2000#34a. The next line deals with that.
        # walletNumberLetter = re.match(r'(?P<number>\d*)(?P<letter>[a-zA-Z]*)',survey[header['Survey Number']]) 
    # #    print(walletNumberLetter.groups())
        # year=survey[header['Year']]

        
        # surveyobj = Survey(
            # expedition = Expedition.objects.filter(year=year)[0],
            # wallet_number = walletNumberLetter.group('number'),
            # logbook_entry = get_or_create_placeholder(year),
            # comments = survey[header['Comments']],
            # location = survey[header['Location']]
            # )
        # surveyobj.wallet_letter = walletNumberLetter.group('letter')
        # if survey[header['Finished']]=='Yes':
            # #try and find the sketch_scan
            # pass
        # surveyobj.save()

        
        # logging.info("added survey " + survey[header['Year']] + "#" + surveyobj.wallet_number + "\r")

# dead
def listdir(*directories):
    """List the entries of a directory located below ``settings.SURVEYS``.

    The local filesystem is tried first. If that fails with an OS-level
    error (for instance because ``settings.SURVEYS`` is a URL rather than a
    path), the listing is fetched from ``<settings.SURVEYS>listdir/<dirs...>``
    over HTTP instead.

    Args:
        *directories: Path components below ``settings.SURVEYS``.

    Returns:
        A list of entry names as ``str``. For the HTTP fallback each response
        line is decoded and stripped of its line terminator and any trailing
        "/".

    Raises:
        urllib.error.URLError: if the HTTP fallback cannot be fetched.
    """
    try:
        return os.listdir(os.path.join(settings.SURVEYS, *directories))
    except OSError:
        # Not a readable local directory; fall through to the remote listing.
        pass

    import urllib.request

    url = settings.SURVEYS + "/".join(["listdir"] + list(directories))
    # "#" occurs in wallet names (e.g. 2000#34) and must be escaped, otherwise
    # everything after it is treated as a URL fragment.
    with urllib.request.urlopen(url.replace("#", "%23")) as response:
        lines = response.readlines()
    # urlopen yields bytes; decode before applying str operations.
    return [line.decode("utf-8").rstrip("\r\n").rstrip("/") for line in lines]

# add survey scans
# def parseSurveyScans(expedition, logfile=None):
# #    yearFileList = listdir(expedition.year)
    # try:
        # yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
        # yearFileList=os.listdir(yearPath)
        # print(yearFileList)
        # for surveyFolder in yearFileList:
            # try:
                # surveyNumber=re.match(rb'\d\d\d\d#(X?)0*(\d+)',surveyFolder).groups()
                # #scanList = listdir(expedition.year, surveyFolder)
                # scanList=os.listdir(os.path.join(yearPath,surveyFolder))
            # except AttributeError:
                # print(("Ignoring file in year folder: " + surveyFolder + "\r"))
                # continue

            # for scan in scanList:
                # # Why does this insist on renaming all the scanned image files?
                # # It produces duplicates names and all images have type .jpg in the scanObj.
                # # It seems to rely on end users being particularly diligent in filenames which is NGtH
                # try:
                    # #scanChopped=re.match(rb'(?i).*(notes|elev|plan|extend|elevation)-?(\d*)\.(png|jpg|jpeg|pdf)',scan).groups()
                    # scanChopped=re.match(rb'(?i)([a-z_-]*\d?[a-z_-]*)(\d*)\.(png|jpg|jpeg|pdf|top|dxf|svg|tdr|th2|xml|txt)',scan).groups()
                    # scanType,scanNumber,scanFormat=scanChopped
                # except AttributeError:
                    # print(("Ignored (bad name format): " + surveyFolder + '/' + scan + "\r"))
                    # continue
                # scanTest = scanType
                # scanType = 'notes'
                # match = re.search(rb'(?i)(elev|extend)',scanTest)
                # if match:
                    # scanType = 'elevation'

                # match = re.search(rb'(?i)(plan)',scanTest)
                # if match:
                    # scanType = 'plan'

                # if scanNumber=='':
                    # scanNumber=1

                # if isinstance(surveyNumber, tuple):
                    # surveyLetter=surveyNumber[0]
                    # surveyNumber=surveyNumber[1]
                # try:
                    # placeholder=get_or_create_placeholder(year=int(expedition.year))
                    # survey=Survey.objects.get_or_create(wallet_number=surveyNumber, wallet_letter=surveyLetter, expedition=expedition, defaults={'logbook_entry':placeholder})[0]
                # except Survey.MultipleObjectsReturned:
                    # survey=Survey.objects.filter(wallet_number=surveyNumber, wallet_letter=surveyLetter, expedition=expedition)[0]
                # file_=os.path.join(yearPath, surveyFolder, scan)
                # scanObj = ScannedImage(
                    # file=file_,
                    # contents=scanType,
                    # number_in_wallet=scanNumber,
                    # survey=survey,
                    # new_since_parsing=False,
                    # )
                # print(("Added scanned image at " + str(scanObj)))
                # #if scanFormat=="png":
                    # #if isInterlacedPNG(os.path.join(settings.SURVEY_SCANS, "surveyscans", file_)):
                    # #    print file_+ " is an interlaced PNG. No can do."
                    # #continue
                # scanObj.save()
    # except (IOError, OSError):
        # yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
        # print((" ! No folder found for " + expedition.year + " at:- " + yearPath))

# dead
# def parseSurveys(logfile=None):
    # try:
        # readSurveysFromCSV()
    # except (IOError, OSError):
        # print(" ! Survey CSV not found..")
        # pass
    
    # print(" - Loading scans by expedition year")
    # for expedition in Expedition.objects.filter(year__gte=2000):   #expos since 2000, because paths and filenames were nonstandard before then
        # print("%s" % expedition, end=' ')
        # parseSurveyScans(expedition)

# dead
# def isInterlacedPNG(filePath): #We need to check for interlaced PNGs because the thumbnail engine can't handle them (uses PIL)
    # file=Image.open(filePath)
    # print(filePath)
    # if 'interlace' in file.info:
        # return file.info['interlace']
    # else:
        # return False


# handles url or file, so we can refer to a set of scans on another server
def GetListDir(sdir):
    """Return the non-hidden entries of the local directory ``sdir``.

    Args:
        sdir: Path of a directory. ``http://`` / ``https://`` locations are
            recognised but listing them is not implemented.

    Returns:
        A list of ``(name, full_path, is_dir)`` tuples, one for every entry
        whose name does not start with ".", in ``os.listdir`` order.

    Raises:
        NotImplementedError: if ``sdir`` is an HTTP(S) URL.
        OSError: if ``sdir`` cannot be listed.
    """
    if sdir.startswith(("http://", "https://")):
        # Raised explicitly rather than via ``assert`` so the check also
        # holds when Python runs with -O.
        raise NotImplementedError("Not written")

    res = []
    for f in os.listdir(sdir):
        if f.startswith("."):
            continue
        ff = os.path.join(sdir, f)
        res.append((f, ff, os.path.isdir(ff)))
    return res


def LoadListScansFile(survexscansfolder):
    """Save a SurvexScanSingle for every scan file found in a wallet folder.

    Args:
        survexscansfolder: The folder object whose ``fpath`` directory is
            listed; each created SurvexScanSingle is linked to it.

    Entries whose name ends in png, jpg, jpeg, pdf or svg (any letter case)
    are recorded; everything else is ignored.
    """
    gld = []

    # flatten out any directories in these wallet folders - should not be any
    # (only one level is flattened: sub-directories are listed, not recursed)
    for fyf, ffyf, fisdiryf in GetListDir(survexscansfolder.fpath):
        if fisdiryf:
            gld.extend(GetListDir(ffyf))
        else:
            gld.append((fyf, ffyf, fisdiryf))

    for fyf, ffyf, _fisdiryf in gld:
        # The case-insensitive flag is passed as an argument: an inline
        # "(?i)" anywhere but the start of the pattern is an error on
        # Python 3.11+.
        if re.search(r"\.(?:png|jpg|jpeg|pdf|svg)$", fyf, re.IGNORECASE):
            survexscansingle = SurvexScanSingle(ffile=ffyf, name=fyf, survexscansfolder=survexscansfolder)
            survexscansingle.save()

        
# this iterates through the scans directories (either here or on the remote server)
# and builds up the models we can access later
def LoadListScans():
    """Rebuild the SurvexScansFolder / SurvexScanSingle tables from disk.

    All existing objects of both models are deleted first. A folder object is
    then saved, and its files loaded with LoadListScansFile, for:

    * ``<SURVEY_SCANS>/smkhs``, if that directory exists;
    * every directory inside a four-digit year directory under
      ``<SURVEY_SCANS>/surveyscans``;
    * every other directory directly under ``surveyscans`` except "thumbs".
    """

    def _store_and_scan(folder):
        # Persist the folder before its files, which reference it.
        folder.save()
        LoadListScansFile(folder)

    print(' - Loading Survey Scans... (deleting all objects first)')

    SurvexScanSingle.objects.all().delete()
    SurvexScansFolder.objects.all().delete()

    # first do the smkhs (large kh survey scans) directory
    smkhs_folder = SurvexScansFolder(fpath=os.path.join(settings.SURVEY_SCANS, "smkhs"), walletname="smkhs")
    if os.path.isdir(smkhs_folder.fpath):
        _store_and_scan(smkhs_folder)

    # iterate into the surveyscans directory
    print(' - ', end=' ')
    scans_root = os.path.join(settings.SURVEY_SCANS, "surveyscans")
    for entry_name, entry_path, entry_is_dir in GetListDir(scans_root):
        if not entry_is_dir:
            continue

        if re.match(r"\d\d\d\d$", entry_name):
            # a year folder: each sub-directory is one wallet
            print("%s" % entry_name, end=' ')
            for wallet_name, wallet_path, wallet_is_dir in GetListDir(entry_path):
                if not wallet_is_dir:
                    continue
                _store_and_scan(SurvexScansFolder(fpath=wallet_path, walletname=wallet_name))
        elif entry_name != "thumbs":
            # any other directory (apart from thumbs) is treated as a wallet itself
            _store_and_scan(SurvexScansFolder(fpath=entry_path, walletname=entry_name))
            

def FindTunnelScan(tunnelfile, path):
    """Link ``tunnelfile`` to whatever the sketch reference ``path`` points at.

    Args:
        tunnelfile: The tunnel file object being annotated. It is saved
            before returning.
        path: An ``sfsketch`` reference from the tunnel XML, as ``str`` or
            ``bytes`` (bytes are decoded as UTF-8).

    If ``path`` ends in ``<wallet>/<image>``, the matching scans folder and
    scan file, when found, are added to ``tunnelfile.survexscansfolders`` and
    ``tunnelfile.survexscans``. Otherwise, if ``path`` is non-empty and does
    not end in an image extension, it is looked up by file name among the
    tunnel files and a match is added to ``tunnelfile.tunnelcontains``.
    """
    # The XML is read as bytes, but the model fields compared below hold
    # text, so work in str throughout.
    if isinstance(path, bytes):
        path = path.decode("utf-8", "replace")

    scansfolder, scansfile = None, None
    # re.ASCII keeps \d and \w limited to ASCII, as they are in a bytes pattern.
    mscansdir = re.search(
        r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg))$",
        path,
        re.ASCII,
    )
    if mscansdir:
        walletname, scanname = mscansdir.group(1), mscansdir.group(2)
        scansfolderl = SurvexScansFolder.objects.filter(walletname=walletname)
        if len(scansfolderl):
            assert len(scansfolderl) == 1
            scansfolder = scansfolderl[0]
        if scansfolder:
            scansfilel = scansfolder.survexscansingle_set.filter(name=scanname)
            if len(scansfilel):
                if len(scansfilel) > 1:
                    # Ambiguous file name: report it and use the first match.
                    print("BORK more than one image filename matches filter query. ", scansfilel[0])
                    print("BORK ", tunnelfile.tunnelpath, path)
                    print("BORK ", walletname, scanname, len(scansfilel))
                scansfile = scansfilel[0]

        if scansfolder:
            tunnelfile.survexscansfolders.add(scansfolder)
        if scansfile:
            tunnelfile.survexscans.add(scansfile)

    # The flag is passed as an argument: an inline "(?i)" after the pattern
    # is an error on Python 3.11+.
    elif path and not re.search(r"\.(?:png|jpg|pdf|jpeg)$", path, re.IGNORECASE | re.ASCII):
        name = os.path.split(path)[1]
        rtunnelfilel = TunnelFile.objects.filter(tunnelname=name)
        if len(rtunnelfilel):
            assert len(rtunnelfilel) == 1, ("two paths with name of", path, "need more discrimination coded")
            rtunnelfile = rtunnelfilel[0]
            tunnelfile.tunnelcontains.add(rtunnelfile)

    tunnelfile.save()


def SetTunnelfileInfo(tunnelfile):
    """Fill in size, type and path count of a tunnel file and link its scans.

    Args:
        tunnelfile: Object whose ``tunnelpath`` is relative to
            ``settings.TUNNEL_DATA``. Its ``filesize``, ``bfontcolours`` and
            ``npaths`` attributes are set and it is saved.

    A zero-length file is reported and left unsaved. For any other file,
    every ``sfsketch`` reference found in a ``<pcarea area_signal="frame">``
    element is passed to FindTunnelScan.

    Raises:
        AssertionError: if the file contains neither ``<fontcolours`` nor
            ``<sketch``.
    """
    ff = os.path.join(settings.TUNNEL_DATA, tunnelfile.tunnelpath)
    tunnelfile.filesize = os.stat(ff).st_size
    if tunnelfile.filesize <= 0:
        print("DEBUG - zero length xml file", ff)
        return

    # Read as bytes: the encoding of the XML is not known here.
    with open(ff, 'rb') as fin:
        ttext = fin.read()

    mtype = re.search(rb"<(fontcolours|sketch)", ttext)
    assert mtype, ff
    # The match group is bytes, so it must be compared with a bytes literal;
    # comparing with a str is always False in Python 3.
    tunnelfile.bfontcolours = (mtype.group(1) == b"fontcolours")
    tunnelfile.npaths = len(re.findall(rb"<skpath", ttext))
    tunnelfile.save()

    # <tunnelxml tunnelversion="version2009-06-21 Matienzo" tunnelproject="ireby" tunneluser="goatchurch" tunneldate="2009-06-29 23:22:17">
    # <pcarea area_signal="frame" sfscaledown="12.282584" sfrotatedeg="-90.76982" sfxtrans="11.676667377221136" sfytrans="-15.677173422877454" sfsketch="204description/scans/plan(38).png" sfstyle="" nodeconnzsetrelative="0.0">
    for path, _style in re.findall(rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"', ttext):
        FindTunnelScan(tunnelfile, path)

    # should also scan and look for survex blocks that might have been included
    # and also survex titles as well.

    tunnelfile.save()


def LoadTunnelFiles():
    """Rebuild the TunnelFile table from the files under settings.TUNNEL_DATA.

    All existing TunnelFile objects are deleted. The directory tree is then
    walked with an explicit stack, saving one TunnelFile per ``.xml`` file.
    Entries whose name starts with "." or ends with "~" are skipped. Once the
    walk is complete, SetTunnelfileInfo is run on every TunnelFile.
    """
    data_root = settings.TUNNEL_DATA
    TunnelFile.objects.all().delete()

    # Directories still to visit, as paths relative to data_root.
    pending = [""]
    while pending:
        rel_dir = pending.pop()
        for entry in os.listdir(os.path.join(data_root, rel_dir)):
            if entry.startswith(".") or entry.endswith("~"):
                continue
            rel_path = os.path.join(rel_dir, entry)
            if os.path.isdir(os.path.join(data_root, rel_path)):
                pending.append(rel_path)
            elif entry.endswith(".xml"):
                # tunnelname is the file name without its ".xml" suffix
                record = TunnelFile(tunnelpath=rel_path, tunnelname=os.path.basename(entry[:-4]))
                record.save()

    # Second pass, after all files exist, so references between tunnel files
    # can be looked up.
    for tunnelfile in TunnelFile.objects.all():
        SetTunnelfileInfo(tunnelfile)