1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
|
import os
import re
import stat
from pathlib import Path
import settings
from troggle.core.models.survex import DrawingFile
from troggle.core.models.troggle import DataIssue
from troggle.core.models.wallets import Wallet
"""Searches through all the :drawings: repository looking
for tunnel and therion files
"""
# Outstanding refactoring notes for this module, kept as a module-level string.
todo = """- Rename functions more consistently between tunnel and therion variants
- Recode to use pathlib instead of whacky resetting of loop variable inside loop
to scan sub-folders.
- Recode rx_valid_ext to use pathlib suffix() function
- Recode load_drawings_files() to use a list of suffices not huge if-else monstrosity
"""
# Case-insensitive match for a path that ends in one of the listed
# image/scan-style extensions. find_dwg_file() uses it to decide that a
# referenced path is NOT another drawing file.
rx_valid_ext = re.compile(r"(?i)\.(?:png|jpg|pdf|jpeg|gif|txt)$")
def find_dwg_file(dwgfile, path):
    """Cross-reference a drawing file with the scan or drawing named in 'path'.

    'path' is a line of text (settnlfileinfo() passes the sfsketch attribute of
    a Tunnel frame) which may or may not contain the recognisable name of a
    scanned file that was already imported from the surveyscans directories.

    - If 'path' looks like '<wallet name>/<scan file>', the first matching
      Wallet is added to dwgfile.dwgwallets and the first matching scan in that
      wallet is added to dwgfile.scans.
    - Otherwise, if 'path' is non-empty and does not end in an image extension,
      it is treated as the name of another drawing file and the first
      DrawingFile with that dwgname is added to dwgfile.dwgcontains.

    Ambiguous matches (more than one hit) are printed and recorded as a
    DataIssue; the first hit is still used. The resulting cross-references are
    not reported anywhere yet.

    Args:
        dwgfile: the DrawingFile being processed. It is always saved before
            returning.
        path: the reference text to resolve.
    """
    wallet, scansfile = None, None
    mscansdir = re.search(
        r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif|txt))$", path
    )
    if mscansdir:
        scanswalletl = Wallet.objects.filter(walletname=mscansdir.group(1))
        # This should be changed to properly detect if a list of folders is returned
        # and do something sensible, not just pick the first.
        if len(scanswalletl):
            wallet = scanswalletl[0]
            if len(scanswalletl) > 1:
                # Report the first matching wallet. (This used to reference
                # scansfilel, which is not assigned until further down, so the
                # duplicate-folder case crashed instead of being reported.)
                message = f"! More than one scan FOLDER matches filter query. [{scanswalletl[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path}"
                print(message)
                DataIssue.objects.create(parser="Tunnel", message=message)

        if wallet:
            scansfilel = wallet.singlescan_set.filter(name=mscansdir.group(2))
            if len(scansfilel):
                if len(scansfilel) > 1:
                    plist = [sf.ffile for sf in scansfilel]
                    message = f"! More than one image FILENAME matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path} {plist}"
                    print(message)
                    DataIssue.objects.create(parser="Tunnel", message=message)
                scansfile = scansfilel[0]

        if wallet:
            dwgfile.dwgwallets.add(wallet)
        if scansfile:
            dwgfile.scans.add(scansfile)

    elif path and not rx_valid_ext.search(path):
        # i.e. not recognised as a path where wallets live and not an image file type
        name = os.path.split(path)[1]
        # Check if it is another drawing file we have already seen
        rdwgfilel = DrawingFile.objects.filter(dwgname=name)
        if len(rdwgfilel):
            if len(rdwgfilel) > 1:
                plist = [df.dwgpath for df in rdwgfilel]
                message = f"- Warning {len(rdwgfilel)} files named '{name}' {plist}"  # should not be a problem?
                print(message)
                DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{path}")
            rdwgfile = rdwgfilel[0]
            dwgfile.dwgcontains.add(rdwgfile)

    dwgfile.save()
def findwalletimage(therionfile, foundpath):
    """Try to link a Therion drawing file to the scan image it references.

    'foundpath' is stripped of surrounding braces and searched for a wallet
    name. If a Wallet with that name exists, the first match is added to
    therionfile.dwgwallets, and a scan in that wallet whose name equals the
    final path component of 'foundpath' is added to therionfile.scans.

    Ambiguous wallet or scan matches, and a scan that is referenced but not
    present in the wallet, are recorded as DataIssue rows (parser "Therion").
    Nothing happens when 'foundpath' contains no wallet name or no wallet of
    that name exists.

    Args:
        therionfile: the DrawingFile for the .th/.th2 file being processed.
        foundpath: image path text taken from an xth_me_image_insert line.
    """
    foundpath = foundpath.strip("{}")
    mscansdir = re.search(r"(\d\d\d\d#\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)", foundpath)
    if not mscansdir:
        return

    scanswalletl = Wallet.objects.filter(walletname=mscansdir.group(1))
    # This should be changed to properly detect if a list of folders is returned
    # and do something sensible, not just pick the first.
    if not len(scanswalletl):
        # No such wallet: nothing to link (and 'wallet' would be undefined below).
        return

    wallet = scanswalletl[0]
    if len(scanswalletl) > 1:
        # The old str.format call had four placeholders but only three
        # arguments, so it raised IndexError instead of reporting the issue.
        message = f"! More than one scan FOLDER matches filter query. [{wallet}]: {mscansdir.group(1)} {therionfile.dwgpath} {foundpath}"
        print(message)
        DataIssue.objects.create(parser="Therion", message=message)
    if wallet:
        therionfile.dwgwallets.add(wallet)

    scanfilename = Path(foundpath).name
    scansfilel = wallet.singlescan_set.filter(name=scanfilename, wallet=wallet)
    if len(scansfilel):
        if len(scansfilel) > 1:
            plist = [sf.ffile for sf in scansfilel]
            # The regex above has a single group and this function has no
            # 'dwgfile' or 'path' names, so report the scan file name, the
            # therion file path and the referenced path instead.
            message = f"! More than one image FILENAME matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {scanfilename} {therionfile.dwgpath} {foundpath} {plist}"
            print(message)
            DataIssue.objects.create(parser="Therion", message=message)
        scansfile = scansfilel[0]
        therionfile.scans.add(scansfile)
    else:
        message = f'! Scanned file {scanfilename} mentioned in "{therionfile.dwgpath}" is not actually found in {wallet.walletname}'
        # Wallet names contain '#', which is written as ':' in the URL.
        wurl = f"/survey_scans/{wallet.walletname}/".replace("#", ":")
        # print(message)
        DataIssue.objects.create(parser="Therion", message=message, url=wurl)
def findimportinsert(therionfile, imp):
    """Placeholder for linking a Therion scrap to the therion scrap it references.

    Not implemented: both arguments are ignored and None is returned.
    """
    return None
# Matches the rest of a line from "xth_me_image_insert" onwards when that line
# ends in "}"; settherionfileinfo() takes the third-from-last
# whitespace-separated token of each match as the image path.
rx_xth_me = re.compile(r"xth_me_image_insert.*{.*}$", re.MULTILINE)
# Captures the word following "survey " at the start of a line.
# NOTE(review): not referenced by any function visible in this file section.
rx_scrap = re.compile(r"^survey (\w*).*$", re.MULTILINE)
# Captures the word following "input " at the start of a line.
rx_input = re.compile(r"^input (\w*).*$", re.MULTILINE)
def settherionfileinfo(filetuple):
    """Read a Therion drawing file and set values on its DrawingFile object.

    Sets filesize and npaths (the number of lines starting "input " for a .th
    file, or "line " for a .th2 file), then follows up every
    xth_me_image_insert reference with findwalletimage() and every "input"
    line with findimportinsert().

    A zero-length file is reported as a DataIssue and the function returns
    without saving the object.

    Args:
        filetuple: (thtype, therionfile) where thtype is "th" or "th2" and
            therionfile is the DrawingFile to update.
    """
    thtype, therionfile = filetuple

    ff = os.path.join(settings.DRAWINGS_DATA, therionfile.dwgpath)
    therionfile.filesize = os.stat(ff).st_size
    if therionfile.filesize <= 0:
        message = f"! Zero length therion file {ff}"
        print(message)
        DataIssue.objects.create(parser="Therion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}")
        return

    # Context manager so the handle is closed even if read() raises.
    with open(ff, "r") as fin:
        ttext = fin.read()

    # The equivalent for a tunnel 'path' would be a .th2 'line wall' or 'scrap'
    if thtype == "th":
        therionfile.npaths = len(re.findall(r"^input ", ttext, re.MULTILINE))
    elif thtype == "th2":
        therionfile.npaths = len(re.findall(r"^line ", ttext, re.MULTILINE))
    therionfile.save()

    # scan and look for survex blocks that might have been included, and image scans (as for tunnel drawings)
    # which would populate dwgfile.survexfile

    # in .th2 files:
    # ##XTHERION## xth_me_image_insert {500 1 1.0} {1700 {}} ../../../expofiles/surveyscans/2014/01popped_elev1.jpeg 0 {}
    # scrap blownout -projection plan -scale [-81.0 -42.0 216.0 -42.0 0.0 0.0 7.5438 0.0 m]

    for xth_me in rx_xth_me.findall(ttext):
        # The image path is the third token from the end of the line (see example above).
        imagepath = xth_me.split()[-3]
        # WORK IN PROGRESS. Do not clutter up the DataIssues list with this:
        # the message goes to a log file in the current directory instead.
        message = f"! Un-parsed image filename: {therionfile.dwgname} : {imagepath} - {therionfile.dwgpath}"
        # e.g.
        # ! Un-parsed image filename: 107coldest : ../../../expofiles/surveyscans/2015/2015#20/notes.jpg - therion/plan/107coldest.th2
        with open("therionrefs.log", "a") as lg:
            lg.write(message + "\n")

        findwalletimage(therionfile, imagepath)

    for inp in rx_input.findall(ttext):
        # if this 'input' is a .th2 file we have already seen, then we can assign this as a sub-file
        # but we would need to disentangle to get the current path properly
        message = f"! Un-set (?) Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}"
        DataIssue.objects.create(parser="xTherion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}")
        findimportinsert(therionfile, inp)

    therionfile.save()
# Bytes patterns: settnlfileinfo() reads Tunnel files in binary mode.
# Each occurrence of "<skpath" is counted to give DrawingFile.npaths.
rx_skpath = re.compile(rb"<skpath")
# Captures the sfsketch and sfstyle attribute values of a
# <pcarea area_signal="frame" ...> element.
rx_pcpath = re.compile(rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"')
def settnlfileinfo(dwgfile):
    """Read a Tunnel (.xml) drawing file and set values on the dwgfile object.

    Sets filesize and npaths (the number of "<skpath" occurrences), then passes
    the sfsketch value of every frame <pcarea> element to find_dwg_file() so
    that referenced scans and drawings get linked.

    A zero-length file is reported as a DataIssue and the function returns
    without saving the object.

    Should try to read the date too e.g. tunneldate="2010-08-16 22:51:57
    then we could display on the master calendar per expo.

    Args:
        dwgfile: the DrawingFile to update; dwgfile.dwgpath is relative to
            settings.DRAWINGS_DATA.
    """
    ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath)
    dwgfile.filesize = os.stat(ff).st_size
    if dwgfile.filesize <= 0:
        message = f"! Zero length tunnel file {ff}"
        print(message)
        DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{dwgfile.dwgpath}")
        return

    # Read as bytes (the rx_* patterns are bytes patterns); the context manager
    # closes the handle even if read() raises.
    with open(ff, "rb") as fin:
        ttext = fin.read()

    dwgfile.npaths = len(rx_skpath.findall(ttext))
    dwgfile.save()

    # example drawing file in Tunnel format.
    # <tunnelxml tunnelversion="version2009-06-21 Matienzo" tunnelproject="ireby" tunneluser="goatchurch" tunneldate="2009-06-29 23:22:17">
    # <pcarea area_signal="frame" sfscaledown="12.282584" sfrotatedeg="-90.76982" sfxtrans="11.676667377221136" sfytrans="-15.677173422877454" sfsketch="204description/scans/plan(38).png" sfstyle="" nodeconnzsetrelative="0.0">

    # Only the sketch path is used; the captured style is ignored.
    for path, _style in rx_pcpath.findall(ttext):
        find_dwg_file(dwgfile, path.decode())

    # should also scan and look for survex blocks that might have been included, and image scans
    # which would populate dwgfile.survexfile

    dwgfile.save()
def setdrwfileinfo(dwgfile):
    """Record the file size of a generic drawing file on the dwgfile object.

    These are SVGs, PDFs, images or .txt files, so there is no useful format to
    search for. This function is a placeholder in case we think of a way to do
    something to recognise generic survex filenames.

    A zero-length file is reported as a DataIssue and the function returns
    without saving the object, as settnlfileinfo() and settherionfileinfo() do.

    Args:
        dwgfile: the DrawingFile to update; dwgfile.dwgpath is relative to
            settings.DRAWINGS_DATA.
    """
    ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath
    dwgfile.filesize = ff.stat().st_size
    if dwgfile.filesize <= 0:
        message = f"! Zero length drawing file {ff}"
        print(message)
        DataIssue.objects.create(parser="drawings", message=message, url=f"/dwgdataraw/{dwgfile.dwgpath}")
        return

    # Persist the size: previously it was set on the object and then discarded,
    # unlike the Tunnel and Therion variants which save after setting it.
    dwgfile.save()
def load_drawings_files():
    """Rebuild all DrawingFile records by walking the drawings directory tree.

    Deletes every existing DrawingFile and the DataIssues of the "drawings",
    "Therion", "xTherion" and "Tunnel" parsers, removes any therionrefs.log in
    the current directory, then walks settings.DRAWINGS_DATA. Names starting
    with "." or ending with "~" are skipped. A DrawingFile is created for every
    file with a recognised suffix (.txt .xml .th .th2 .pdf .png .svg .jpg, any
    case) and for every file with no suffix at all; other files are ignored.
    Each new record is then passed to the reader for its type.

    Pending sub-directories are kept on a list used as a stack (last in, first
    out), so this is not a breadth-first walk.

    This is very early code. Should be replaced with proper use of pathlib.
    Why do we have all this detection of file types? Why not use get_mime_types?
    We import JPG, PNG and SVG files which have already been put on the server,
    but the upload form intentionally refuses to upload PNG and JPG (though it
    does allow SVG).
    """
    known_suffixes = {".txt", ".xml", ".th", ".th2", ".pdf", ".png", ".svg", ".jpg"}
    generic_types = ("pdf", "txt", "svg", "jpg", "png", "")

    all_xml = []
    drawdatadir = settings.DRAWINGS_DATA
    DrawingFile.objects.all().delete()
    for parser in ("drawings", "Therion", "xTherion", "Tunnel"):
        DataIssue.objects.filter(parser=parser).delete()

    if os.path.isfile("therionrefs.log"):
        os.remove("therionrefs.log")

    drawingsdirs = [""]  # paths relative to drawdatadir, still to be listed
    while drawingsdirs:
        drawdir = drawingsdirs.pop()
        for f in os.listdir(os.path.join(drawdatadir, drawdir)):
            if f.startswith(".") or f.endswith("~"):
                continue  # hidden files and editor backups
            lf = os.path.join(drawdir, f)
            ff = os.path.join(drawdatadir, lf)
            if os.path.isdir(ff):
                drawingsdirs.append(lf)
                continue

            suffix = Path(f).suffix
            if suffix == "":
                # No extension at all: recorded with an empty type (therion file).
                dwgtype, dwgname = "", f
            elif suffix.lower() in known_suffixes:
                dwgtype = suffix.lower()[1:]
                # stem drops exactly the suffix. The old f[:-4] slice removed
                # one character too many from ".th" names (cave.th -> "cav").
                dwgname = Path(f).stem
            else:
                continue  # unrecognised file type

            # Always creates new
            dwgfile = DrawingFile(dwgpath=lf, dwgname=dwgname)
            dwgfile.save()
            all_xml.append((dwgtype, dwgfile))

    print(f" - {len(all_xml)} Drawings files found")

    # NOTE(review): the original comment said it is important to import .th2
    # files before .th so that they can be assigned when found in .th files,
    # but files are processed in discovery order and nothing here enforces that.
    for entry in all_xml:
        dwgtype, dwgfile = entry
        if dwgtype in generic_types:
            setdrwfileinfo(dwgfile)
        elif dwgtype == "xml":
            settnlfileinfo(dwgfile)
        elif dwgtype in ("th2", "th"):
            settherionfileinfo(entry)
# for drawfile in DrawingFile.objects.all():
# SetTunnelfileInfo(drawfile)
|