diff options
Diffstat (limited to 'parsers/drawings.py')
-rw-r--r-- | parsers/drawings.py | 310 |
1 files changed, 310 insertions, 0 deletions
# Reconstructed from the added lines of the "new file" diff for parsers/drawings.py.
"""Searches through all the :drawings: repository looking
for tunnel and therion files

Searches through all the survey scans directories in expofiles, looking for images to be referenced.
"""
import sys
import os
import types
import stat
import csv
import re
import datetime

from PIL import Image
from functools import reduce

import settings
from troggle.core.models.survex import SingleScan, Wallet, DrawingFile
from troggle.core.models.troggle import DataIssue
from troggle.core.utils import save_carefully


# File names that count as scan images inside a wallet folder (case-insensitive).
rx_scan_image = re.compile(r"\.(?:png|jpg|jpeg|pdf|svg|gif)$", re.IGNORECASE)
# Paths that end in an image suffix (case-insensitive); used to tell image
# references apart from references to other drawing files.
rx_image_suffix = re.compile(r"\.(?:png|jpg|pdf|jpeg|gif)$", re.IGNORECASE)
# "<wallet name>/<image file name>" at the end of a path stored in a tunnel file.
rx_scans_path = re.compile(
    r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif))$"
)


def get_or_create_placeholder(year):
    """Return the placeholder logbook entry for `year`, creating it if needed.

    All surveys must be related to a logbookentry. We don't have a way to
    automatically figure out which survey went with which logbookentry,
    so we create a survey placeholder logbook entry for each year. This
    function always returns such a placeholder, and creates it if it doesn't
    exist yet.
    """
    # NOTE(review): LogbookEntry is not imported by this module as shown, so
    # calling this function raises NameError until the import is added -
    # confirm which models module defines it.
    lookupAttribs = {'date__year': int(year), 'title': "placeholder for surveys"}
    nonLookupAttribs = {
        'text': "surveys temporarily attached to this should be re-attached to their actual trips",
        'date': datetime.date(int(year), 1, 1),
    }
    placeholder_logbook_entry, newly_created = save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
    return placeholder_logbook_entry


def listdir(*directories):
    """List the entries of a directory below settings.SURVEYS.

    The local filesystem is tried first. If that fails with an OSError the
    listing is requested from a "listdir/..." URL built on settings.SURVEYS,
    and each returned line (with any trailing "/" removed) is one entry.
    """
    try:
        return os.listdir(os.path.join(settings.SURVEYS, *directories))
    except OSError:
        import urllib.request

        url = settings.SURVEYS + "/".join(["listdir", *directories])
        # '#' occurs in wallet names and must be escaped to survive in a URL
        with urllib.request.urlopen(url.replace("#", "%23")) as response:
            folders = response.readlines()
        # readlines() yields bytes terminated by a newline
        # NOTE(review): assumes the server replies in UTF-8 - confirm
        return [folder.decode().strip().rstrip("/") for folder in folders]


# handles url or file, so we can refer to a set of scans (not drawings) on another server
def GetListDir(sdir):
    """Return a list of (name, full path, is-directory) tuples for the entries of `sdir`.

    Entries whose name starts with "." are skipped. Loading from an http://
    location is not implemented: a DataIssue is recorded and an empty list
    is returned.
    """
    if str(sdir).startswith("http://"):
        # s = urllib.request.urlopen(sdir)
        message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]"
        print(message)
        DataIssue.objects.create(parser='Drawings', message=message)
        return []

    res = []
    for f in os.listdir(sdir):
        if not f.startswith("."):
            ff = os.path.join(sdir, f)
            res.append((f, ff, os.path.isdir(ff)))
    return res


def LoadListScansFile(wallet):
    """Create a SingleScan for every image file found in the folder of `wallet`.

    Sub-directories are flattened one level deep. A "." is printed for every
    ten scans saved, as a progress indicator.
    """
    gld = []
    # flatten out any directories in these wallet folders - should not be any
    for (fyf, ffyf, fisdiryf) in GetListDir(wallet.fpath):
        if fisdiryf:
            gld.extend(GetListDir(ffyf))
        else:
            gld.append((fyf, ffyf, fisdiryf))

    c = 0
    for (fyf, ffyf, fisdiryf) in gld:
        if rx_scan_image.search(fyf):
            singlescan = SingleScan(ffile=ffyf, name=fyf, wallet=wallet)
            singlescan.save()
            c += 1
            if c >= 10:
                print(".", end='')
                c = 0


# this iterates through the scans directories (either here or on the remote server)
# and builds up the models we can access later
def load_all_scans():
    """Delete all Wallet and SingleScan objects and rebuild them from settings.SURVEY_SCANS."""
    print(' - Loading Survey Scans')

    SingleScan.objects.all().delete()
    Wallet.objects.all().delete()
    print(' - deleting all scansFolder and scansSingle objects')

    # first do the smkhs (large kh survey scans) directory
    manywallets_smkhs = Wallet(fpath=os.path.join(settings.SURVEY_SCANS, "../surveys/smkhs"), walletname="smkhs")
    print("smkhs", end=' ')
    if os.path.isdir(manywallets_smkhs.fpath):
        manywallets_smkhs.save()
        LoadListScansFile(manywallets_smkhs)

    # iterate into the surveyscans directory
    print(' - ', end=' ')
    for f, ff, fisdir in GetListDir(settings.SURVEY_SCANS):
        if not fisdir:
            continue

        # do the year folders: each sub-directory of a year folder is one wallet
        if re.match(r"\d\d\d\d$", f):
            print("%s" % f, end=' ')
            for fy, ffy, fisdiry in GetListDir(ff):
                if fisdiry:
                    wallet = Wallet(fpath=ffy, walletname=fy)
                    wallet.save()
                    LoadListScansFile(wallet)

        # do the other folders, each treated as a single wallet ("thumbs" is skipped)
        elif f != "thumbs":
            wallet = Wallet(fpath=ff, walletname=f)
            wallet.save()
            LoadListScansFile(wallet)

    print("", flush=True)


def find_tunnel_file(dwgfile, path):
    """Link `dwgfile` to whatever the text `path` refers to.

    Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file
    which we have already seen when we imported all the files we could find in the surveyscans directories.
    If it does, the matching wallet and scan are attached to `dwgfile`. Otherwise, if `path`
    does not end in an image suffix, it is looked up by file name as another drawing file
    and attached as a contained drawing.
    """
    wallet, scansfile = None, None
    mscansdir = rx_scans_path.search(path)
    if mscansdir:
        scanswalletl = Wallet.objects.filter(walletname=mscansdir.group(1))
        # This should be changed to properly detect if a list of folders is returned and do something sensible, not just pick the first.
        if len(scanswalletl):
            wallet = scanswalletl[0]
            if len(scanswalletl) > 1:
                # report the wallet that was picked (the scan file has not been looked up yet)
                message = "! More than one scan FOLDER matches filter query. [{}]: {} {} {} {}".format(scanswalletl[0], mscansdir.group(1), mscansdir.group(2), dwgfile.dwgpath, path)
                print(message)
                DataIssue.objects.create(parser='Tunnel', message=message)

        if wallet:
            scansfilel = wallet.singlescan_set.filter(name=mscansdir.group(2))
            if len(scansfilel):
                if len(scansfilel) > 1:
                    message = "! More than one image FILENAME matches filter query. [{}]: {} {} {} {}".format(scansfilel[0], mscansdir.group(1), mscansdir.group(2), dwgfile.dwgpath, path)
                    print(message)
                    DataIssue.objects.create(parser='Tunnel', message=message)
                scansfile = scansfilel[0]

        if wallet:
            dwgfile.manywallets.add(wallet)
        if scansfile:
            dwgfile.scans.add(scansfile)

    elif path and not rx_image_suffix.search(path):
        name = os.path.split(path)[1]
        rdwgfilel = DrawingFile.objects.filter(dwgname=name)
        if len(rdwgfilel):
            message = "! Two paths with same name [{}]: {}".format(path, name)
            print(message)
            DataIssue.objects.create(parser='Tunnel', message=message)
            rdwgfile = rdwgfilel[0]
            dwgfile.dwgcontains.add(rdwgfile)

    dwgfile.save()


def findimageinsert(therionfile, xth_me):
    """Tries to link the drawing file (Therion format) to the referenced image (scan) file

    Not implemented yet: currently does nothing.
    """
    pass


def findimportinsert(therionfile, imp):
    """Tries to link the scrap (Therion format) to the referenced therion scrap

    Not implemented yet: currently does nothing.
    """
    pass


rx_xth_me = re.compile(r'xth_me_image_insert.*{.*}$', re.MULTILINE)
rx_scrap = re.compile(r'^survey (\w*).*$', re.MULTILINE)
rx_input = re.compile(r'^input (\w*).*$', re.MULTILINE)


def settherionfileinfo(filetuple):
    """Read in the drawing file contents and sets values on the dwgfile object

    `filetuple` is a (type, DrawingFile) pair where type is 'th' or 'th2'.
    Sets filesize and npaths, and records a DataIssue for every image insert
    and every 'input' line, none of which are resolved yet.
    """
    thtype, therionfile = filetuple

    ff = os.path.join(settings.DRAWINGS_DATA, therionfile.dwgpath)
    therionfile.filesize = os.stat(ff)[stat.ST_SIZE]
    if therionfile.filesize <= 0:
        message = "! Zero length therion file {}".format(ff)
        print(message)
        DataIssue.objects.create(parser='Therion', message=message)
        return
    with open(ff, 'r') as fin:
        ttext = fin.read()

    # The equivalent for a tunnel 'path' would be a .th2 'line wall' or 'scrap'
    # print(len(re.findall(r"line", ttext)))
    if thtype == 'th':
        therionfile.npaths = len(re.findall(r"^input ", ttext, re.MULTILINE))
    elif thtype == 'th2':
        therionfile.npaths = len(re.findall(r"^line ", ttext, re.MULTILINE))
    therionfile.save()

    # scan and look for survex blocks that might have been included, and image scans (as for tunnel drawings)
    # which would populate dwgfile.survexfile

    # in .th2 files:
    # ##XTHERION## xth_me_image_insert {500 1 1.0} {1700 {}} ../../../expofiles/surveyscans/2014/01popped_elev1.jpeg 0 {}
    # scrap blownout -projection plan -scale [-81.0 -42.0 216.0 -42.0 0.0 0.0 7.5438 0.0 m]

    for xth_me in rx_xth_me.findall(ttext):
        # In the sample line above the image path is the third token from the end;
        # fall back to the whole match if the line is shorter than that.
        tokens = xth_me.split()
        imagename = tokens[-3] if len(tokens) >= 3 else xth_me
        message = f'! Un-parsed image filename: {therionfile.dwgname} : {imagename} - {therionfile.dwgpath}'
        #print(message)
        DataIssue.objects.create(parser='Therion', message=message)
        findimageinsert(therionfile, xth_me)

    for inp in rx_input.findall(ttext):
        # if this 'input' is a .th2 file we have already seen, then we can assign this as a sub-file
        # but we would need to disentangle to get the current path properly
        message = f'! Un-set Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}'
        #print(message)
        DataIssue.objects.create(parser='Therion', message=message)
        findimportinsert(therionfile, inp)

    therionfile.save()


rx_skpath = re.compile(rb'<skpath')
rx_pcpath = re.compile(rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"')


def setdwgfileinfo(dwgfile):
    """Read in the drawing file contents and sets values on the dwgfile object

    Sets filesize and npaths (the number of '<skpath' occurrences), then passes
    every sfsketch path found in a frame '<pcarea' element to find_tunnel_file().

    Should try to read the date too e.g. tunneldate="2010-08-16 22:51:57
    then we could display on the master calendar per expo.
    """
    ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath)
    dwgfile.filesize = os.stat(ff)[stat.ST_SIZE]
    if dwgfile.filesize <= 0:
        message = "! Zero length xml file {}".format(ff)
        print(message)
        DataIssue.objects.create(parser='Drawings', message=message)
        return
    # read as bytes: the patterns above are bytes patterns
    with open(ff, 'rb') as fin:
        ttext = fin.read()

    dwgfile.npaths = len(rx_skpath.findall(ttext))
    dwgfile.save()

    # example drawing file in Tunnel format.
    # <tunnelxml tunnelversion="version2009-06-21 Matienzo" tunnelproject="ireby" tunneluser="goatchurch" tunneldate="2009-06-29 23:22:17">
    # <pcarea area_signal="frame" sfscaledown="12.282584" sfrotatedeg="-90.76982" sfxtrans="11.676667377221136" sfytrans="-15.677173422877454" sfsketch="204description/scans/plan(38).png" sfstyle="" nodeconnzsetrelative="0.0">

    for path, _style in rx_pcpath.findall(ttext):
        find_tunnel_file(dwgfile, path.decode())

    # should also scan and look for survex blocks that might have been included, and image scans
    # which would populate dwgfile.survexfile

    dwgfile.save()


# Maps a recognised drawing file suffix to the type tag used in load_drawings_files().
_DRAWING_SUFFIXES = {".xml": "xml", ".th": "th", ".th2": "th2"}


def load_drawings_files():
    """Walk the drawings directory tree and (re)create a DrawingFile for every *.xml, *.th and *.th2 file.

    All existing DrawingFile objects, and the DataIssues of the 'Drawings',
    'Therion' and 'Tunnel' parsers, are deleted first. Files and directories
    whose name starts with "." or ends with "~" are ignored.
    """
    all_xml = []
    drawdatadir = settings.DRAWINGS_DATA
    DrawingFile.objects.all().delete()
    DataIssue.objects.filter(parser='Drawings').delete()
    DataIssue.objects.filter(parser='Therion').delete()
    DataIssue.objects.filter(parser='Tunnel').delete()

    # Stack of directories (relative to drawdatadir) still to visit. Popping
    # from the end makes this a depth-first walk.
    drawingsdirs = [""]
    while drawingsdirs:
        drawdir = drawingsdirs.pop()
        for f in os.listdir(os.path.join(drawdatadir, drawdir)):
            if f[0] == "." or f[-1] == "~":
                continue
            lf = os.path.join(drawdir, f)
            ff = os.path.join(drawdatadir, lf)
            if os.path.isdir(ff):
                # safe: the inner loop iterates the os.listdir() result, not this stack
                drawingsdirs.append(lf)
                continue
            stem, suffix = os.path.splitext(f)
            filetype = _DRAWING_SUFFIXES.get(suffix)
            if filetype is not None:
                # Always creates new
                dwgfile = DrawingFile(dwgpath=lf, dwgname=stem)
                dwgfile.save()
                all_xml.append((filetype, dwgfile))

    print(f' - {len(all_xml)} Drawings files found')

    # important to import .th2 files before .th so that we can assign them when found in .th files:
    # the stable sort moves the .th files to the end and keeps everything else in discovery order
    for d in sorted(all_xml, key=lambda item: item[0] == 'th'):
        if d[0] == 'xml':
            setdwgfileinfo(d[1])
        else:
            settherionfileinfo(d)

    # for drawfile in DrawingFile.objects.all():
    #     SetTunnelfileInfo(drawfile)