diff options
Diffstat (limited to 'parsers/surveys.py')
-rw-r--r-- | parsers/surveys.py | 310 |
1 files changed, 0 insertions, 310 deletions
# Reconstructed from the deletion diff of parsers/surveys.py (blob 5b7bc1e,
# 310 lines): the diff's leading "-" markers and collapsed whitespace are undone.
"""Searches through all the :drawings: repository looking
for tunnel and therion files.

Searches through all the survey scans directories in expofiles, looking for
images to be referenced.
"""
import sys
import os
import types
import stat
import csv
import re
import datetime

from PIL import Image
from functools import reduce

import settings
from troggle.core.models.survex import SingleScan, Wallet, DrawingFile
from troggle.core.models.troggle import DataIssue
from troggle.core.utils import save_carefully


# File-name suffixes accepted as scan images inside a wallet folder.
# The case-insensitive flag is passed explicitly: an inline "(?i)" anywhere but
# the very start of a pattern is rejected by Python 3.11 and later.
rx_scanfile = re.compile(r"\.(?:png|jpg|jpeg|pdf|svg|gif)$", re.IGNORECASE)

# File-name suffixes that mark a sketch reference in a drawing as an image
# (as opposed to a reference to another drawing file).
rx_imagefile = re.compile(r"\.(?:png|jpg|pdf|jpeg|gif)$", re.IGNORECASE)

# "<wallet folder>/<image file>" at the end of a sketch path found in a drawing.
rx_scanpath = re.compile(
    r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)"
    r"/(.*?(?:png|jpg|pdf|jpeg|gif))$"
)


def get_or_create_placeholder(year):
    """Return the placeholder logbook entry for *year*, creating it if needed.

    All surveys must be related to a logbookentry. We don't have a way to
    automatically figure out which survey went with which logbookentry,
    so we create a survey placeholder logbook entry for each year. This
    function always returns such a placeholder, and creates it if it doesn't
    exist yet.

    Args:
        year: the year, as anything accepted by ``int()``.

    Returns:
        The first element of the pair returned by ``save_carefully``.
    """
    # NOTE(review): LogbookEntry was never imported by this module, so the
    # original raised NameError here.  It is presumed to live next to DataIssue
    # in troggle.core.models.troggle - confirm before relying on this.
    from troggle.core.models.troggle import LogbookEntry

    year = int(year)
    lookupAttribs = {
        'date__year': year,
        'title': "placeholder for surveys",
    }
    nonLookupAttribs = {
        'text': "surveys temporarily attached to this should be re-attached to their actual trips",
        'date': datetime.date(year, 1, 1),
    }
    placeholder_logbook_entry, _newly_created = save_carefully(
        LogbookEntry, lookupAttribs, nonLookupAttribs
    )
    return placeholder_logbook_entry


def listdir(*directories):
    """List the entries of ``settings.SURVEYS/<directories...>``.

    The local filesystem is tried first.  If that fails with an ``OSError``
    the names are fetched from ``settings.SURVEYS + "listdir/<directories...>"``
    instead, one name per response line.

    Args:
        *directories: path components below ``settings.SURVEYS``.

    Returns:
        A list of entry names (``str``).
    """
    try:
        return os.listdir(os.path.join(settings.SURVEYS, *directories))
    except OSError:
        # Function-scope import, as in the original: only needed on the fallback path.
        import urllib.request

        url = settings.SURVEYS + "/".join(["listdir", *directories])
        with urllib.request.urlopen(url.replace("#", "%23")) as response:
            raw_lines = response.readlines()
        # readlines() yields bytes that still carry their line ending; decode and
        # strip it first so that the trailing "/" can actually be removed.
        return [raw.decode().strip().rstrip("/") for raw in raw_lines]


def GetListDir(sdir):
    """Return ``(name, full_path, is_dir)`` for each non-hidden entry of *sdir*.

    Handles url or file, so we can refer to a set of scans (not drawings) on
    another server.  Loading from ``http://`` is not implemented: a DataIssue
    is recorded, the scheme prefix is dropped and the remainder is listed as a
    local path.

    Args:
        sdir: directory path, or an ``http://`` URL.

    Returns:
        A list of 3-tuples; entries whose name starts with "." are skipped.
    """
    if sdir[:7] == "http://":
        # s = urllib.request.urlopen(sdir)
        message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]"
        print(message)
        DataIssue.objects.create(parser='Drawings', message=message)
        # Strings are immutable: the original `sdir[:7] = ""` raised TypeError.
        sdir = sdir[7:]

    res = []
    for f in os.listdir(sdir):
        if f[0] != ".":
            ff = os.path.join(sdir, f)
            res.append((f, ff, os.path.isdir(ff)))
    return res


def LoadListScansFile(wallet):
    """Create and save a SingleScan for every image file found in *wallet*.

    Sub-directories of the wallet folder are flattened one level deep.  A "."
    is printed for every tenth scan saved, as a progress indicator.

    Args:
        wallet: a Wallet whose ``fpath`` is the folder to scan.
    """
    gld = []
    # flatten out any directories in these wallet folders - should not be any
    for (fyf, ffyf, fisdiryf) in GetListDir(wallet.fpath):
        if fisdiryf:
            gld.extend(GetListDir(ffyf))
        else:
            gld.append((fyf, ffyf, fisdiryf))

    c = 0
    for (fyf, ffyf, fisdiryf) in gld:
        if rx_scanfile.search(fyf):
            singlescan = SingleScan(ffile=ffyf, name=fyf, wallet=wallet)
            singlescan.save()
            c += 1
            if c >= 10:
                print(".", end='')
                c = 0


def load_all_scans():
    """Delete all SingleScan and Wallet objects and rebuild them from disk.

    This iterates through the scans directories and builds up the models we
    can access later.  The ``smkhs`` folder is handled first, then every
    directory below ``settings.SURVEY_SCANS``: four-digit (year) folders
    contain one wallet per sub-directory, any other folder except ``thumbs``
    is itself a wallet.
    """
    print(' - Loading Survey Scans')

    SingleScan.objects.all().delete()
    Wallet.objects.all().delete()
    print(' - deleting all scansFolder and scansSingle objects')

    # first do the smkhs (large kh survey scans) directory
    manywallets_smkhs = Wallet(
        fpath=os.path.join(settings.SURVEY_SCANS, "../surveys/smkhs"),
        walletname="smkhs",
    )
    print("smkhs", end=' ')
    if os.path.isdir(manywallets_smkhs.fpath):
        manywallets_smkhs.save()
        LoadListScansFile(manywallets_smkhs)

    # iterate into the surveyscans directory
    print(' - ', end=' ')
    for f, ff, fisdir in GetListDir(settings.SURVEY_SCANS):
        if not fisdir:
            continue

        # do the year folders
        if re.match(r"\d\d\d\d$", f):
            print("%s" % f, end=' ')
            for fy, ffy, fisdiry in GetListDir(ff):
                if fisdiry:
                    wallet = Wallet(fpath=ffy, walletname=fy)
                    wallet.save()
                    LoadListScansFile(wallet)

        # any other folder, apart from the thumbnails, is a wallet in its own right
        elif f != "thumbs":
            wallet = Wallet(fpath=ff, walletname=f)
            wallet.save()
            LoadListScansFile(wallet)

    print("", flush=True)


def find_tunnel_file(dwgfile, path):
    """Link *dwgfile* to the wallet, scan or drawing named by *path*.

    Is given a line of text 'path' which may or may not contain a recognisable
    name of a scanned file which we have already seen when we imported all the
    files we could find in the surveyscans directories.

    If *path* ends in "<wallet>/<image>", the matching Wallet and SingleScan
    (when found) are added to ``dwgfile.manywallets`` and ``dwgfile.scans``.
    Otherwise, if *path* is non-empty and is not an image name, a DrawingFile
    with the same base name is added to ``dwgfile.dwgcontains``.  Ambiguous
    matches are reported as DataIssue records with parser 'Tunnel'.

    Args:
        dwgfile: the DrawingFile being populated; it is saved before returning.
        path: the sketch path text taken from the drawing.
    """
    wallet, scansfile = None, None
    mscansdir = rx_scanpath.search(path)
    if mscansdir:
        walletname, scanname = mscansdir.group(1), mscansdir.group(2)
        scanswalletl = Wallet.objects.filter(walletname=walletname)
        # This should be changed to properly detect if a list of folders is
        # returned and do something sensible, not just pick the first.
        if len(scanswalletl):
            wallet = scanswalletl[0]
            if len(scanswalletl) > 1:
                # Report the wallet that was picked.  The original formatted
                # scansfilel[0] here, which is not yet assigned at this point.
                message = "! More than one scan FOLDER matches filter query. [{}]: {} {} {} {}".format(
                    scanswalletl[0], walletname, scanname, dwgfile.dwgpath, path
                )
                print(message)
                DataIssue.objects.create(parser='Tunnel', message=message)

        if wallet:
            scansfilel = wallet.singlescan_set.filter(name=scanname)
            if len(scansfilel):
                if len(scansfilel) > 1:
                    message = "! More than one image FILENAME matches filter query. [{}]: {} {} {} {}".format(
                        scansfilel[0], walletname, scanname, dwgfile.dwgpath, path
                    )
                    print(message)
                    DataIssue.objects.create(parser='Tunnel', message=message)
                scansfile = scansfilel[0]

        if wallet:
            dwgfile.manywallets.add(wallet)
        if scansfile:
            dwgfile.scans.add(scansfile)

    elif path and not rx_imagefile.search(path):
        name = os.path.split(path)[1]
        rdwgfilel = DrawingFile.objects.filter(dwgname=name)
        if len(rdwgfilel):
            message = "! Two paths with same name [{}]: {}".format(path, name)
            print(message)
            DataIssue.objects.create(parser='Tunnel', message=message)
            rdwgfile = rdwgfilel[0]
            dwgfile.dwgcontains.add(rdwgfile)

    dwgfile.save()


def findimageinsert(therionfile, xth_me):
    """Tries to link the drawing file (Therion format) to the referenced image (scan) file.

    Not implemented yet: does nothing.
    """
    pass


def findimportinsert(therionfile, imp):
    """Tries to link the scrap (Therion format) to the referenced therion scrap.

    Not implemented yet: does nothing.
    """
    pass


# An "xth_me_image_insert ... {...}" directive, up to the end of its line.
rx_xth_me = re.compile(r'xth_me_image_insert.*{.*}$', re.MULTILINE)
# A line starting with "survey <name>" (not referenced elsewhere in this module).
rx_scrap = re.compile(r'^survey (\w*).*$', re.MULTILINE)
# A line starting with "input <name>".
rx_input = re.compile(r'^input (\w*).*$', re.MULTILINE)


def settherionfileinfo(filetuple):
    """Read in the drawing file contents and sets values on the dwgfile object.

    Sets ``filesize`` and ``npaths`` and records a DataIssue (parser 'Therion')
    for every image insert and every ``input`` line, none of which are
    resolved yet.

    Args:
        filetuple: ``(thtype, therionfile)`` where *thtype* is ``'th'`` or
            ``'th2'`` and *therionfile* is the DrawingFile to populate.
    """
    thtype, therionfile = filetuple

    ff = os.path.join(settings.DRAWINGS_DATA, therionfile.dwgpath)
    therionfile.filesize = os.stat(ff)[stat.ST_SIZE]
    if therionfile.filesize <= 0:
        message = "! Zero length therion file {}".format(ff)
        print(message)
        DataIssue.objects.create(parser='Therion', message=message)
        return
    # `with` closes the handle even if reading fails.
    with open(ff, 'r') as fin:
        ttext = fin.read()

    # The equivalent for a tunnel 'path' would be a .th2 'line wall' or 'scrap'
    # print(len(re.findall(r"line", ttext)))
    if thtype == 'th':
        therionfile.npaths = len(re.findall(r"^input ", ttext, re.MULTILINE))
    elif thtype == 'th2':
        therionfile.npaths = len(re.findall(r"^line ", ttext, re.MULTILINE))
    therionfile.save()

    # scan and look for survex blocks that might have been included, and image
    # scans (as for tunnel drawings) which would populate dwgfile.survexfile

    # in .th2 files:
    # ##XTHERION## xth_me_image_insert {500 1 1.0} {1700 {}} ../../../expofiles/surveyscans/2014/01popped_elev1.jpeg 0 {}
    # scrap blownout -projection plan -scale [-81.0 -42.0 216.0 -42.0 0.0 0.0 7.5438 0.0 m]

    for xth_me in rx_xth_me.findall(ttext):
        # In the example above the image filename is the third token from the
        # end; fall back to the whole match when the line is shorter than that.
        tokens = xth_me.split()
        imagename = tokens[-3] if len(tokens) >= 3 else xth_me
        message = f'! Un-parsed image filename: {therionfile.dwgname} : {imagename} - {therionfile.dwgpath}'
        # print(message)
        DataIssue.objects.create(parser='Therion', message=message)
        findimageinsert(therionfile, xth_me)

    for inp in rx_input.findall(ttext):
        # if this 'input' is a .th2 file we have already seen, then we can assign this as a sub-file
        # but we would need to disentangle to get the current path properly
        message = f'! Un-set Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}'
        # print(message)
        DataIssue.objects.create(parser='Therion', message=message)
        findimportinsert(therionfile, inp)

    therionfile.save()


# Both patterns are applied to the raw bytes of a Tunnel .xml file.
rx_skpath = re.compile(rb'<skpath')
rx_pcpath = re.compile(rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"')


def setdwgfileinfo(dwgfile):
    """Read in the drawing file contents and sets values on the dwgfile object.

    Sets ``filesize`` and ``npaths`` (the number of ``<skpath`` occurrences),
    then passes every framed sketch path to ``find_tunnel_file``.

    Should try to read the date too e.g. tunneldate="2010-08-16 22:51:57
    then we could display on the master calendar per expo.

    Args:
        dwgfile: the DrawingFile (Tunnel .xml format) to populate.
    """
    ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath)
    dwgfile.filesize = os.stat(ff)[stat.ST_SIZE]
    if dwgfile.filesize <= 0:
        message = "! Zero length xml file {}".format(ff)
        print(message)
        DataIssue.objects.create(parser='Drawings', message=message)
        return
    # `with` closes the handle even if reading fails.
    with open(ff, 'rb') as fin:
        ttext = fin.read()

    dwgfile.npaths = len(rx_skpath.findall(ttext))
    dwgfile.save()

    # example drawing file in Tunnel format.
    # <tunnelxml tunnelversion="version2009-06-21 Matienzo" tunnelproject="ireby" tunneluser="goatchurch" tunneldate="2009-06-29 23:22:17">
    # <pcarea area_signal="frame" sfscaledown="12.282584" sfrotatedeg="-90.76982" sfxtrans="11.676667377221136" sfytrans="-15.677173422877454" sfsketch="204description/scans/plan(38).png" sfstyle="" nodeconnzsetrelative="0.0">

    for path, _style in rx_pcpath.findall(ttext):
        find_tunnel_file(dwgfile, path.decode())

    # should also scan and look for survex blocks that might have been included, and image scans
    # which would populate dwgfile.survexfile

    dwgfile.save()


def load_drawings_files():
    """Rebuild all DrawingFile objects from the files under ``settings.DRAWINGS_DATA``.

    Existing DrawingFile objects and the DataIssue records of the 'Drawings',
    'Therion' and 'Tunnel' parsers are deleted first.  The directory tree is
    then walked with an explicit stack (so sub-directories are visited
    depth-first), creating one DrawingFile per ``*.xml``, ``*.th`` and
    ``*.th2`` file.  Finally the contents of each file are read.
    """
    all_xml = []
    drawdatadir = settings.DRAWINGS_DATA
    DrawingFile.objects.all().delete()
    DataIssue.objects.filter(parser='Drawings').delete()
    DataIssue.objects.filter(parser='Therion').delete()
    DataIssue.objects.filter(parser='Tunnel').delete()

    # Recognised file extensions and the type tag recorded for each.
    drawing_types = {".xml": "xml", ".th": "th", ".th2": "th2"}

    drawingsdirs = [""]
    while drawingsdirs:
        drawdir = drawingsdirs.pop()
        for f in os.listdir(os.path.join(drawdatadir, drawdir)):
            # skip hidden files and editor backups
            if f[0] == "." or f[-1] == "~":
                continue
            lf = os.path.join(drawdir, f)
            ff = os.path.join(drawdatadir, lf)
            if os.path.isdir(ff):
                # Safe: the list is only pushed to here and popped by the
                # enclosing `while`; it is never iterated over.
                drawingsdirs.append(lf)
                continue
            # splitext removes exactly the extension.  The original sliced off
            # four characters for every type, losing the last letter of the
            # name of each ".th" file.
            stem, ext = os.path.splitext(f)
            if ext in drawing_types:
                # Always creates new
                dwgfile = DrawingFile(dwgpath=lf, dwgname=stem)
                dwgfile.save()
                all_xml.append((drawing_types[ext], dwgfile))

    print(f' - {len(all_xml)} Drawings files found')

    # important to import .th2 files before .th so that we can assign them when
    # found in .th files: a stable sort keeps discovery order within each type.
    processing_order = {"xml": 0, "th2": 1, "th": 2}
    for d in sorted(all_xml, key=lambda entry: processing_order[entry[0]]):
        if d[0] == 'xml':
            setdwgfileinfo(d[1])
        else:
            settherionfileinfo(d)

    # for drawfile in DrawingFile.objects.all():
    #     SetTunnelfileInfo(drawfile)