diff options
author | Philip Sargent <philip.sargent@klebos.com> | 2022-03-05 20:29:01 +0000 |
---|---|---|
committer | Philip Sargent <philip.sargent@klebos.com> | 2022-03-05 20:29:01 +0000 |
commit | a3a65524b86abde3bc924d611620f71f87b7bf36 (patch) | |
tree | 6b9edfa909a242792417059b3e50af9df583711f /parsers/drawings.py | |
parent | 88f5df0f19a1b9d16e0b92dcd910f6bd7db9e2b9 (diff) | |
download | troggle-a3a65524b86abde3bc924d611620f71f87b7bf36.tar.gz troggle-a3a65524b86abde3bc924d611620f71f87b7bf36.tar.bz2 troggle-a3a65524b86abde3bc924d611620f71f87b7bf36.zip |
better errors for drawings parsing & upload
Diffstat (limited to 'parsers/drawings.py')
-rw-r--r-- | parsers/drawings.py | 58 |
1 file changed, 40 insertions, 18 deletions
diff --git a/parsers/drawings.py b/parsers/drawings.py index a8306b1..024ce43 100644 --- a/parsers/drawings.py +++ b/parsers/drawings.py @@ -38,7 +38,7 @@ def find_dwg_file(dwgfile, path): which we have already seen when we imported all the files we could find in the surveyscans direstories ''' wallet, scansfile = None, None - mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif))$", path) + mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif|txt))$", path) if mscansdir: scanswalletl = Wallet.objects.filter(walletname=mscansdir.group(1)) # This should be chnaged to properly detect if a list of folders is returned and do something sensible, not just pick the first. @@ -66,7 +66,7 @@ def find_dwg_file(dwgfile, path): if scansfile: dwgfile.scans.add(scansfile) - elif path and not re.search(r"\.(?:png|jpg|pdf|jpeg|gif)$(?i)", path): + elif path and not re.search(r"\.(?:png|jpg|pdf|jpeg|gif|txt)$(?i)", path): name = os.path.split(path)[1] rdwgfilel = DrawingFile.objects.filter(dwgname=name) if len(rdwgfilel): @@ -76,7 +76,7 @@ def find_dwg_file(dwgfile, path): plist.append(df.dwgname) message = f"! {len(rdwgfilel)} paths found with same name '{path}' {plist}" print(message) - DataIssue.objects.create(parser='Tunnel', message=message) + DataIssue.objects.create(parser='Tunnel', message=message, url=f'/dwgdataraw/{path}') rdwgfile = rdwgfilel[0] dwgfile.dwgcontains.add(rdwgfile) @@ -106,7 +106,7 @@ def settherionfileinfo(filetuple): if therionfile.filesize <= 0: message = "! 
Zero length therion file {}".format(ff) print(message) - DataIssue.objects.create(parser='Therion', message=message) + DataIssue.objects.create(parser='Therion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}') return fin = open(ff,'r') ttext = fin.read() @@ -130,7 +130,7 @@ def settherionfileinfo(filetuple): for xth_me in rx_xth_me.findall(ttext): message = f'! Un-parsed image filename: {therionfile.dwgname} : {xth_me.split()[-3]} - {therionfile.dwgpath}' #print(message) - DataIssue.objects.create(parser='Therion', message=message) + DataIssue.objects.create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}') findimageinsert(therionfile, xth_me) for inp in rx_input.findall(ttext): @@ -138,7 +138,7 @@ def settherionfileinfo(filetuple): # but we would need to disentangle to get the current path properly message = f'! Un-set Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}' #print(message) - DataIssue.objects.create(parser='Therion', message=message) + DataIssue.objects.create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}') findimportinsert(therionfile, inp) therionfile.save() @@ -146,7 +146,7 @@ def settherionfileinfo(filetuple): rx_skpath = re.compile(rb'<skpath') rx_pcpath = re.compile(rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"') -def setdwgfileinfo(dwgfile): +def settnlfileinfo(dwgfile): '''Read in the drawing file contents and sets values on the dwgfile object Should try to read the date too e.g. tunneldate="2010-08-16 22:51:57 then we could display on the master calendar per expo. @@ -154,9 +154,9 @@ def setdwgfileinfo(dwgfile): ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath) dwgfile.filesize = os.stat(ff)[stat.ST_SIZE] if dwgfile.filesize <= 0: - message = "! Zero length xml file {}".format(ff) + message = "! 
Zero length tunnel file {}".format(ff) print(message) - DataIssue.objects.create(parser='Drawings', message=message) + DataIssue.objects.create(parser='Tunnel', message=message, url=f'/dwgdataraw/{dwgfile.dwgpath}') return fin = open(ff,'rb') ttext = fin.read() @@ -177,6 +177,20 @@ def setdwgfileinfo(dwgfile): dwgfile.save() +def setdrwfileinfo(dwgfile): + '''Read in the drawing file contents and sets values on the dwgfile object, + but these are PDFs or .txt files, so there is no useful format to search for + This function is a placeholder in case we thnk of a way to do something + to recognise generic survex filenames. + ''' + ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath + dwgfile.filesize = ff.stat().st_size + if dwgfile.filesize <= 0: + message = "! Zero length drawing file {}".format(ff) + print(message) + DataIssue.objects.create(parser='drawings', message=message, url=f'/dwgdataraw/{dwgfile.dwgpath}') + return + def load_drawings_files(): '''Breadth first search of drawings directory looking for sub-directories and *.xml filesize @@ -184,13 +198,14 @@ def load_drawings_files(): Why do we have all this detection of file types/! Why not use get_mime_types ? What is it all for ?? - ALL THIS NEEDS TO DETCT UPPER CASE suffices + ALL THIS NEEDS TO DETECT UPPER CASE suffices ''' all_xml = [] drawdatadir = settings.DRAWINGS_DATA DrawingFile.objects.all().delete() - DataIssue.objects.filter(parser='Drawings').delete() + DataIssue.objects.filter(parser='drawings').delete() DataIssue.objects.filter(parser='Therion').delete() + DataIssue.objects.filter(parser='xTherion').delete() DataIssue.objects.filter(parser='Tunnel').delete() drawingsdirs = [ "" ] @@ -203,32 +218,37 @@ def load_drawings_files(): ff = os.path.join(drawdatadir, lf) if os.path.isdir(ff): drawingsdirs.append(lf) # lunatic! adding to list in middle of list while loop! 
- elif f[-4:] == ".xml": + elif Path(f).suffix.lower() == ".txt": + # Always creates new + dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) + dwgfile.save() + all_xml.append(('txt',dwgfile)) + elif Path(f).suffix.lower() == ".xml": # Always creates new dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile.save() all_xml.append(('xml',dwgfile)) - elif f[-3:] == ".th": + elif Path(f).suffix.lower() == ".th": # Always creates new dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile.save() all_xml.append(('th',dwgfile)) - elif f[-4:] == ".th2": + elif Path(f).suffix.lower() == ".th2": # Always creates new dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile.save() all_xml.append(('th2',dwgfile)) - elif f[-4:] == ".pdf": + elif Path(f).suffix.lower() == ".pdf": # Always creates new dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile.save() all_xml.append(('pdf',dwgfile)) - elif f[-4:] == ".svg": + elif Path(f).suffix.lower() == ".svg": # Always creates new dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile.save() all_xml.append(('svg',dwgfile)) - elif f[-4:] == ".jpg": + elif Path(f).suffix.lower() == ".jpg": # Always creates new dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile.save() @@ -242,8 +262,10 @@ def load_drawings_files(): print(f' - {len(all_xml)} Drawings files found') for d in all_xml: + if d[0] in ['pdf', 'txt', '']: + setdrwfileinfo(d[1]) if d[0] == 'xml': - setdwgfileinfo(d[1]) + settnlfileinfo(d[1]) # important to import .th2 files before .th so that we can assign them when found in .th files if d[0] == 'th2': settherionfileinfo(d) |