better errors for drawings parsing & upload

author: Philip Sargent <philip.sargent@klebos.com> 2022-03-05 20:29:01 +0000
committer: Philip Sargent <philip.sargent@klebos.com> 2022-03-05 20:29:01 +0000
commit: a3a65524b86abde3bc924d611620f71f87b7bf36 (patch)
tree: 6b9edfa909a242792417059b3e50af9df583711f /parsers/drawings.py
parent: 88f5df0f19a1b9d16e0b92dcd910f6bd7db9e2b9 (diff)
download: troggle-a3a65524b86abde3bc924d611620f71f87b7bf36.tar.gz
troggle-a3a65524b86abde3bc924d611620f71f87b7bf36.tar.bz2
troggle-a3a65524b86abde3bc924d611620f71f87b7bf36.zip
1 files changed, 40 insertions, 18 deletions
diff --git a/parsers/drawings.py b/parsers/drawings.py
index a8306b1..024ce43 100644
--- a/parsers/drawings.py
+++ b/parsers/drawings.py
@@ -38,7 +38,7 @@ def find_dwg_file(dwgfile, path):
     which we have already seen when we imported all the files we could find in the surveyscans direstories
     '''
     wallet, scansfile = None, None
-    mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif))$", path)
+    mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif|txt))$", path)
     if mscansdir:
         scanswalletl = Wallet.objects.filter(walletname=mscansdir.group(1))
         # This should be chnaged to properly detect if a list of folders is returned and do something sensible, not just pick the first.
@@ -66,7 +66,7 @@ def find_dwg_file(dwgfile, path):
         if scansfile:
             dwgfile.scans.add(scansfile)
     
-    elif path and not re.search(r"\.(?:png|jpg|pdf|jpeg|gif)$(?i)", path):
+    elif path and not re.search(r"\.(?:png|jpg|pdf|jpeg|gif|txt)$(?i)", path):
         name = os.path.split(path)[1]
         rdwgfilel = DrawingFile.objects.filter(dwgname=name)
         if len(rdwgfilel):
@@ -76,7 +76,7 @@ def find_dwg_file(dwgfile, path):
                     plist.append(df.dwgname)
                 message = f"! {len(rdwgfilel)} paths found with same name '{path}' {plist}"
                 print(message)
-                DataIssue.objects.create(parser='Tunnel', message=message)
+                DataIssue.objects.create(parser='Tunnel', message=message, url=f'/dwgdataraw/{path}')
                 rdwgfile = rdwgfilel[0]
                 dwgfile.dwgcontains.add(rdwgfile)
 
@@ -106,7 +106,7 @@ def settherionfileinfo(filetuple):
     if therionfile.filesize <= 0:
         message = "! Zero length therion file {}".format(ff)
         print(message)
-        DataIssue.objects.create(parser='Therion', message=message)
+        DataIssue.objects.create(parser='Therion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}')
         return
     fin = open(ff,'r')
     ttext = fin.read()
@@ -130,7 +130,7 @@ def settherionfileinfo(filetuple):
     for xth_me in rx_xth_me.findall(ttext):
         message = f'! Un-parsed image filename: {therionfile.dwgname} : {xth_me.split()[-3]} - {therionfile.dwgpath}'
         #print(message)
-        DataIssue.objects.create(parser='Therion', message=message)
+        DataIssue.objects.create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}')
         findimageinsert(therionfile, xth_me)
         
     for inp in rx_input.findall(ttext):
@@ -138,7 +138,7 @@ def settherionfileinfo(filetuple):
         # but we would need to disentangle to get the current path properly
         message = f'! Un-set Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}'
         #print(message)
-        DataIssue.objects.create(parser='Therion', message=message)
+        DataIssue.objects.create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}')
         findimportinsert(therionfile, inp)
     
     therionfile.save()
@@ -146,7 +146,7 @@ def settherionfileinfo(filetuple):
 rx_skpath = re.compile(rb'<skpath')
 rx_pcpath = re.compile(rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"')
 
-def setdwgfileinfo(dwgfile):
+def settnlfileinfo(dwgfile):
     '''Read in the drawing file contents and sets values on the dwgfile object
     Should try to read the date too e.g. tunneldate="2010-08-16 22:51:57
     then we could display on the master calendar per expo.
@@ -154,9 +154,9 @@ def setdwgfileinfo(dwgfile):
     ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath)
     dwgfile.filesize = os.stat(ff)[stat.ST_SIZE]
     if dwgfile.filesize <= 0:
-        message = "! Zero length xml file {}".format(ff)
+        message = "! Zero length tunnel file {}".format(ff)
         print(message)
-        DataIssue.objects.create(parser='Drawings', message=message)
+        DataIssue.objects.create(parser='Tunnel', message=message, url=f'/dwgdataraw/{dwgfile.dwgpath}')
         return
     fin = open(ff,'rb')
     ttext = fin.read()
@@ -177,6 +177,20 @@ def setdwgfileinfo(dwgfile):
 
     dwgfile.save()
 
+def setdrwfileinfo(dwgfile):
+    '''Read in the drawing file contents and sets values on the dwgfile object,
+    but these are PDFs or .txt files, so there is no useful format to search for
+    This function is a placeholder in case we think of a way to do something
+    to recognise generic survex filenames.
+    '''
+    ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath
+    dwgfile.filesize = ff.stat().st_size
+    if dwgfile.filesize <= 0:
+        message = "! Zero length drawing file {}".format(ff)
+        print(message)
+        DataIssue.objects.create(parser='drawings', message=message, url=f'/dwgdataraw/{dwgfile.dwgpath}')
+        return
+ 
 
 def load_drawings_files():
     '''Breadth first search of drawings directory looking for sub-directories and *.xml filesize
@@ -184,13 +198,14 @@ def load_drawings_files():
     Why do we have all this detection of file types/! Why not use get_mime_types ? 
     What is it all for ??
     
-    ALL THIS NEEDS TO DETCT UPPER CASE suffices
+    ALL THIS NEEDS TO DETECT UPPER CASE suffixes
     '''
     all_xml = []
     drawdatadir = settings.DRAWINGS_DATA
     DrawingFile.objects.all().delete()
-    DataIssue.objects.filter(parser='Drawings').delete()
+    DataIssue.objects.filter(parser='drawings').delete()
     DataIssue.objects.filter(parser='Therion').delete()
+    DataIssue.objects.filter(parser='xTherion').delete()
     DataIssue.objects.filter(parser='Tunnel').delete()
 
     drawingsdirs = [ "" ]
@@ -203,32 +218,37 @@ def load_drawings_files():
             ff = os.path.join(drawdatadir, lf)
             if os.path.isdir(ff):
                 drawingsdirs.append(lf) # lunatic! adding to list in middle of list while loop!
-            elif f[-4:] == ".xml":
+            elif Path(f).suffix.lower() == ".txt":
+                # Always creates new
+                dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
+                dwgfile.save()
+                all_xml.append(('txt',dwgfile))
+            elif Path(f).suffix.lower() == ".xml":
                 # Always creates new
                 dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
                 dwgfile.save()
                 all_xml.append(('xml',dwgfile))
-            elif f[-3:] == ".th":
+            elif Path(f).suffix.lower() == ".th":
                 # Always creates new
                 dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
                 dwgfile.save()
                 all_xml.append(('th',dwgfile))
-            elif f[-4:] == ".th2":
+            elif Path(f).suffix.lower() == ".th2":
                 # Always creates new
                 dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
                 dwgfile.save()
                 all_xml.append(('th2',dwgfile))
-            elif f[-4:] == ".pdf":
+            elif Path(f).suffix.lower() == ".pdf":
                 # Always creates new
                 dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
                 dwgfile.save()
                 all_xml.append(('pdf',dwgfile))
-            elif f[-4:] == ".svg":
+            elif Path(f).suffix.lower() == ".svg":
                 # Always creates new
                 dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
                 dwgfile.save()
                 all_xml.append(('svg',dwgfile))
-            elif f[-4:] == ".jpg":
+            elif Path(f).suffix.lower() == ".jpg":
                 # Always creates new
                 dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
                 dwgfile.save()
@@ -242,8 +262,10 @@ def load_drawings_files():
     print(f' - {len(all_xml)} Drawings files found')
 
     for d in all_xml:
+        if d[0] in ['pdf', 'txt', '']:
+            setdrwfileinfo(d[1])
         if d[0] == 'xml':
-            setdwgfileinfo(d[1])
+            settnlfileinfo(d[1])
         # important to import .th2 files before .th so that we can assign them when found in .th files
         if d[0] == 'th2':
             settherionfileinfo(d)
author	Philip Sargent <philip.sargent@klebos.com>	2022-03-05 20:29:01 +0000
committer	Philip Sargent <philip.sargent@klebos.com>	2022-03-05 20:29:01 +0000
commit	a3a65524b86abde3bc924d611620f71f87b7bf36 (patch)
tree	6b9edfa909a242792417059b3e50af9df583711f /parsers/drawings.py
parent	88f5df0f19a1b9d16e0b92dcd910f6bd7db9e2b9 (diff)
download	troggle-a3a65524b86abde3bc924d611620f71f87b7bf36.tar.gz troggle-a3a65524b86abde3bc924d611620f71f87b7bf36.tar.bz2 troggle-a3a65524b86abde3bc924d611620f71f87b7bf36.zip