From 17b2b7b89c2ce34471251a64a1f7c9e416a1c869 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Wed, 7 Dec 2022 18:22:09 +0000 Subject: fix comments and 2019 LB parsing --- parsers/drawings.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'parsers/drawings.py') diff --git a/parsers/drawings.py b/parsers/drawings.py index 132b663..a688771 100644 --- a/parsers/drawings.py +++ b/parsers/drawings.py @@ -20,14 +20,23 @@ for tunnel and therion files todo='''- Rename functions more consistently between tunnel and therion variants -- Recode rx_valid_ext to use profile suffix() function +- Recode to use pathlib instead of whacky resetting of loop variable inside loop +to scan sub-folders. + +- Recode rx_valid_ext to use pathlib suffix() function + +- Recode load_drawings_files() to use a list of suffices not huge if-else monstrosity ''' rx_valid_ext = re.compile(r'(?i)\.(?:png|jpg|pdf|jpeg|gif|txt)$') def find_dwg_file(dwgfile, path): '''Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file - which we have already seen when we imported all the files we could find in the surveyscans direstories + which we have already seen when we imported all the files we could find in the surveyscans direstories. + + The purpose is to find cross-references between Tunnel drawing files. But this is not reported anywhere yet ? + + What is all this really for ?! Is this data used anywhere ?? ''' wallet, scansfile = None, None mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif|txt))$", path) @@ -58,15 +67,15 @@ def find_dwg_file(dwgfile, path): if scansfile: dwgfile.scans.add(scansfile) - elif path and not rx_valid_ext.search(path): + elif path and not rx_valid_ext.search(path): # ie not recognised as a path where wallets live and not an image file type name = os.path.split(path)[1] - rdwgfilel = DrawingFile.objects.filter(dwgname=name) + rdwgfilel = DrawingFile.objects.filter(dwgname=name) # Check if it is another drawing file we have already seen if len(rdwgfilel): if len(rdwgfilel) > 1: plist =[] for df in rdwgfilel: - plist.append(df.dwgname) - message = f"! {len(rdwgfilel)} paths found with same name '{path}' {plist}" + plist.append(df.dwgpath) + message = f"- Warning {len(rdwgfilel)} files named '{name}' {plist}" # should not be a problem? print(message) DataIssue.objects.create(parser='Tunnel', message=message, url=f'/dwgdataraw/{path}') rdwgfile = rdwgfilel[0] @@ -224,6 +233,7 @@ def setdrwfileinfo(dwgfile): def load_drawings_files(): '''Breadth first search of drawings directory looking for sub-directories and *.xml filesize + This is brain-damaged very early code. Should be replaced with proper use of pathlib. Why do we have all this detection of file types/! Why not use get_mime_types ? What is it all for ?? @@ -251,7 +261,7 @@ def load_drawings_files(): lf = os.path.join(drawdir, f) ff = os.path.join(drawdatadir, lf) if os.path.isdir(ff): - drawingsdirs.append(lf) # lunatic! adding to list in middle of list while loop! + drawingsdirs.append(lf) # lunatic! adding to list in middle of list while loop! Replace with pathlib functions. elif Path(f).suffix.lower() == ".txt": # Always creates new dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) -- cgit v1.2.3