diff options
Diffstat (limited to 'parsers/scans.py')
-rw-r--r-- | parsers/scans.py | 182 |
1 files changed, 100 insertions, 82 deletions
diff --git a/parsers/scans.py b/parsers/scans.py index 3922b6b..4a8b68d 100644 --- a/parsers/scans.py +++ b/parsers/scans.py @@ -14,7 +14,7 @@ from pathlib import Path import settings from troggle.core.models.survex import SingleScan, Wallet, DrawingFile from troggle.core.models.troggle import DataIssue -from troggle.core.utils import save_carefully, GetListDir +from troggle.core.utils import save_carefully from troggle.core.views.scans import datewallet '''Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced. @@ -26,66 +26,63 @@ git = settings.GIT # to do: Actually read all the JSON files and set the survex file field appropriately! - -def CheckEmptyDate(wallet): - '''If date is not set, get it from a linked survex file. - Could also look at filedates for the scans in expofiles/surveyscans/ , but these can be re-set by copying. - ''' - earliest = datetime.datetime.now().date() - - # This is not working, can't see why. An scans parser now taking a very long time.. - #datewallet(wallet, earliest) - return +# def GetListDir(sdir): + # '''handles url or file, so we can refer to a set of scans (not drawings) on another server + # returns a list of f (file), ff (file full path), is_dir (bool) -def CheckEmptyPeople(wallet): - '''If people list is empty, copy them from the survex files: all of them - - To be a Troggle model change; a many:many relationship between wallets and people, - as well as being a list in the JSON file (which is the permanent repository). We want the many:many - relationship so that we can filter wallets based on a person. - - For the moment, we will just get a list.. - ''' - return + # REPLACE all use of this with Path.rglob() ! + # ''' + # res = [ ] + # if type(sdir) is str and sdir[:7] == "http://": + # # s = urllib.request.urlopen(sdir) + # message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]" + # print(message) + # DataIssue.objects.create(parser='Drawings', message=message) + # sdir[:7] = "" -def LoadListScansFile(wallet): - gld = [ ] - # flatten out any directories in these wallet folders - should not be any - for (fyf, ffyf, fisdiryf) in GetListDir(wallet.fpath): - if fisdiryf: - gld.extend(GetListDir(ffyf)) - else: - gld.append((fyf, ffyf, fisdiryf)) + # for f in os.listdir(sdir): + # if f[0] != ".": + # ff = os.path.join(sdir, f) + # res.append((f, ff, os.path.isdir(ff))) + # return res + + +# def LoadListScansFile(wallet): + # # formerly a generic troggle utility, written by who ? Being gradually expunged and replaced by python standard library functions + # gld = [ ] + # # flatten out any directories in these wallet folders - should not be any + # for (fyf, ffyf, fisdiryf) in GetListDir(wallet.fpath): + # if fisdiryf: + # gld.extend(GetListDir(ffyf)) + # else: + # gld.append((fyf, ffyf, fisdiryf)) - c=0 - for (fyf, ffyf, fisdiryf) in gld: - if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif|xvi)(?i)$", fyf): - singlescan = SingleScan(ffile=ffyf, name=fyf, wallet=wallet) - singlescan.save() - c+=1 - if c>=10: - print(".", end='') - c = 0 + # c=0 + # for (fyf, ffyf, fisdiryf) in gld: + # if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif|xvi)(?i)$", fyf): + # singlescan = SingleScan(ffile=ffyf, name=fyf, wallet=wallet) + # singlescan.save() + # c+=1 + # if c>=10: + # print(".", end='') + # c = 0 def load_all_scans(): '''This iterates through the scans directories (either here or on the remote server) and builds up the models we can access later. + It does NOT read or validate anything in the JSON data attached to each wallet. Those checks are done at runtime, when a wallet is accessed, not at import time. - Replace GetListDir with a more modern Path.iter idiom - path = Path("scans") - for p in path.rglob("*"): - print(p.name) - ''' print(' - Loading Survey Scans') SingleScan.objects.all().delete() Wallet.objects.all().delete() - print(' - deleting all Wallet and SingleScan objects') + print(' - deleting all Wallet and SingleScan objects') DataIssue.objects.filter(parser='scans').delete() + # These are valid old file types to be visible, they are not necessarily allowed to be uploaded to a new wallet. valids = [".top",".txt",".tif",".png",".jpg",".jpeg",".pdf",".svg",".gif",".xvi", ".json",".autosave",".sxd",".svx",".th",".th2",".tdr",".sql",".zip",".dxf",".3d", ".ods",".csv",".xcf",".xml"] @@ -95,10 +92,12 @@ def load_all_scans(): # Not all folders with files in them are wallets. # they are if they are /2010/2010#33 # or /1996-1999NotKHbook/ - # but not if they are /2010/1010#33/therion or /1998/ - print(' - ', end=' ') + # but not if they are /2010/2010#33/therion/ : the wallet is /2010#33/ not /therion/ + print(' - ', end='') scans_path = Path(settings.SCANS_ROOT) seen = [] + c=0 + wallets = {} for p in scans_path.rglob('*'): if p.is_file(): if p.suffix.lower() not in valids and p.name.lower() not in validnames: @@ -107,6 +106,13 @@ def load_all_scans(): elif p.parent == scans_path: # skip files directly in /surveyscans/ pass else: + + c+=1 + if c % 15 == 0 : + print(".", end='') + if c % 500 == 0 : + print("\n -", end='') + if p.parent.parent.parent.parent == scans_path: # print(f"too deep {p}", end='\n') fpath = p.parent.parent @@ -114,54 +120,66 @@ def load_all_scans(): else: fpath = p.parent walletname = p.parent.name - - # UNFINISHED + + if walletname in wallets: + wallet = wallets[walletname] + else: + print("", flush=True, end='') + wallet = Wallet(fpath=fpath, walletname=walletname) + wallet.save() + wallets[walletname] = wallet + + singlescan = SingleScan(ffile=fpath, name=p.name, wallet=wallet) + singlescan.save() + + + # only printing progress: tag = p.parent if len(walletname)>4: if walletname[4] == "#": tag = p.parent.parent if tag not in seen: - print(f"{tag.name}", end=' ') + print(f" {tag.name} ", end='') seen.append(tag) - #wallet = Wallet(fpath=fpath, walletname=walletname) + - - print('\n UNFINISHED \n\n--- ') - for topfolder, fpath, fisdir in GetListDir(settings.SCANS_ROOT): - if not fisdir: - continue + print(f'\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets') + + # if False: + # n=0 + # for topfolder, fpath, fisdir in GetListDir(settings.SCANS_ROOT): + # if not fisdir: + # continue - # do the year folders - if re.match(r"\d\d\d\d$", topfolder): - print(f"{topfolder}", end=' ') - for walletname, fpath, fisdir in GetListDir(fpath): - if fisdir: - wallet = Wallet(fpath=fpath, walletname=walletname) - # this is where we should record the year explicitly - # line 347 of view/uploads.py and needs refactoring for loading contentsjson - CheckEmptyDate(wallet) - CheckEmptyPeople(wallet) - wallet.save() - LoadListScansFile(wallet) - else: - # but We *should* load all the scans, even for nonstandard names. - print(f'\n - IGNORE {walletname} - {fpath}') - - # but we also need to check if JSON exists, even if there are no uploaded scan files + # # do the year folders + # # if re.match(r"\d\d\d\d$", topfolder): + # print(f"{topfolder}", end=' ') + # for walletname, fpath, fisdir in GetListDir(fpath): + # if fisdir: + # wallet = Wallet(fpath=fpath, walletname=walletname) + # # this is where we should record the year explicitly + # # line 347 of view/uploads.py and needs refactoring for loading contentsjson + # wallet.save() + # LoadListScansFile(wallet) + # # else: + # # # but We *should* load all the scans, even for nonstandard names. + # # print(f'\n - IGNORE {topfolder} - {fpath}') + # print("", flush=True) + + # but we also need to check if JSON exists, even if there are no uploaded scan files. + # Here we know there is a rigid folder structure, so no need to look for sub folders contents_path = Path(settings.DRAWINGS_DATA, "walletjson") for yeardir in contents_path.iterdir(): if yeardir.is_dir(): for walletpath in yeardir.iterdir(): if Path(walletpath, contentsjson).is_file(): walletname = walletpath.name - wallet, created = Wallet.objects.update_or_create(walletname=walletname) - # should now also load the json and use it ! check &ref is correct or missing too - if created: - print(f"\n{walletname} created: only JSON, no actual uploaded scan files.", end=' ') - CheckEmptyDate(wallet) - CheckEmptyPeople(wallet) - wallet.save() - - - print("", flush=True) + + if walletname not in wallets: + print(f" - {walletname} creation attempting: only JSON, no actual uploaded scan files.", end=' ') + wallet, created = Wallet.objects.update_or_create(walletname=walletname) + # should now also load the json and use it ! check &ref is correct or missing too + if created: + print(f" - {walletname} created: only JSON, no actual uploaded scan files.", end=' ') + wallet.save() |