diff options
Diffstat (limited to 'parsers/scans.py')
-rw-r--r-- | parsers/scans.py | 132 |
1 files changed, 79 insertions, 53 deletions
diff --git a/parsers/scans.py b/parsers/scans.py index 1bebe18..cc54633 100644 --- a/parsers/scans.py +++ b/parsers/scans.py @@ -17,8 +17,8 @@ from troggle.core.models.troggle import DataIssue from troggle.core.utils import save_carefully from troggle.core.views.scans import datewallet -'''Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced. -''' +"""Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced. +""" contentsjson = "contents.json" @@ -26,111 +26,135 @@ git = settings.GIT # to do: Actually read all the JSON files and set the survex file field appropriately! + def setwalletyear(wallet): - _ = wallet.year() # don't need return value. Just calling this saves it as w.walletyear + _ = wallet.year() # don't need return value. Just calling this saves it as w.walletyear + def load_all_scans(): - '''This iterates through the scans directories (either here or on the remote server) + """This iterates through the scans directories (either here or on the remote server) and builds up the models we can access later. - + It does NOT read or validate anything in the JSON data attached to each wallet. Those checks are done at runtime, when a wallet is accessed, not at import time. - - ''' - print(' - Loading Survey Scans') + + """ + print(" - Loading Survey Scans") SingleScan.objects.all().delete() Wallet.objects.all().delete() - print(' - deleting all Wallet and SingleScan objects') - DataIssue.objects.filter(parser='scans').delete() - + print(" - deleting all Wallet and SingleScan objects") + DataIssue.objects.filter(parser="scans").delete() + # These are valid old file types to be visible, they are not necessarily allowed to be uploaded to a new wallet. - valids = [".top",".txt",".tif",".png",".jpg",".jpeg",".pdf",".svg",".gif",".xvi", - ".json",".autosave",".sxd",".svx",".th",".th2",".tdr",".sql",".zip",".dxf",".3d", - ".ods",".csv",".xcf",".xml"] - validnames = ["thconfig","manifest"] + valids = [ + ".top", + ".txt", + ".tif", + ".png", + ".jpg", + ".jpeg", + ".pdf", + ".svg", + ".gif", + ".xvi", + ".json", + ".autosave", + ".sxd", + ".svx", + ".th", + ".th2", + ".tdr", + ".sql", + ".zip", + ".dxf", + ".3d", + ".ods", + ".csv", + ".xcf", + ".xml", + ] + validnames = ["thconfig", "manifest"] # iterate into the surveyscans directory # Not all folders with files in them are wallets. - # they are if they are /2010/2010#33 + # they are if they are /2010/2010#33 # or /1996-1999NotKHbook/ # but not if they are /2010/2010#33/therion/ : the wallet is /2010#33/ not /therion/ - print(' ', end='') - scans_path = Path(settings.SCANS_ROOT) + print(" ", end="") + scans_path = Path(settings.SCANS_ROOT) seen = [] - c=0 + c = 0 wallets = {} - for p in scans_path.rglob('*'): + for p in scans_path.rglob("*"): if p.is_file(): if p.suffix.lower() not in valids and p.name.lower() not in validnames: # print(f"'{p}'", end='\n') pass - elif p.parent == scans_path: # skip files directly in /surveyscans/ + elif p.parent == scans_path: # skip files directly in /surveyscans/ pass else: - - c+=1 - if c % 15 == 0 : - print(".", end='') - if c % 750 == 0 : - print("\n ", end='') + + c += 1 + if c % 15 == 0: + print(".", end="") + if c % 750 == 0: + print("\n ", end="") if p.parent.parent.parent.parent == scans_path: # print(f"too deep {p}", end='\n') fpath = p.parent.parent - walletname = p.parent.parent.name # wallet is one level higher - else: + walletname = p.parent.parent.name # wallet is one level higher + else: fpath = p.parent walletname = p.parent.name - + if walletname in wallets: wallet = wallets[walletname] else: - print("", flush=True, end='') + print("", flush=True, end="") # Create the wallet object. But we don't have a date for it yet. wallet = Wallet(fpath=fpath, walletname=walletname) setwalletyear(wallet) wallet.save() wallets[walletname] = wallet - + singlescan = SingleScan(ffile=fpath, name=p.name, wallet=wallet) singlescan.save() - - + # only printing progress: tag = p.parent - if len(walletname)>4: + if len(walletname) > 4: if walletname[4] == "#": tag = p.parent.parent - + if tag not in seen: - print(f" {tag.name} ", end='') + print(f" {tag.name} ", end="") if len(str(tag.name)) > 17: - print('\n ', end='') + print("\n ", end="") seen.append(tag) - - - print(f'\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets') - + + print(f"\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets") + # but we also need to check if JSON exists, even if there are no uploaded scan files. # Here we know there is a rigid folder structure, so no need to look for sub folders print(f"\n - Checking for wallets where JSON exists, but there may be no uploaded scan files:") - print(' ', end='') + print(" ", end="") wjson = 0 - contents_path = Path(settings.DRAWINGS_DATA, "walletjson") - for yeardir in contents_path.iterdir(): + contents_path = Path(settings.DRAWINGS_DATA, "walletjson") + for yeardir in contents_path.iterdir(): if yeardir.is_dir(): - for walletpath in yeardir.iterdir(): + for walletpath in yeardir.iterdir(): if Path(walletpath, contentsjson).is_file(): walletname = walletpath.name - + if walletname not in wallets: wjson += 1 - if wjson % 10 == 0 : - print("\n ", end='') + if wjson % 10 == 0: + print("\n ", end="") - print(f"{walletname} ", end='') - fpath = Path(settings.SCANS_ROOT, str(yeardir.stem), walletname) + print(f"{walletname} ", end="") + fpath = Path(settings.SCANS_ROOT, str(yeardir.stem), walletname) # The wallets found from JSON should all have dates already wallet, created = Wallet.objects.update_or_create(walletname=walletname, fpath=fpath) wallets[walletname] = wallet @@ -140,9 +164,11 @@ def load_all_scans(): # But we *do* set the walletyear: setwalletyear(wallet) if not created: - print(f"\n - {walletname} was not created, but was not in directory walk of /surveyscans/. Who created it?") + print( + f"\n - {walletname} was not created, but was not in directory walk of /surveyscans/. Who created it?" + ) wallet.save() - print(f'\n - found another {wjson:,} JSON files, making a total of {len(wallets):,} wallets') + print(f"\n - found another {wjson:,} JSON files, making a total of {len(wallets):,} wallets") wallets = Wallet.objects.filter(walletyear=None) for w in wallets: w.walletyear = datetime.date(1999, 1, 1) |