diff options
-rw-r--r-- | core/views/scans.py | 4 | ||||
-rw-r--r-- | parsers/scans.py | 86 |
2 files changed, 20 insertions, 70 deletions
diff --git a/core/views/scans.py b/core/views/scans.py index 50e7209..5269d0b 100644 --- a/core/views/scans.py +++ b/core/views/scans.py @@ -64,7 +64,7 @@ def caveifywallet(w): '''Gets the cave from the list of survex files, only selects one of them though. Only used for display. ''' - print(f' - Caveify {w=}') + #print(f' - Caveify {w=}') blocknames = [] blocks = SurvexBlock.objects.filter(scanswallet = w) for b in blocks: @@ -208,7 +208,7 @@ def cavewallets(request, caveid): wallets.add(z) else: wurl = f"/scanupload/{z.walletname.replace('#',':')}" - print(f' - Unrecognised cave name \'{zcaveid}\' in {z.walletname} (out of {len(Gcavelookup):,} cave names') + print(f' - Unrecognised cave name \'{zcaveid}\' in {z.walletname} (out of {len(Gcavelookup):,} cave names and aliases)') message = f" ! In {z.walletname} there is an unrecognised cave name '{zcaveid}' (out of {len(Gcavelookup):,} cave names" DataIssue.objects.update_or_create(parser='scans', message=message, url=wurl) diff --git a/parsers/scans.py b/parsers/scans.py index 4a8b68d..9cbeeab 100644 --- a/parsers/scans.py +++ b/parsers/scans.py @@ -26,46 +26,6 @@ git = settings.GIT # to do: Actually read all the JSON files and set the survex file field appropriately! -# def GetListDir(sdir): - # '''handles url or file, so we can refer to a set of scans (not drawings) on another server - # returns a list of f (file), ff (file full path), is_dir (bool) - - # REPLACE all use of this with Path.rglob() ! - # ''' - # res = [ ] - # if type(sdir) is str and sdir[:7] == "http://": - # # s = urllib.request.urlopen(sdir) - # message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]" - # print(message) - # DataIssue.objects.create(parser='Drawings', message=message) - # sdir[:7] = "" - - # for f in os.listdir(sdir): - # if f[0] != ".": - # ff = os.path.join(sdir, f) - # res.append((f, ff, os.path.isdir(ff))) - # return res - - -# def LoadListScansFile(wallet): - # # formerly a generic troggle utility, written by who ? Being gradually expunged and replaced by python standard library functions - # gld = [ ] - # # flatten out any directories in these wallet folders - should not be any - # for (fyf, ffyf, fisdiryf) in GetListDir(wallet.fpath): - # if fisdiryf: - # gld.extend(GetListDir(ffyf)) - # else: - # gld.append((fyf, ffyf, fisdiryf)) - - # c=0 - # for (fyf, ffyf, fisdiryf) in gld: - # if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif|xvi)(?i)$", fyf): - # singlescan = SingleScan(ffile=ffyf, name=fyf, wallet=wallet) - # singlescan.save() - # c+=1 - # if c>=10: - # print(".", end='') - # c = 0 def load_all_scans(): '''This iterates through the scans directories (either here or on the remote server) @@ -93,7 +53,7 @@ def load_all_scans(): # they are if they are /2010/2010#33 # or /1996-1999NotKHbook/ # but not if they are /2010/2010#33/therion/ : the wallet is /2010#33/ not /therion/ - print(' - ', end='') + print(' ', end='') scans_path = Path(settings.SCANS_ROOT) seen = [] c=0 @@ -110,8 +70,8 @@ def load_all_scans(): c+=1 if c % 15 == 0 : print(".", end='') - if c % 500 == 0 : - print("\n -", end='') + if c % 750 == 0 : + print("\n ", end='') if p.parent.parent.parent.parent == scans_path: # print(f"too deep {p}", end='\n') @@ -141,34 +101,18 @@ def load_all_scans(): if tag not in seen: print(f" {tag.name} ", end='') + if len(str(tag.name)) > 17: + print('\n ', end='') seen.append(tag) print(f'\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets') - - # if False: - # n=0 - # for topfolder, fpath, fisdir in GetListDir(settings.SCANS_ROOT): - # if not fisdir: - # continue - - # # do the year folders - # # if re.match(r"\d\d\d\d$", topfolder): - # print(f"{topfolder}", end=' ') - # for walletname, fpath, fisdir in GetListDir(fpath): - # if fisdir: - # wallet = Wallet(fpath=fpath, walletname=walletname) - # # this is where we should record the year explicitly - # # line 347 of view/uploads.py and needs refactoring for loading contentsjson - # wallet.save() - # LoadListScansFile(wallet) - # # else: - # # # but We *should* load all the scans, even for nonstandard names. - # # print(f'\n - IGNORE {topfolder} - {fpath}') - # print("", flush=True) # but we also need to check if JSON exists, even if there are no uploaded scan files. # Here we know there is a rigid folder structure, so no need to look for sub folders + print(f"\n - Checking for wallets where only JSON exists, but there are no actual uploaded scan files:") + print(' ', end='') + wjson = 0 contents_path = Path(settings.DRAWINGS_DATA, "walletjson") for yeardir in contents_path.iterdir(): if yeardir.is_dir(): @@ -177,9 +121,15 @@ def load_all_scans(): walletname = walletpath.name if walletname not in wallets: - print(f" - {walletname} creation attempting: only JSON, no actual uploaded scan files.", end=' ') + wjson += 1 + if wjson % 10 == 0 : + print("\n ", end='') + + print(f"{walletname} ", end='') wallet, created = Wallet.objects.update_or_create(walletname=walletname) - # should now also load the json and use it ! check &ref is correct or missing too - if created: - print(f" - {walletname} created: only JSON, no actual uploaded scan files.", end=' ') + wallets[walletname] = wallet + # could now also load the json and use it. check &ref is correct or missing too.. + if not created: + print(f"\n - {walletname} was not created, but was not in directory walk of /surveyscans/. Who created it?") wallet.save() + print(f'\n - found another {wjson:,} JSON files, making a total of {len(wallets):,} wallets') |