summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- core/views/scans.py 4
-rw-r--r-- parsers/scans.py 86
2 files changed, 20 insertions, 70 deletions
diff --git a/core/views/scans.py b/core/views/scans.py
index 50e7209..5269d0b 100644
--- a/core/views/scans.py
+++ b/core/views/scans.py
@@ -64,7 +64,7 @@ def caveifywallet(w):
'''Gets the cave from the list of survex files,
only selects one of them though. Only used for display.
'''
- print(f' - Caveify {w=}')
+ #print(f' - Caveify {w=}')
blocknames = []
blocks = SurvexBlock.objects.filter(scanswallet = w)
for b in blocks:
@@ -208,7 +208,7 @@ def cavewallets(request, caveid):
wallets.add(z)
else:
wurl = f"/scanupload/{z.walletname.replace('#',':')}"
- print(f' - Unrecognised cave name \'{zcaveid}\' in {z.walletname} (out of {len(Gcavelookup):,} cave names')
+ print(f' - Unrecognised cave name \'{zcaveid}\' in {z.walletname} (out of {len(Gcavelookup):,} cave names and aliases)')
message = f" ! In {z.walletname} there is an unrecognised cave name '{zcaveid}' (out of {len(Gcavelookup):,} cave names"
DataIssue.objects.update_or_create(parser='scans', message=message, url=wurl)
diff --git a/parsers/scans.py b/parsers/scans.py
index 4a8b68d..9cbeeab 100644
--- a/parsers/scans.py
+++ b/parsers/scans.py
@@ -26,46 +26,6 @@ git = settings.GIT
# to do: Actually read all the JSON files and set the survex file field appropriately!
-# def GetListDir(sdir):
- # '''handles url or file, so we can refer to a set of scans (not drawings) on another server
- # returns a list of f (file), ff (file full path), is_dir (bool)
-
- # REPLACE all use of this with Path.rglob() !
- # '''
- # res = [ ]
- # if type(sdir) is str and sdir[:7] == "http://":
- # # s = urllib.request.urlopen(sdir)
- # message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]"
- # print(message)
- # DataIssue.objects.create(parser='Drawings', message=message)
- # sdir[:7] = ""
-
- # for f in os.listdir(sdir):
- # if f[0] != ".":
- # ff = os.path.join(sdir, f)
- # res.append((f, ff, os.path.isdir(ff)))
- # return res
-
-
-# def LoadListScansFile(wallet):
- # # formerly a generic troggle utility, written by who ? Being gradually expunged and replaced by python standard library functions
- # gld = [ ]
- # # flatten out any directories in these wallet folders - should not be any
- # for (fyf, ffyf, fisdiryf) in GetListDir(wallet.fpath):
- # if fisdiryf:
- # gld.extend(GetListDir(ffyf))
- # else:
- # gld.append((fyf, ffyf, fisdiryf))
-
- # c=0
- # for (fyf, ffyf, fisdiryf) in gld:
- # if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif|xvi)(?i)$", fyf):
- # singlescan = SingleScan(ffile=ffyf, name=fyf, wallet=wallet)
- # singlescan.save()
- # c+=1
- # if c>=10:
- # print(".", end='')
- # c = 0
def load_all_scans():
'''This iterates through the scans directories (either here or on the remote server)
@@ -93,7 +53,7 @@ def load_all_scans():
# they are if they are /2010/2010#33
# or /1996-1999NotKHbook/
# but not if they are /2010/2010#33/therion/ : the wallet is /2010#33/ not /therion/
- print(' - ', end='')
+ print(' ', end='')
scans_path = Path(settings.SCANS_ROOT)
seen = []
c=0
@@ -110,8 +70,8 @@ def load_all_scans():
c+=1
if c % 15 == 0 :
print(".", end='')
- if c % 500 == 0 :
- print("\n -", end='')
+ if c % 750 == 0 :
+ print("\n ", end='')
if p.parent.parent.parent.parent == scans_path:
# print(f"too deep {p}", end='\n')
@@ -141,34 +101,18 @@ def load_all_scans():
if tag not in seen:
print(f" {tag.name} ", end='')
+ if len(str(tag.name)) > 17:
+ print('\n ', end='')
seen.append(tag)
print(f'\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets')
-
- # if False:
- # n=0
- # for topfolder, fpath, fisdir in GetListDir(settings.SCANS_ROOT):
- # if not fisdir:
- # continue
-
- # # do the year folders
- # # if re.match(r"\d\d\d\d$", topfolder):
- # print(f"{topfolder}", end=' ')
- # for walletname, fpath, fisdir in GetListDir(fpath):
- # if fisdir:
- # wallet = Wallet(fpath=fpath, walletname=walletname)
- # # this is where we should record the year explicitly
- # # line 347 of view/uploads.py and needs refactoring for loading contentsjson
- # wallet.save()
- # LoadListScansFile(wallet)
- # # else:
- # # # but We *should* load all the scans, even for nonstandard names.
- # # print(f'\n - IGNORE {topfolder} - {fpath}')
- # print("", flush=True)
# but we also need to check if JSON exists, even if there are no uploaded scan files.
# Here we know there is a rigid folder structure, so no need to look for sub folders
+ print(f"\n - Checking for wallets where only JSON exists, but there are no actual uploaded scan files:")
+ print(' ', end='')
+ wjson = 0
contents_path = Path(settings.DRAWINGS_DATA, "walletjson")
for yeardir in contents_path.iterdir():
if yeardir.is_dir():
@@ -177,9 +121,15 @@ def load_all_scans():
walletname = walletpath.name
if walletname not in wallets:
- print(f" - {walletname} creation attempting: only JSON, no actual uploaded scan files.", end=' ')
+ wjson += 1
+ if wjson % 10 == 0 :
+ print("\n ", end='')
+
+ print(f"{walletname} ", end='')
wallet, created = Wallet.objects.update_or_create(walletname=walletname)
- # should now also load the json and use it ! check &ref is correct or missing too
- if created:
- print(f" - {walletname} created: only JSON, no actual uploaded scan files.", end=' ')
+ wallets[walletname] = wallet
+ # could now also load the json and use it. check &ref is correct or missing too..
+ if not created:
+ print(f"\n - {walletname} was not created, but was not in directory walk of /surveyscans/. Who created it?")
wallet.save()
+ print(f'\n - found another {wjson:,} JSON files, making a total of {len(wallets):,} wallets')