 core/utils.py         |  21
 core/views/scans.py   |   4
 core/views/uploads.py |   2
 parsers/drawings.py   |   2
 parsers/scans.py      | 182
 parsers/survex.py     |   5
 6 files changed, 107 insertions(+), 109 deletions(-)
diff --git a/core/utils.py b/core/utils.py
index 1081e17..c7f71fa 100644
--- a/core/utils.py
+++ b/core/utils.py
@@ -70,27 +70,6 @@ def chaosmonkey(n):
# print("CHAOS strikes !", file=sys.stderr)
return True
-#
-def GetListDir(sdir):
- '''handles url or file, so we can refer to a set of scans (not drawings) on another server
- returns a list of f (file), ff (file full path), is_dir (bool)
-
- REPLACE all use of this with Path.rglob() !
- '''
- res = [ ]
- if type(sdir) is str and sdir[:7] == "http://":
- # s = urllib.request.urlopen(sdir)
- message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]"
- print(message)
- DataIssue.objects.create(parser='Drawings', message=message)
- sdir[:7] = ""
-
- for f in os.listdir(sdir):
- if f[0] != ".":
- ff = os.path.join(sdir, f)
- res.append((f, ff, os.path.isdir(ff)))
- return res
-
def only_commit(fname, message):
'''Only used to commit a survex file edited and saved in view/survex.py
'''
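
The docstring of the removed GetListDir() already asks for it to be replaced by Path.rglob(). A minimal sketch of that idiom, returning the same (name, full path, is_dir) tuples; note that rglob() recurses where the old helper listed only one level, and the list_entries name is illustrative, not part of this commit:

    from pathlib import Path

    def list_entries(sdir):
        # Sketch of a pathlib-based replacement for GetListDir():
        # yield (name, full path, is_dir) for every non-hidden entry under sdir.
        for p in Path(sdir).rglob("*"):
            if not p.name.startswith("."):
                yield (p.name, str(p), p.is_dir())
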
diff --git a/core/views/scans.py b/core/views/scans.py
index 1b9ab95..50e7209 100644
--- a/core/views/scans.py
+++ b/core/views/scans.py
@@ -208,8 +208,8 @@ def cavewallets(request, caveid):
wallets.add(z)
else:
wurl = f"/scanupload/{z.walletname.replace('#',':')}"
- print(f' - Unrecognised cave name \'{zcaveid}\' in {z.walletname}')
- message = f" ! In {z.walletname} there is an unrecognised cave name '{zcaveid}'"
+                    print(f' - Unrecognised cave name \'{zcaveid}\' in {z.walletname} (out of {len(Gcavelookup):,} cave names)')
+                    message = f" ! In {z.walletname} there is an unrecognised cave name '{zcaveid}' (out of {len(Gcavelookup):,} cave names)"
DataIssue.objects.update_or_create(parser='scans', message=message, url=wurl)
manywallets = list(set(wallets))
diff --git a/core/views/uploads.py b/core/views/uploads.py
index 676c554..7e25980 100644
--- a/core/views/uploads.py
+++ b/core/views/uploads.py
@@ -208,7 +208,7 @@ def get_complaints(complaints, waldata, svxfiles, files, wallet, wurl):
if not waldata["description written"]:
complaints.append("The guidebook description needs writing into the survex file. Tick the 'Cave description written' checkbox when this is done.")
# QMs
- if not waldata["qms written"] and int(w.year()) >= 2015:
+ if not waldata["qms written"] and w.year() and int(w.year()) >= 2015:
complaints.append("The QMs needs writing into the survex file. Tick the 'QMs written' checkbox when this is done.")
# Website
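
The added w.year() check short-circuits before int() is called, so a wallet with an empty or missing year string no longer raises. A small illustration of the guard, with made-up year values:

    for year in ("2017", "", None):
        # Without the truthiness check, int("") raises ValueError and
        # int(None) raises TypeError; with it, the comparison is skipped.
        recent = bool(year) and int(year) >= 2015
        print(repr(year), recent)
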
diff --git a/parsers/drawings.py b/parsers/drawings.py
index 4f52889..b3ce8c8 100644
--- a/parsers/drawings.py
+++ b/parsers/drawings.py
@@ -12,7 +12,7 @@ from functools import reduce
import settings
from troggle.core.models.survex import SingleScan, Wallet, DrawingFile
from troggle.core.models.troggle import DataIssue
-from troggle.core.utils import save_carefully, GetListDir
+from troggle.core.utils import save_carefully
'''Searches through all the :drawings: repository looking
for tunnel and therion files
diff --git a/parsers/scans.py b/parsers/scans.py
index 3922b6b..4a8b68d 100644
--- a/parsers/scans.py
+++ b/parsers/scans.py
@@ -14,7 +14,7 @@ from pathlib import Path
import settings
from troggle.core.models.survex import SingleScan, Wallet, DrawingFile
from troggle.core.models.troggle import DataIssue
-from troggle.core.utils import save_carefully, GetListDir
+from troggle.core.utils import save_carefully
from troggle.core.views.scans import datewallet
'''Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced.
@@ -26,66 +26,63 @@ git = settings.GIT
# to do: Actually read all the JSON files and set the survex file field appropriately!
-
-def CheckEmptyDate(wallet):
- '''If date is not set, get it from a linked survex file.
- Could also look at filedates for the scans in expofiles/surveyscans/ , but these can be re-set by copying.
- '''
- earliest = datetime.datetime.now().date()
-
- # This is not working, can't see why. An scans parser now taking a very long time..
- #datewallet(wallet, earliest)
- return
+# def GetListDir(sdir):
+ # '''handles url or file, so we can refer to a set of scans (not drawings) on another server
+ # returns a list of f (file), ff (file full path), is_dir (bool)
-def CheckEmptyPeople(wallet):
- '''If people list is empty, copy them from the survex files: all of them
-
- To be a Troggle model change; a many:many relationship between wallets and people,
- as well as being a list in the JSON file (which is the permanent repository). We want the many:many
- relationship so that we can filter wallets based on a person.
-
- For the moment, we will just get a list..
- '''
- return
+ # REPLACE all use of this with Path.rglob() !
+ # '''
+ # res = [ ]
+ # if type(sdir) is str and sdir[:7] == "http://":
+ # # s = urllib.request.urlopen(sdir)
+ # message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]"
+ # print(message)
+ # DataIssue.objects.create(parser='Drawings', message=message)
+ # sdir[:7] = ""
-def LoadListScansFile(wallet):
- gld = [ ]
- # flatten out any directories in these wallet folders - should not be any
- for (fyf, ffyf, fisdiryf) in GetListDir(wallet.fpath):
- if fisdiryf:
- gld.extend(GetListDir(ffyf))
- else:
- gld.append((fyf, ffyf, fisdiryf))
+ # for f in os.listdir(sdir):
+ # if f[0] != ".":
+ # ff = os.path.join(sdir, f)
+ # res.append((f, ff, os.path.isdir(ff)))
+ # return res
+
+
+# def LoadListScansFile(wallet):
+ # # formerly a generic troggle utility, written by who ? Being gradually expunged and replaced by python standard library functions
+ # gld = [ ]
+ # # flatten out any directories in these wallet folders - should not be any
+ # for (fyf, ffyf, fisdiryf) in GetListDir(wallet.fpath):
+ # if fisdiryf:
+ # gld.extend(GetListDir(ffyf))
+ # else:
+ # gld.append((fyf, ffyf, fisdiryf))
- c=0
- for (fyf, ffyf, fisdiryf) in gld:
- if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif|xvi)(?i)$", fyf):
- singlescan = SingleScan(ffile=ffyf, name=fyf, wallet=wallet)
- singlescan.save()
- c+=1
- if c>=10:
- print(".", end='')
- c = 0
+ # c=0
+ # for (fyf, ffyf, fisdiryf) in gld:
+ # if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif|xvi)(?i)$", fyf):
+ # singlescan = SingleScan(ffile=ffyf, name=fyf, wallet=wallet)
+ # singlescan.save()
+ # c+=1
+ # if c>=10:
+ # print(".", end='')
+ # c = 0
def load_all_scans():
'''This iterates through the scans directories (either here or on the remote server)
and builds up the models we can access later.
+
It does NOT read or validate anything in the JSON data attached to each wallet. Those checks
are done at runtime, when a wallet is accessed, not at import time.
- Replace GetListDir with a more modern Path.iter idiom
- path = Path("scans")
- for p in path.rglob("*"):
- print(p.name)
-
'''
print(' - Loading Survey Scans')
SingleScan.objects.all().delete()
Wallet.objects.all().delete()
- print(' - deleting all Wallet and SingleScan objects')
+ print(' - deleting all Wallet and SingleScan objects')
DataIssue.objects.filter(parser='scans').delete()
+ # These are valid old file types to be visible, they are not necessarily allowed to be uploaded to a new wallet.
valids = [".top",".txt",".tif",".png",".jpg",".jpeg",".pdf",".svg",".gif",".xvi",
".json",".autosave",".sxd",".svx",".th",".th2",".tdr",".sql",".zip",".dxf",".3d",
".ods",".csv",".xcf",".xml"]
@@ -95,10 +92,12 @@ def load_all_scans():
# Not all folders with files in them are wallets.
# they are if they are /2010/2010#33
# or /1996-1999NotKHbook/
- # but not if they are /2010/1010#33/therion or /1998/
- print(' - ', end=' ')
+ # but not if they are /2010/2010#33/therion/ : the wallet is /2010#33/ not /therion/
+ print(' - ', end='')
scans_path = Path(settings.SCANS_ROOT)
seen = []
+ c=0
+ wallets = {}
for p in scans_path.rglob('*'):
if p.is_file():
if p.suffix.lower() not in valids and p.name.lower() not in validnames:
@@ -107,6 +106,13 @@ def load_all_scans():
elif p.parent == scans_path: # skip files directly in /surveyscans/
pass
else:
+
+ c+=1
+ if c % 15 == 0 :
+ print(".", end='')
+ if c % 500 == 0 :
+ print("\n -", end='')
+
if p.parent.parent.parent.parent == scans_path:
# print(f"too deep {p}", end='\n')
fpath = p.parent.parent
@@ -114,54 +120,66 @@ def load_all_scans():
else:
fpath = p.parent
walletname = p.parent.name
-
- # UNFINISHED
+
+ if walletname in wallets:
+ wallet = wallets[walletname]
+ else:
+ print("", flush=True, end='')
+ wallet = Wallet(fpath=fpath, walletname=walletname)
+ wallet.save()
+ wallets[walletname] = wallet
+
+ singlescan = SingleScan(ffile=fpath, name=p.name, wallet=wallet)
+ singlescan.save()
+
+
+ # only printing progress:
tag = p.parent
if len(walletname)>4:
if walletname[4] == "#":
tag = p.parent.parent
if tag not in seen:
- print(f"{tag.name}", end=' ')
+ print(f" {tag.name} ", end='')
seen.append(tag)
- #wallet = Wallet(fpath=fpath, walletname=walletname)
+
-
- print('\n UNFINISHED \n\n--- ')
- for topfolder, fpath, fisdir in GetListDir(settings.SCANS_ROOT):
- if not fisdir:
- continue
+ print(f'\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets')
+
+ # if False:
+ # n=0
+ # for topfolder, fpath, fisdir in GetListDir(settings.SCANS_ROOT):
+ # if not fisdir:
+ # continue
- # do the year folders
- if re.match(r"\d\d\d\d$", topfolder):
- print(f"{topfolder}", end=' ')
- for walletname, fpath, fisdir in GetListDir(fpath):
- if fisdir:
- wallet = Wallet(fpath=fpath, walletname=walletname)
- # this is where we should record the year explicitly
- # line 347 of view/uploads.py and needs refactoring for loading contentsjson
- CheckEmptyDate(wallet)
- CheckEmptyPeople(wallet)
- wallet.save()
- LoadListScansFile(wallet)
- else:
- # but We *should* load all the scans, even for nonstandard names.
- print(f'\n - IGNORE {walletname} - {fpath}')
-
- # but we also need to check if JSON exists, even if there are no uploaded scan files
+ # # do the year folders
+ # # if re.match(r"\d\d\d\d$", topfolder):
+ # print(f"{topfolder}", end=' ')
+ # for walletname, fpath, fisdir in GetListDir(fpath):
+ # if fisdir:
+ # wallet = Wallet(fpath=fpath, walletname=walletname)
+ # # this is where we should record the year explicitly
+ # # line 347 of view/uploads.py and needs refactoring for loading contentsjson
+ # wallet.save()
+ # LoadListScansFile(wallet)
+ # # else:
+ # # # but We *should* load all the scans, even for nonstandard names.
+ # # print(f'\n - IGNORE {topfolder} - {fpath}')
+ # print("", flush=True)
+
+ # but we also need to check if JSON exists, even if there are no uploaded scan files.
+ # Here we know there is a rigid folder structure, so no need to look for sub folders
contents_path = Path(settings.DRAWINGS_DATA, "walletjson")
for yeardir in contents_path.iterdir():
if yeardir.is_dir():
for walletpath in yeardir.iterdir():
if Path(walletpath, contentsjson).is_file():
walletname = walletpath.name
- wallet, created = Wallet.objects.update_or_create(walletname=walletname)
- # should now also load the json and use it ! check &ref is correct or missing too
- if created:
- print(f"\n{walletname} created: only JSON, no actual uploaded scan files.", end=' ')
- CheckEmptyDate(wallet)
- CheckEmptyPeople(wallet)
- wallet.save()
-
-
- print("", flush=True)
+
+ if walletname not in wallets:
+ print(f" - {walletname} creation attempting: only JSON, no actual uploaded scan files.", end=' ')
+ wallet, created = Wallet.objects.update_or_create(walletname=walletname)
+ # should now also load the json and use it ! check &ref is correct or missing too
+ if created:
+ print(f" - {walletname} created: only JSON, no actual uploaded scan files.", end=' ')
+ wallet.save()
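
The rewritten loop above walks settings.SCANS_ROOT with Path.rglob(), filters files against the valid suffixes, caches one Wallet per wallet folder name in a dict, and records a SingleScan for each accepted file. A stripped-down sketch of that shape, without the Django models or the extra-depth special case (walk_wallets and its arguments are illustrative only):

    from pathlib import Path

    def walk_wallets(scans_root, valid_suffixes):
        # Group acceptable scan files by the folder that holds them,
        # mirroring the wallets = {} cache used in load_all_scans() above.
        root = Path(scans_root)
        wallets = {}
        for p in root.rglob("*"):
            if not p.is_file() or p.parent == root:
                continue
            if p.suffix.lower() not in valid_suffixes:
                continue
            wallets.setdefault(p.parent.name, []).append(p)
        return wallets

Each key would correspond to one Wallet object and each collected path to one SingleScan.
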
diff --git a/parsers/survex.py b/parsers/survex.py
index d3eec8c..44f72f8 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -323,7 +323,7 @@ class LoadingSurvex():
perps = get_people_on_trip(survexblock) # What, you don't know Judge Dredd slang ?
message = f"! DATE Warning only accurate to the month, setting to 1st '{oline}' ({survexblock}) {survexblock.survexfile.path} {perps}"
print(self.insp+message)
- DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
+ DataIssue.objects.create(parser='survex-date', message=message, url=get_offending_filename(survexblock.survexfile.path))
survexblock.date = datetime.strptime(line.replace('.','-'), '%Y-%m') # sets to first of month
setdate(year)
elif len(line) == 4:
@@ -331,7 +331,7 @@ class LoadingSurvex():
perps = get_people_on_trip(survexblock)
message = f"! DATE WARNING only accurate to the YEAR, setting to 1st January '{oline}' ({survexblock}) {survexblock.survexfile.path} {perps}"
print(self.insp+message)
- DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
+ DataIssue.objects.create(parser='survex-date', message=message, url=get_offending_filename(survexblock.survexfile.path))
survexblock.date = datetime.strptime(line, '%Y') # sets to January 1st
setdate(year)
else:
@@ -1546,6 +1546,7 @@ def LoadSurvexBlocks():
SurvexStation.objects.all().delete()
print(" - survex Data Issues flushed")
DataIssue.objects.filter(parser='survex').delete()
+ DataIssue.objects.filter(parser='survex-date').delete()
DataIssue.objects.filter(parser='survexleg').delete()
DataIssue.objects.filter(parser='survexunits').delete()
DataIssue.objects.filter(parser='entrances').delete()
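
Filing the date-accuracy warnings under their own 'survex-date' parser tag means they can be flushed (as above) or inspected separately from the rest of the survex issues. A hedged sketch of querying them through the Django ORM, assuming the DataIssue model imported elsewhere in these parsers:

    from troggle.core.models.troggle import DataIssue

    # Sketch only: list the date-accuracy warnings without touching
    # the issues still filed under parser='survex'.
    date_issues = DataIssue.objects.filter(parser='survex-date')
    print(f"{date_issues.count()} date warnings")
    for issue in date_issues[:10]:
        print(issue.message, issue.url)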