summary refs log tree commit diff stats
path: root/parsers/survex.py
diff options
context:
space:
mode:
Diffstat (limited to 'parsers/survex.py')
-rw-r--r-- parsers/survex.py 83
1 files changed, 9 insertions, 74 deletions
diff --git a/parsers/survex.py b/parsers/survex.py
index 2339fbc..f0d3057 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -14,7 +14,6 @@ from troggle.core.models.survex import SurvexBlock, SurvexFile, SurvexPersonRole
from troggle.core.models.wallets import Wallet
from troggle.core.models.troggle import DataIssue, Expedition
from troggle.core.utils import chaosmonkey, get_process_memory
-#from troggle.parsers.logbooks import GetCaveLookup
from troggle.parsers.caves import create_new_cave
from troggle.parsers.people import GetPersonExpeditionNameLookup, known_foreigner
@@ -23,7 +22,7 @@ It also scans the Loser repo for all the svx files, which it loads individually
"""
todo = """
-- Lots to do to cut down on unnecessary .save() calls to avoid hitting the db so much. SHould
+- Lots to do to cut down on unnecessary .save() calls to avoid hitting the db so much. Should
speed it up noticably.
- Obscure bug in the *team inheritance and rootblock initialization needs tracking down
@@ -31,7 +30,7 @@ todo = """
- Learn to use Django .select_related() and .prefetch_related() to speed things up
https://zerotobyte.com/how-to-use-django-select-related-and-prefetch-related/
-- LoadSurvexFile() Creates a new current survexfile and valid .survexdirectory
+- LoadSurvexFile() Creates a new current survexfile
The survexblock passed-in is not necessarily the parent. FIX THIS.
- Finish writing the parse_one_file() function for survexfiles edited online. Perhaps
@@ -68,9 +67,7 @@ class SurvexLeg:
def IdentifyCave(cavepath):
"""Given a file path for a survex file, or a survex-block path,
return the cave object
-
- This is clearly getting it badly wrong, see /survexdirs report.
- """
+ """
caveslist = GetCaveLookup()
if cavepath.lower() in caveslist:
return caveslist[cavepath.lower()]
@@ -331,11 +328,10 @@ class LoadingSurvex:
includestack = []
stacksvxfiles = []
svxfileslist = []
- svxdirs = {}
+ #svxdirs = {}
svxprim = {}
uniquefile = {} # each key is a survex path, and its value is a list of parent files
expos = {}
- #survexdict = {} # each key is a directory, and its value is a list of files
lineno = 0
insp = ""
callcount = 0
@@ -1169,31 +1165,19 @@ class LoadingSurvex:
)
- def GetSurvexDirectory(self, headpath):
- """This creates a SurvexDirectory if it has not been seen before, and on creation
- it sets the primarysurvexfile. This is correct as it should be set on the first file
+ def set_primary(self, headpath):
+ """This sets the primarysurvexfile. This is correct as it should be set on the first file
in the directory, where first is defined by the *include ordering. Which is what we
are doing.
-
"""
- # all = SurvexDirectory.objects.all()
- # if not all:
- # sd0 = SurvexDirectory(path=headpath, primarysurvexfile=self.currentsurvexfiley)
- # sd0 = SurvexDirectory.objects.filter(id=1)[0]
if not headpath:
# This is normal for .svx file in the root of the :loser: repo
- # message = f" ! GetSurvexDirectory NO headpath given at {self.currentsurvexfile}"
- # print("\n"+message,file=sys.stderr)
- # stash_data_issue(parser="survex", message=message, url=f"/survexfile/{self.currentsurvexfile}")
return self.currentsurvexfile
if headpath.lower() not in self.svxprim:
primary = self.currentsurvexfile
- # self.svxdirs[headpath.lower()] = sd0 #SurvexDirectory(path=headpath, primarysurvexfile=primary) # NOT .lower()
- # self.svxdirs[headpath.lower()].save()
self.svxprim[headpath.lower()] = primary
- #self.survexdict[self.svxdirs[headpath.lower()]] = [] # list of the files in the directory
return self.svxprim[headpath.lower()]
def ReportNonCaveIncludes(self, headpath, includelabel, depth):
@@ -1285,17 +1269,7 @@ class LoadingSurvex:
newfile.save() # until we do this there is no internal id so no foreign key works
self.currentsurvexfile = newfile
- primary = self.GetSurvexDirectory(headpath)
- #newdirectory.save()
- #newfile.survexdirectory = newdirectory
- #self.survexdict[newdirectory].append(newfile)
- newfile.primary = primary
-
- # if not newdirectory:
- # message = f" ! 'None' SurvexDirectory returned from GetSurvexDirectory({headpath})"
- # print(message)
- # print(message, file=sys.stderr)
- # stash_data_issue(parser="survex", message=message, url=f"/survexfile/{svxid}")
+ newfile.primary = self.set_primary(headpath)
# REPLACE all this IdentifyCave() stuff with GCaveLookup ?
cave = IdentifyCave(headpath) # cave already exists in db
@@ -1309,23 +1283,12 @@ class LoadingSurvex:
newfile.cave = cave
# print(f"\n - New directory '{newdirectory}' for cave '{cave}'",file=sys.stderr)
- # if not newfile.survexdirectory:
- # message = f" ! .survexdirectory NOT SET in new SurvexFile {svxid} "
- # print(message)
- # print(message, file=sys.stderr)
- # stash_data_issue(parser="survex", message=message)
if not newfile.primary:
message = f" ! .primary NOT SET in new SurvexFile {svxid} "
print(message)
print(message, file=sys.stderr)
stash_data_issue(parser="survex", message=message)
self.currentsurvexfile.save() # django insists on this although it is already saved !?
- # try:
- # newdirectory.save()
- # except:
- # print(newdirectory, file=sys.stderr)
- # print(newdirectory.primarysurvexfile, file=sys.stderr)
- # raise
def ProcessIncludeLine(self, included):
@@ -1875,7 +1838,6 @@ class LoadingSurvex:
if self.never_seen(includepath, path):
fullpath = os.path.join(settings.SURVEX_DATA, includepath + ".svx")
self.RunSurvexIfNeeded(os.path.join(settings.SURVEX_DATA, includepath), path)
- self.check_unique_name(os.path.join(settings.SURVEX_DATA, includepath))
if os.path.isfile(fullpath):
# --------------------------------------------------------
self.depthinclude += 1
@@ -2011,16 +1973,7 @@ class LoadingSurvex:
return False
else:
self.uniquefile[incpath] = [parent]
- return True
-
- def check_unique_name(self, fullpath):
- """This only checks whether the last bit of the name of the survex file is unique,
- e.g. "bigpitch", not whether the whole path of the survexfile has been seen before.
-
- We don't care about this any more.
- """
- return
-
+ return True
def RunSurvexIfNeeded(self, fullpath, calledpath):
now = time.time()
@@ -2137,12 +2090,10 @@ def FindAndLoadSurvex(survexblockroot):
svx_scan = LoadingSurvex()
svx_scan.callcount = 0
svx_scan.depthinclude = 0
- #fullpathtotop = os.path.join(survexfileroot.survexdirectory.path, survexfileroot.path)
fullpathtotop = str(Path(survexfileroot.path).parent / survexfileroot.path)
print(f" - RunSurvexIfNeeded cavern on '{fullpathtotop}'", file=sys.stderr)
svx_scan.RunSurvexIfNeeded(fullpathtotop, fullpathtotop)
- svx_scan.check_unique_name(fullpathtotop)
svx_scan.uniquefile[str(survexfileroot)] = ["0"]
indent = ""
@@ -2262,7 +2213,6 @@ def FindAndLoadSurvex(survexblockroot):
svx_scan.svxfileslist = [] # free memory
svx_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.?
- omit_scan.check_unique_name(fullpathtotop)
omit_scan.uniquefile[unseensroot] = ["0"]
mem0 = get_process_memory()
@@ -2348,9 +2298,7 @@ def FindAndLoadSurvex(survexblockroot):
mem1 = get_process_memory()
print(f" - Number of SurvexDirectories: {len(svx_load.svxprim):,}")
- tf = 0
- # for d in svx_load.survexdict:
- # tf += len(svx_load.survexdict[d])
+ tf = SurvexFile.objects.all().count()
print(f" - Number of SurvexFiles: {tf:,}")
print(f" - Number of Survex legs: {legsnumber:,}")
svx_load = None
@@ -2533,19 +2481,6 @@ def MakeFileRoot(fn):
print(f" - Making/finding a new root survexfile for this import: {fn}")
fileroot = SurvexFile(path=fn, cave=cave)
- # try:
- # fileroot.survexdirectory = SurvexDirectory.objects.get(id=1) # default
- # except:
- # fileroot.survexdirectory = None
-
-
- # if cave:
- # # But setting the SurvexDirectory does work !
- # # The fluffy stuff is because of errors in the original setting of survex directories
- # # which needs to be cleaned up..
- # for sd in cave.survexdirectory_set.filter(cave=cave):
- # if f"{sd.primarysurvexfile}".replace("caves-","").startswith(f"{sd.cave}"[:4]):
- # fileroot.survexdirectory = sd
fileroot.save()
fileroot.cave = cave
print(f" - new fileroot {type(fileroot)} for {fn} with cave {cave} - {fileroot}")