summaryrefslogtreecommitdiffstats
path: root/parsers/survex.py
diff options
context:
space:
mode:
Diffstat (limited to 'parsers/survex.py')
-rw-r--r--parsers/survex.py233
1 files changed, 132 insertions, 101 deletions
diff --git a/parsers/survex.py b/parsers/survex.py
index 0156140..e7340a4 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -8,13 +8,13 @@ from datetime import datetime, timezone
from pathlib import Path
import troggle.settings as settings
-from troggle.core.models.caves import Cave, Entrance
+from troggle.core.models.caves import Cave, Entrance, GetCaveLookup
from troggle.core.models.logbooks import QM
from troggle.core.models.survex import SurvexBlock, SurvexDirectory, SurvexFile, SurvexPersonRole, SurvexStation
from troggle.core.models.wallets import Wallet
from troggle.core.models.troggle import DataIssue, Expedition
from troggle.core.utils import chaosmonkey, get_process_memory
-from troggle.parsers.logbooks import GetCaveLookup
+#from troggle.parsers.logbooks import GetCaveLookup
from troggle.parsers.people import GetPersonExpeditionNameLookup, known_foreigner
"""Imports the tree of survex files following from a defined root .svx file
@@ -63,6 +63,28 @@ class SurvexLeg:
tape = 0.0
compass = 0.0
clino = 0.0
+
+def IdentifyCave(cavepath):
+ """Given a file path for a survex file, or a survex-block path,
+ return the cave object
+ """
+ caveslist = GetCaveLookup()
+ if cavepath.lower() in caveslist:
+ return caveslist[cavepath.lower()]
+ # TO DO - this predates the big revision to Gcavelookup so look at this again carefully
+ path_match = LoadingSurvex.rx_cave.search(cavepath) # use as Class method
+ if path_match:
+ sluggy = f"{path_match.group(1)}-{path_match.group(2)}"
+ guesses = [sluggy.lower(), path_match.group(2).lower()]
+ for g in guesses:
+ if g in caveslist:
+ caveslist[cavepath] = caveslist[g]
+ return caveslist[g]
+ print(f" ! Failed to find cave for {cavepath.lower()}")
+ else:
+ # not a cave, but that is fine.
+ # print(f' ! No regex(standard identifier) cave match for {cavepath.lower()}')
+ return None
def datewallet(w, earliest):
"""Gets the date of the youngest survexblock associated with the wallet
@@ -1093,23 +1115,6 @@ class LoadingSurvex:
f" $ flagslist:{flags}",
)
- def IdentifyCave(self, cavepath):
- if cavepath.lower() in self.caveslist:
- return self.caveslist[cavepath.lower()]
- # TO DO - this predates the big revision to Gcavelookup so look at this again carefully
- path_match = self.rx_cave.search(cavepath)
- if path_match:
- sluggy = f"{path_match.group(1)}-{path_match.group(2)}"
- guesses = [sluggy.lower(), path_match.group(2).lower()]
- for g in guesses:
- if g in self.caveslist:
- self.caveslist[cavepath] = self.caveslist[g]
- return self.caveslist[g]
- print(f" ! Failed to find cave for {cavepath.lower()}")
- else:
- # not a cave, but that is fine.
- # print(f' ! No regex(standard identifier) cave match for {cavepath.lower()}')
- return None
def GetSurvexDirectory(self, headpath):
"""This creates a SurvexDirectory if it has not been seen before, and on creation
@@ -1214,7 +1219,7 @@ class LoadingSurvex:
newdirectory.save()
newfile.survexdirectory = newdirectory
self.survexdict[newdirectory].append(newfile)
- cave = self.IdentifyCave(headpath) # cave already exists in db
+ cave = IdentifyCave(headpath) # cave already exists in db
if not newdirectory:
message = f" ! 'None' SurvexDirectory returned from GetSurvexDirectory({headpath})"
@@ -1458,28 +1463,42 @@ class LoadingSurvex:
# ;*edulcni means we are returning from an included file
if edulcni:
self.ProcessEdulcniLine(edulcni)
-
- def LoadSurvexSetup(self, survexblock, survexfile):
- self.depthbegin = 0
- self.datastar = self.datastardefault
- print(
- self.insp
- + f" - MEM:{get_process_memory():.3f} Reading. parent:{survexblock.survexfile.path} <> {survexfile.path} "
- )
- self.lineno = 0
- sys.stderr.flush()
- self.callcount += 1
- if self.callcount % 10 == 0:
- print(".", file=sys.stderr, end="")
- if self.callcount % 500 == 0:
- print("\n", file=sys.stderr, end="")
- # Try to find the cave in the DB if not use the string as before
- path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path)
+
+ def get_cave(self, path):
+ """Read the file path to a survex file and guesses the cave
+ """
+ path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", path)
if path_match:
pos_cave = f"{path_match.group(1)}-{path_match.group(2)}"
cave = getCaveByReference(pos_cave)
- if cave:
- survexfile.cave = cave
+ return cave
+ return None
+
+ # def LoadSurvexSetup(self, survexblock, survexfile):
+ # """REFACTOR to use get_cave()
+
+ # This does not seem to get run at all ?!
+ # """
+ # self.depthbegin = 0
+ # self.datastar = self.datastardefault
+ # print(
+ # self.insp
+ # + f" - MEM:{get_process_memory():.3f} Reading. parent:{survexblock.survexfile.path} <> {survexfile.path} "
+ # )
+ # self.lineno = 0
+ # sys.stderr.flush()
+ # self.callcount += 1
+ # if self.callcount % 10 == 0:
+ # print(".", file=sys.stderr, end="")
+ # if self.callcount % 500 == 0:
+ # print("\n", file=sys.stderr, end="")
+ # # Try to find the cave in the DB if not use the string as before
+ # path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path)
+ # if path_match:
+ # pos_cave = f"{path_match.group(1)}-{path_match.group(2)}"
+ # cave = getCaveByReference(pos_cave)
+ # if cave:
+ # survexfile.cave = cave
def LinearLoad(self, survexblock, path, collatefilename):
"""Loads a single survex file. Usually used to import all the survex files which have been collated
@@ -1497,7 +1516,7 @@ class LoadingSurvex:
slengthtotal = 0.0
nlegstotal = 0
self.relativefilename = path
- self.IdentifyCave(path) # this will produce null for survex files which are geographic collections
+ IdentifyCave(path) # this will produce null for survex files which are geographic collections
self.currentsurvexfile = survexblock.survexfile
self.currentsurvexfile.save() # django insists on this although it is already saved !?
@@ -1637,6 +1656,7 @@ class LoadingSurvex:
legslength=0.0,
)
newsurvexblock.save()
+ print(f"SB: #{newsurvexblock.id} '{newsurvexblock}' parent:{newsurvexblock.parent} f:{newsurvexblock.survexfile}")
newsurvexblock.title = (
"(" + survexblock.title + ")"
) # copy parent initially, overwrite if it has its own
@@ -2306,43 +2326,48 @@ def parse_one_file(fpath): # --------------------------------------in progress--
In the initial file parsing in databaseReset, the *include expansion is done
in an earlier stage than LinearLoad(). By the time LinearLoad() is called,
all the *include expansion has happened.
-
- WORK IN PROGRESS.
- Works fine for completely new survex file.
-
- For an edited, pre-existing survex file,
- I am having great trouble getting the 'parent' block to work correctly.
- It gets overwritten, and then nullified, on repeated SAVE & import.
- I should learn how to step through with the debugger.
"""
- def parse_new_svx(fpath, blockroot=None, svxfileroot=None):
+ def parse_new_svx(fpath, svx_load, cave, svxfileroot=None):
+ """We need a dummy survex block which has the survexfile being parsed
+ as its .survexfile field. But it is used in two ways, it is also
+ set as the parent block for the new blocks being created. This has to be fixed
+ later.
+ This all needs refactoring.
+ """
if svxfileroot == None:
- svxfileroot = MakeFileRoot(fpath)
+
+ svxfileroot = MakeFileRoot(fpath, cave)
svxfileroot.save()
- if blockroot == None:
- newname = "adhoc_" + str(Path(str(svxfileroot)).name)
- survexblockparent = SurvexBlock(
- name=newname, survexpath="", survexfile=svxfileroot, legsall=0, legslength=0.0
- )
- survexblockparent.save()
- blockroot = survexblockparent
+ # It is vital that the block has attached the survexfile object which is being parsed.
+ block_dummy = SurvexBlock(
+ name="dummy", survexpath="", survexfile=svxfileroot, legsall=0, legslength=0.0
+ )
+ svxfileroot.save()
+ block_dummy.save()
+ newname = f"#{block_dummy.id}_" + str(Path(str(svxfileroot)).name)
+ block_dummy.name = newname
+ block_dummy.save()
+ print(f" - block_dummy now '{block_dummy}' {type(block_dummy)} id={block_dummy.id} f:{block_dummy.survexfile}")
+
svx_load.survexdict[svxfileroot.survexdirectory] = []
svx_load.survexdict[svxfileroot.survexdirectory].append(svxfileroot)
svx_load.svxdirs[""] = svxfileroot.survexdirectory
# ----------------------------------------------------------------
- svx_load.LinearLoad(blockroot, svxfileroot.path, fname)
+ svx_load.LinearLoad(block_dummy, svxfileroot.path, fname)
# ----------------------------------------------------------------
+ # Now we don't need or want the dummy any more
+ block_dummy.delete()
+ global svx_load
print(f"\n - Loading One Survex file '{fpath}'", file=sys.stderr)
svx_load = LoadingSurvex()
svx_load.survexdict = {}
fname = Path(settings.SURVEX_DATA, (fpath + ".svx"))
- # print(f" - {fname=}")
svxs = SurvexFile.objects.filter(path=fpath)
if svxs:
@@ -2351,67 +2376,70 @@ def parse_one_file(fpath): # --------------------------------------in progress--
print(f" - Aborting file parsing & import into database.")
return False
print(f" - Pre-existing survexfile {svxs}.")
- # reparse_existing_svx(svxs)
existingsvx = SurvexFile.objects.get(path=fpath)
existingcave = existingsvx.cave
- print(f" - survexfile is {existingsvx} id={existingsvx.id} {existingcave}")
+ print(f" - survexfile id={existingsvx.id} {existingsvx} {existingcave}")
sbs = existingsvx.survexblock_set.all()
existingparent = None
parents =set()
if sbs:
for sb in sbs:
- print(f" - cleaning survex block {sb=}")
+ # print(f" - {sb.id} checking survex block {sb=}")
try:
if sb.parent:
parents.add(sb.parent)
+ # print(f" - adding {sb.parent=}")
except:
- print(f" ! FAILURE to access sb.parent {sb=}")
- sb.delete()
+ print(f" ! FAILURE to access sb.parent {sb=}\n ! {sb.parent_id=} ")# \n{dir(sb)}
+ # even though the parent_id exists.. hmm.
+ for sb in sbs:
+ # print(f" - {sb.id} {sb.pk} {sb}")
+ sb_keep = sb
+ if sb not in parents:
+ # print(f" - {sb.id} Deleting survex block {sb=}")
+ sb.delete()
+
if parents:
- print(f" - set of parent blocks {parents}")
+ # print(f" - parents get {parents}")
if len(parents) > 1:
print(f" - WARNING more than one parent survex block!")
- existingparent = parents.pop()
+ existingparent = parents.pop() # removes it
+ parents.add(existingparent) # restores it
- # print(f" - deleting survex file {existingsvx=}")
- # existingsvx.delete()
- print(f" - Reloading and parsing this survexfile '{fpath}' Loading...")
-
- parse_new_svx(fpath, blockroot=existingparent, svxfileroot=existingsvx)
-
- svxs = SurvexFile.objects.filter(path=fpath)
- if len(svxs)>1:
- print(f" ! Mistake? More than one survex file object in database with the same file-path {svxs}")
- print(f" - Aborting file parsing & import into database.")
- return False
- replacesvx = SurvexFile.objects.get(path=fpath)
- replacesvx.cave = existingcave
- print(f" - new/replacement survexfile {svxs}. id={replacesvx.id}")
- replacesvx.save()
+ print(f" - Reloading and parsing this survexfile '{fpath}' Loading...")
+ # Logic is that we need an SB which links to the survexfile we are parsing for the parser
+ # to work, but we delete all those before we start parsing. Urk.
+ #===========
+ parse_new_svx(fpath, svx_load, existingsvx, svxfileroot=existingsvx)
+ #===========
+ print(f" - survexfile id={existingsvx.id} update ")
+
if parents:
- sbs = replacesvx.survexblock_set.all()
+ print(f" - parents set {parents}")
+ sbs = existingsvx.survexblock_set.all()
+ if len(sbs)<1:
+ print(f" ! No survex blocks found. Parser failure...")
for sb in sbs:
- print(f" - re-setting survex block parent{sb=}")
+ print(f" - {sb.id} re-setting survex block parent{sb=}")
sb.parent = existingparent # should be all the same
sb.save()
else:
- print(f" - Not seen this survexfile before '{fpath}' Loading...")
- parse_new_svx(fpath)
-
- legsnumber = svx_load.legsnumber
-
- print(f" - Number of SurvexDirectories: {len(svx_load.survexdict):,}")
- print(f" - SurvexDirectories: {svx_load.survexdict}")
-
- tf = 0
- for d in svx_load.survexdict:
- tf += len(svx_load.survexdict[d])
- print(f" - Number of SurvexFiles: {tf:,}")
- print(f" - Number of Survex legs: {legsnumber:,}")
- print(f" - Length of Survex legs: {svx_load.slength:.2f} m")
+ print(f" - Not seen this survexfile before '{fpath}' Loading. ..")
+ #===========
+ parse_new_svx(fpath,svx_load, IdentifyCave(fpath))
+ #===========
+
+ # print(f" - Number of SurvexDirectories: {len(svx_load.survexdict):,}")
+ # tf = 0
+ # for d in svx_load.survexdict:
+ # print(f" - SD: {d}")
+ # tf += len(svx_load.survexdict[d])
+ # print(f" - Number of SurvexFiles: {tf:,}")
+ # print(f" - Number of Survex legs: {svx_load.legsnumber:,}")
+ # print(f" - Length of Survex legs: {svx_load.slength:.2f} m")
svx_load = None
return True
@@ -2432,15 +2460,18 @@ def MakeSurvexFileRoot():
return fileroot
-def MakeFileRoot(fn):
+def MakeFileRoot(fn, cave):
"""Returns a file_object.path = _unseens.svx associated with directory_object.path = SURVEX_DATA
CHANGE THIS to just use the same block root as for SURVEX_TOPNAME ?
"""
- print(f" - making a new root survexfile for this import: {fn}")
- fileroot = SurvexFile(path=fn, cave=None)
+ print(f" - Making a new root survexfile for this import: {fn}")
+ fileroot = SurvexFile(path=fn, cave=cave)
fileroot.survexdirectory = SurvexDirectory.objects.get(id=1) # just re-use the first thing we made
fileroot.save()
+ cave = IdentifyCave(fn)
+ fileroot.cave = cave
+ print(f" - new fileroot {type(fileroot)} for {fn} with cave {cave}")
return fileroot
@@ -2490,7 +2521,7 @@ def LoadSurvexBlocks():
# sudo service mariadb start
survexblockroot.save()
- omitsfileroot = MakeFileRoot(UNSEENS)
+ omitsfileroot = MakeFileRoot(UNSEENS, None)
survexomitsroot = SurvexBlock(
name=OMITBLOCK, survexpath="", survexfile=omitsfileroot, legsall=0, legslength=0.0
)