From 327b1923b06bd4789df7ad0be4976e7d63c3aad5 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Sun, 10 Sep 2023 02:06:38 +0300 Subject: Cleaning auto Cave creation from survex file detection --- parsers/survex.py | 97 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 43 deletions(-) (limited to 'parsers/survex.py') diff --git a/parsers/survex.py b/parsers/survex.py index ef69759..fe7d211 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -317,16 +317,16 @@ class LoadingSurvex: insp = "" callcount = 0 caverncount = 0 - ignoreprefix = ["surface", "kataster", "fixedpts", "gpx", "deprecated"] - ignorenoncave = [ - "caves-1623", - "caves-1623/2007-NEU", - "caves-1626", - "caves-1624", - "caves-1627", - "fixedpts/gps/gps00raw", - "", - ] + ignoreprefix = ["surface", "kataster", "fixedpts", "gpx", "deprecated", "dummy_"] + # ignorenoncave = [ + # "caves-1623", + # "caves-1623/2007-NEU", + # "caves-1626", + # "caves-1624", + # "caves-1627", + # "fixedpts/gps/gps00raw", + # "", + # ] TREE = "tree" ODDS = "oddments" svxpass = TREE @@ -1165,42 +1165,60 @@ class LoadingSurvex: def IdentifyCave(self, cavepath, svxid, depth): """Given a file path for a survex file, e.g. /1626/107/107.svx, or a survex-block path, - return the cave object + return the cave object + kataster + fixedpts/gps + and everything at top level, directly in caves-1623/ not in a subdir + NOTE self.cavelist is a superset of GCaveLookup, which already contians both uppercase and lowercase aliases """ - path = cavepath.lower() - if path in self.caveslist: # primed with GCaveLookup - return self.caveslist[path] + if cavepath == "caves-1623/99ob02": + for key in self.caveslist: + cave = self.caveslist[key] + if type(cave) != Cave: + print(f"BAD CAVE TYPE '{key}' -- {type(cave)}'{cave}'") + for key in self.caveslist: + cave = self.caveslist[key] + print(f"{key} -- Cave<{cave}>") + + for i in self.ignoreprefix: + if cavepath.lower().startswith(i): + message = (f" - {cavepath} starts with (while creating '{svxid}.svx' )") + return False + + if cavepath in self.caveslist: # primed with GCaveLookup + return self.caveslist[cavepath] + + rx_svxcollection = re.compile(r"(?i)caves-(\d\d\d\d)/(.*)$") # rx_cave = re.compile(r"(?i)caves-(\d\d\d\d)/([-\d\w]+|\d\d\d\d-?\w+-\d+)") path_match = self.rx_cave.search(cavepath) if path_match: sluggy = f"{path_match.group(1)}-{path_match.group(2)}" - guesses = [sluggy.lower()] # full 1626-107 search, don;t use short-forms - for g in guesses: - if g in self.caveslist: - self.caveslist[cavepath] = self.caveslist[g] # set "caves-1626/107/107" as index to cave 1626-107 - return self.caveslist[g] - cave = create_new_cave(cavepath) # uses the pending stuff to create pending cave descriptions + seek = [sluggy, sluggy.replace("1623-","")] # to catch '2023-kt-02' etc + for s in seek: + if s in self.caveslist: + self.caveslist[cavepath] = self.caveslist[s] # set "caves-1626/107/107" as index to cave 1626-107 + return self.caveslist[s] + + cave = create_new_cave(cavepath, f"Make cave found in survex file {svxid}") # uses the pending code to create pending cave descriptions self.caveslist[cavepath] = cave - message = f" ! MAKING cave for {cavepath=} {svxid=}" - stash_data_issue(parser="survex", message=message, url=None, sb=(svxid)) - if not cavepath.startswith("caves-1624") or cavepath.startswith("caves-1626"): + message = f"\n ! MAKING cave {sluggy} for {cavepath=} {svxid=} (not reporting this for 1624 or 1626)" + # stash_data_issue(parser="survex", message=message, url="/survexfile/{svxid}.svx", sb=(svxid)) + if not (cavepath.startswith("caves-1624") or cavepath.startswith("caves-1626")): print(message, file=sys.stderr) + return cave else: - # isn't all this pointless...?? - if self.is_it_already_pending(cavepath, svxid, depth): # but pending will already have been created as Cave objects - pass + path_match = rx_svxcollection.search(svxid) + if path_match: + message = f" ! Recognised survex file which is not a cave at {svxid=}" + # stash_data_issue(parser="survex", message=message, url=None, sb=(svxid)) + # print(message, file=sys.stderr) + return False else: - # It is too late to add it to the pending caves list here, they were already - # processed in parsers/caves.py So we have to do a bespoke creation. - cave = create_new_cave(svxid) - message = f" ! Warning: cave identifier '{caveid}'or {id} (guessed from file path) is not a known cave. Need to add to expoweb/cave_data/pendingcaves.txt ? In '{includelabel}.svx' at depth:[{len(depth)}]." + message = f" ! ERROR: no cave at '{svxid}.svx - is not a known cavename format. " print("\n" + message) print("\n" + message, file=sys.stderr) - print(f"{self.pending}", end="", file=sys.stderr) - stash_data_issue(parser="survex", message=message, url=None, sb=(includelabel)) - - print(f' ! No regex (standard identifier) cave match for {cavepath.lower()}', file=sys.stderr) - return None + stash_data_issue(parser="survex", message=message, url="{svxid}.svx", sb=(svxid)) + return None def is_it_already_pending(self, headpath, includelabel, depth): """Ignore surface, kataser and gpx *include survex files""" @@ -1222,14 +1240,7 @@ class LoadingSurvex: # print("\n"+message) # print("\n"+message,file=sys.stderr) return True - for i in self.ignoreprefix: - if headpath.startswith(i): - message = ( - f" - {headpath} starts with (while creating '{includelabel}' sfile & sdirectory)" - ) - # print("\n"+message) - # print("\n"+message,file=sys.stderr) - return True + caveid = f"{headpath[6:10]}-{headpath[11:]}".upper() if caveid in self.pending: # Yes we didn't find this cave, but we know it is a pending one. So not an error. -- cgit v1.2.3