author | Philip Sargent <philip.sargent@gmail.com> | 2023-09-10 02:06:38 +0300
---|---|---
committer | Philip Sargent <philip.sargent@gmail.com> | 2023-09-10 02:06:38 +0300
commit | 327b1923b06bd4789df7ad0be4976e7d63c3aad5 (patch) |
tree | bc85d886da99719ab4b8bc97a2f9d81996cc9d64 /parsers |
parent | 54136721b8ef4c10be6efa70dd8c8e48f7d561fb (diff) |
download | troggle-327b1923b06bd4789df7ad0be4976e7d63c3aad5.tar.gz troggle-327b1923b06bd4789df7ad0be4976e7d63c3aad5.tar.bz2 troggle-327b1923b06bd4789df7ad0be4976e7d63c3aad5.zip |
Cleaning auto Cave creation from survex file detection
Diffstat (limited to 'parsers')
-rw-r--r-- | parsers/caves.py | 47
-rw-r--r-- | parsers/survex.py | 97
2 files changed, 77 insertions, 67 deletions
diff --git a/parsers/caves.py b/parsers/caves.py
index 982fb40..abeb800 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -130,13 +130,13 @@ def get_area(areanum):
         area = Area.objects.get(short_name="1627")
     return area
 
-def create_new_cave(svxpath):
+def create_new_cave(svxpath, msg=None):
     """This is called only when a new survex file is edited online which has a path on the
     :loser: repo which is not recognised as a known cave.
     ALSO called by survex parser when it finds a cave it doesn't recognise
     """
     # e.g. svxpath = "caves-1623/666/antig"
-    print(f"Create new cave at {svxpath}")
+    print(f"Create new cave at {svxpath} - {msg}")
     # survex_file = svxpath + ".svx"
 
     parts = svxpath.split("/")
@@ -149,7 +149,7 @@ def create_new_cave(svxpath):
         url = f"{areanum}/{caveid}.html" # Note we are appending the .html as we are believe in backwards compatability.
         #url = f"{areanum}/{a[5:]}.html" # This is original code, but a above is only defined as being 4 characters long, so it did not make sense and produced non unique urls
     else:
-        print(f"WARNING: parsers/caves/create_new_cave called with svxpath '{svxpath}'. Surely it should start 'caves-162*'?")
+        print(f"WARNING: parsers/caves/create_new_cave called with svxpath '{svxpath}'. Surely it should start 'caves-162*'? {msg}")
         areanum = "1623"
         url = f"1623/{caveid}.html"
         #url = f"1623/{k}.html" # This is original code, but a above is only defined as being 4 characters long, so it did not make sense and produced non unique urls
@@ -157,17 +157,17 @@ def create_new_cave(svxpath):
     k = f"{areanum}-{caveid}"
     area = get_area(areanum)
 
-    caves = Cave.objects.filter(unofficial_number=caveid)
+    caves = Cave.objects.filter(unofficial_number=caveid, area =areanum)
     if caves:
-        message = f" ! Already exists, caveid:{k} in area {areanum} {caves}"
+        message = f" ! Already exists, caveid:{k} in area {areanum} {caves} - {msg}"
         DataIssue.objects.create(parser="caves", message=message)
         print(message)
         return caves[0]
 
     try:
-        cave = do_pending_cave(k, caveid, url, area)
+        cave = do_pending_cave(k, caveid, url, area, msg)
     except:
-        message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
+        message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum} - {msg}"
         DataIssue.objects.create(parser="caves", message=message)
         print(message)
         raise
@@ -178,7 +178,7 @@ def create_new_cave(svxpath):
     cave.save()
     return cave
 
-def do_pending_cave(k, caveid, url, area):
+def do_pending_cave(k, caveid, url, area, msg=None):
     """
     default for a PENDING cave, should be overwritten in the db later if a real cave of the same name exists
     in expoweb/cave_data/1623-"k".html
@@ -217,7 +217,7 @@ def do_pending_cave(k, caveid, url, area):
                 prime_suspect = survex_file
         if prime_suspect:
             survex_file = prime_suspect
-            # message = f" ! {k:14} Found a survex file which might be the right one: {survex_file}"
+            # message = f" ! {k:14} Found a survex file which might be the right one: {survex_file} - {msg}"
             # DataIssue.objects.create(parser='caves', message=message, url=url)
             # print(message)
         return survex_file
@@ -227,7 +227,7 @@ def do_pending_cave(k, caveid, url, area):
     g = GetCaveLookup()
     with transaction.atomic():
         if slug in g:
-            message = f" ! {k:18} cave listed in pendingcaves.txt already exists."
+            message = f" ! {k:18} cave listed in pendingcaves.txt already exists. - {msg}"
             DataIssue.objects.create(parser="caves", message=message, url=url)
             print(message)
             return
@@ -267,11 +267,17 @@ def do_pending_cave(k, caveid, url, area):
         default_note += (
             f"to remove the line <br><var>{slug}</var><br> as it is no longer 'pending' but 'done. Well Done."
         )
 
+        urltest = Cave.objects.filter(url=url)
+        if urltest:
+            message = f" ! Cave {urltest[0]} already exists with this url {url}. Can't create new cave {slug}"
+            DataIssue.objects.create(parser="caves", message=message, url=url)
+            print(message)
+            return urltest[0]
+
         survex_file = get_survex_file(k)
 
         cave = Cave(
-            unofficial_number=caveid,
+            unofficial_number=caveid.upper(),
             underground_description="Pending cave write-up - No cave description created yet.",
             survex_file=survex_file,
             url=url,
@@ -281,30 +287,21 @@ def do_pending_cave(k, caveid, url, area):
         cave.save()  # must save to have id before foreign keys work. This is also a ManyToMany key.
         cave.area.add(area)
         cave.save()
-        message = f" ! {k:18} {cave.underground_description} url: {url}"
+        message = f" ! {k:18} Pending cave write-up url: {url} - {msg}"
         DataIssue.objects.create(parser="caves", message=message, url=url)
         print(message)
 
        try:
            # Now create a cave slug ID
            CaveSlug.objects.update_or_create(cave=cave, slug=slug, primary=False)
        except:
-            message = f" ! {k:11s} PENDING CaveSLUG {slug} create failure"
+            message = f" ! {k:11s} PENDING CaveSLUG {slug} create failure - {msg}"
            DataIssue.objects.create(parser="caves", message=message)
            print(message)
    else:
-        message = f" ! {k:11s} PENDING cave create failure"
+        message = f" ! {k:11s} PENDING cave create failure - {msg}"
        DataIssue.objects.create(parser="caves", message=message)
        print(message)
-    try:
-        # troggle is more robust against missing entrances now, not needed.
-        # set_dummy_entrance(k, slug, cave, msg="PENDING")
-        pass
-    except:
-        message = f" ! {k:11s} PENDING entrance + cave UNION create failure '{cave}' [{slug}] {k}"
-        # message = f" ! {k:11s} PENDING entrance + cave UNION create failure '{cave}' [{ent}]"
-        DataIssue.objects.create(parser="caves", message=message)
-        print(message)
    return cave
 
 def getXML(text, itemname, minItems=1, maxItems=None, context=""):
@@ -730,7 +727,9 @@ def read_cave(filename, cave=None):
         cave.save()
     return cave
 
-def add_cave_to_pending_list(id):
+def add_cave_to_pending_list(id, msg=None):
+    message = f"On dev machine, adding to PENDING. - {msg}"
+    print(message)
     fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt")
     try:
         if settings.DBSWITCH == "sqlite":  # dev machine only
diff --git a/parsers/survex.py b/parsers/survex.py
index ef69759..fe7d211 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -317,16 +317,16 @@ class LoadingSurvex:
     insp = ""
     callcount = 0
     caverncount = 0
-    ignoreprefix = ["surface", "kataster", "fixedpts", "gpx", "deprecated"]
-    ignorenoncave = [
-        "caves-1623",
-        "caves-1623/2007-NEU",
-        "caves-1626",
-        "caves-1624",
-        "caves-1627",
-        "fixedpts/gps/gps00raw",
-        "",
-    ]
+    ignoreprefix = ["surface", "kataster", "fixedpts", "gpx", "deprecated", "dummy_"]
+    # ignorenoncave = [
+    #     "caves-1623",
+    #     "caves-1623/2007-NEU",
+    #     "caves-1626",
+    #     "caves-1624",
+    #     "caves-1627",
+    #     "fixedpts/gps/gps00raw",
+    #     "",
+    # ]
     TREE = "tree"
     ODDS = "oddments"
     svxpass = TREE
@@ -1165,42 +1165,60 @@ class LoadingSurvex:
 
     def IdentifyCave(self, cavepath, svxid, depth):
         """Given a file path for a survex file, e.g. /1626/107/107.svx, or a survex-block path,
-        return the cave object
+        return the cave object
+        kataster
+        fixedpts/gps
+        and everything at top level, directly in caves-1623/ not in a subdir
+        NOTE self.cavelist is a superset of GCaveLookup, which already contians both uppercase and lowercase aliases
         """
-        path = cavepath.lower()
-        if path in self.caveslist:  # primed with GCaveLookup
-            return self.caveslist[path]
+        if cavepath == "caves-1623/99ob02":
+            for key in self.caveslist:
+                cave = self.caveslist[key]
+                if type(cave) != Cave:
+                    print(f"BAD CAVE TYPE '{key}' -- {type(cave)}'{cave}'")
+            for key in self.caveslist:
+                cave = self.caveslist[key]
+                print(f"{key} -- Cave<{cave}>")
+
+        for i in self.ignoreprefix:
+            if cavepath.lower().startswith(i):
+                message = (f" - {cavepath} starts with <ignoreprefix> (while creating '{svxid}.svx' )")
+                return False
+
+        if cavepath in self.caveslist:  # primed with GCaveLookup
+            return self.caveslist[cavepath]
+
+        rx_svxcollection = re.compile(r"(?i)caves-(\d\d\d\d)/(.*)$")
         # rx_cave = re.compile(r"(?i)caves-(\d\d\d\d)/([-\d\w]+|\d\d\d\d-?\w+-\d+)")
         path_match = self.rx_cave.search(cavepath)
         if path_match:
             sluggy = f"{path_match.group(1)}-{path_match.group(2)}"
-            guesses = [sluggy.lower()]  # full 1626-107 search, don;t use short-forms
-            for g in guesses:
-                if g in self.caveslist:
-                    self.caveslist[cavepath] = self.caveslist[g]  # set "caves-1626/107/107" as index to cave 1626-107
-                    return self.caveslist[g]
-            cave = create_new_cave(cavepath)  # uses the pending stuff to create pending cave descriptions
+            seek = [sluggy, sluggy.replace("1623-","")]  # to catch '2023-kt-02' etc
+            for s in seek:
+                if s in self.caveslist:
+                    self.caveslist[cavepath] = self.caveslist[s]  # set "caves-1626/107/107" as index to cave 1626-107
+                    return self.caveslist[s]
+
+            cave = create_new_cave(cavepath, f"Make cave found in survex file {svxid}")  # uses the pending code to create pending cave descriptions
             self.caveslist[cavepath] = cave
-            message = f" ! MAKING cave for {cavepath=} {svxid=}"
-            stash_data_issue(parser="survex", message=message, url=None, sb=(svxid))
-            if not cavepath.startswith("caves-1624") or cavepath.startswith("caves-1626"):
+            message = f"\n ! MAKING cave {sluggy} for {cavepath=} {svxid=} (not reporting this for 1624 or 1626)"
+            # stash_data_issue(parser="survex", message=message, url="/survexfile/{svxid}.svx", sb=(svxid))
+            if not (cavepath.startswith("caves-1624") or cavepath.startswith("caves-1626")):
                 print(message, file=sys.stderr)
+            return cave
         else:
-            # isn't all this pointless...??
-            if self.is_it_already_pending(cavepath, svxid, depth):  # but pending will already have been created as Cave objects
-                pass
+            path_match = rx_svxcollection.search(svxid)
+            if path_match:
+                message = f" ! Recognised survex file which is not a cave at {svxid=}"
+                # stash_data_issue(parser="survex", message=message, url=None, sb=(svxid))
+                # print(message, file=sys.stderr)
+                return False
             else:
-                # It is too late to add it to the pending caves list here, they were already
-                # processed in parsers/caves.py So we have to do a bespoke creation.
-                cave = create_new_cave(svxid)
-                message = f" ! Warning: cave identifier '{caveid}'or {id} (guessed from file path) is not a known cave. Need to add to expoweb/cave_data/pendingcaves.txt ? In '{includelabel}.svx' at depth:[{len(depth)}]."
+                message = f" ! ERROR: no cave at '{svxid}.svx - is not a known cavename format. "
                 print("\n" + message)
                 print("\n" + message, file=sys.stderr)
-                print(f"{self.pending}", end="", file=sys.stderr)
-                stash_data_issue(parser="survex", message=message, url=None, sb=(includelabel))
-
-        print(f' ! No regex (standard identifier) cave match for {cavepath.lower()}', file=sys.stderr)
-        return None
+                stash_data_issue(parser="survex", message=message, url="{svxid}.svx", sb=(svxid))
+                return None
 
     def is_it_already_pending(self, headpath, includelabel, depth):
         """Ignore surface, kataser and gpx *include survex files"""
@@ -1222,14 +1240,7 @@ class LoadingSurvex:
             # print("\n"+message)
             # print("\n"+message,file=sys.stderr)
             return True
-        for i in self.ignoreprefix:
-            if headpath.startswith(i):
-                message = (
-                    f" - {headpath} starts with <ignoreprefix> (while creating '{includelabel}' sfile & sdirectory)"
-                )
-                # print("\n"+message)
-                # print("\n"+message,file=sys.stderr)
-                return True
+
         caveid = f"{headpath[6:10]}-{headpath[11:]}".upper()
         if caveid in self.pending:
             # Yes we didn't find this cave, but we know it is a pending one. So not an error.