From d93133c338fa46b0df3ccdd1e53afeb8592a8a73 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Thu, 27 Jul 2023 00:38:47 +0300 Subject: ambiguous aliases removed more thoroughly --- core/models/caves.py | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) (limited to 'core/models/caves.py') diff --git a/core/models/caves.py b/core/models/caves.py index 0d94b4c..98094e4 100644 --- a/core/models/caves.py +++ b/core/models/caves.py @@ -472,6 +472,8 @@ def GetCaveLookup(): OR we could set it to return None if there are duplicates, and require the caller to fall back on doing the actual database query it wants rather than using this cache shortcut """ + + duplicates = {} def checkcaveid(cave, id): global Gcavelookup @@ -482,11 +484,11 @@ def GetCaveLookup(): if cave == Gcavelookup[id]: pass # same id, same cave else: # same id but different cave - message = f" - Warning: same alias id '{id:3}' for two caves '{Gcavelookup[id]}' and '{cave}'. Removing this shorthand alias entirely." - Gcavelookup.pop(id) - print(message) - DataIssue.objects.create(parser="aliases", message=message) - + # message = f" - Warning: ignoring alias id '{id:3}'. Caves '{Gcavelookup[id]}' and '{cave}'. " + # print(message) + # DataIssue.objects.create(parser="aliases", message=message) + duplicates[id] = 1 + global Gcavelookup if Gcavelookup: return Gcavelookup @@ -499,8 +501,14 @@ def GetCaveLookup(): for cave in Cave.objects.all(): key = cave.official_name.lower() if key != "" and key != "unamed" and key != "unnamed": - Gcavelookup[key] = cave - Gcave_count[key] += 1 + if Gcave_count[key] > 0: + # message = f" - Warning: ignoring alias id '{id:3}'. Caves '{Gcavelookup[id]}' and '{cave}'. 
" + # print(message) + # DataIssue.objects.create(parser="aliases", message=message) + duplicates[key] = 1 + else: + Gcavelookup[key] = cave + Gcave_count[key] += 1 if cave.kataster_number: checkcaveid(cave, cave.kataster_number) # we do expect 1623/55 and 1626/55 to cause a warning message @@ -665,11 +673,20 @@ def GetCaveLookup(): addmore = {} + ldup = [] + for d in duplicates: + Gcavelookup.pop(d) + Gcave_count.pop(d) + ldup.append(d) + if ldup: + message = f" - Ambiguous aliases removed: {ldup}" + print(message) + DataIssue.objects.create(parser="aliases", message=message) + for c in Gcave_count: if Gcave_count[c] > 1: message = f" ** Duplicate cave id count={Gcave_count[c]} id:'{Gcavelookup[c]}' cave __str__:'{c}'" print(message) DataIssue.objects.create(parser="aliases", message=message) - # logdataissues[Gcavelookup[c]]=message # pending troggle-wide issues logging system - + return Gcavelookup -- cgit v1.2.3