Cleaning up entrance importing robustly

author: Philip Sargent <philip.sargent@gmail.com> 2023-07-26 00:14:46 +0300
committer: Philip Sargent <philip.sargent@gmail.com> 2023-07-26 00:14:46 +0300
commit: fab7adf07989c03289336c6f2117696129f82f19 (patch)
tree: 81696d9bd31703ef3453ed3a31c37eeb34397a2d /parsers/caves.py
parent: 72a6b091e6f53e11798decf2fa4365ccfd4eef50 (diff)
download: troggle-fab7adf07989c03289336c6f2117696129f82f19.tar.gz
troggle-fab7adf07989c03289336c6f2117696129f82f19.tar.bz2
troggle-fab7adf07989c03289336c6f2117696129f82f19.zip
1 files changed, 37 insertions, 3 deletions
diff --git a/parsers/caves.py b/parsers/caves.py
index f8b39e9..93d5cff 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -91,9 +91,12 @@ def set_dummy_entrance(id, slug, cave, msg="DUMMY"):
         # I am not sure if we want  entrances automagically created.          
         # Therefore I have commented it out. MJG
         
-        # entrance = dummy_entrance(id, slug, msg="DUMMY")
-        # entrances_xslug[slug] = entrance
-        # CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
+        # Automagic restored, as it creates a load of error messages in the DataIssue log. 2023-07-25
+        # pending doing this properly, ie. properly creating an XML file for each of these pending caves.
+        
+        entrance = dummy_entrance(id, slug, msg="DUMMY")
+        entrances_xslug[slug] = entrance
+        CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
         pass
     except:
         message = f' ! Entrance Dummy setting failure, slug:"{slug}" cave id :"{id}" '
@@ -343,6 +346,22 @@ def boolify(boolstrs):
             "False": False,
             "true": True,
             "false": False}[boolstrs[0]]
+            
+def validate_station(station):
+    """Check a station identifier: "" or any string containing "." returns True.
+    It is possible to break troggle entirely by getting this wrong: these
+    identifiers are matched against other stations using .endswith() in
+    parsers/locations.py, so a simple number here will match hundreds of
+    SurvexStation objects. It should be, e.g. "1623.p240".
+    """
+    if station == "":
+        return True
+    dot = station.find(".")
+    if dot == -1:
+        # No full stop found: bad identifier. NOTE(review): bare raise with no active exception raises RuntimeError.
+        raise
+    else:
+        return True
 
 def read_entrance(filename, ent=None):
     """Reads an entrance description from the .html file.
@@ -440,6 +459,17 @@ def read_entrance(filename, ent=None):
     ent.underground_description=underground_description[0]
     ent.url=url[0]
     
+    for st in [ent.exact_station, ent.other_station, ent.tag_station]:
+        try:
+            validate_station(st)
+        except:
+            message = f" ! BAD ENTRANCE TAG '{st}' in '{filename}'. Must format like '1623.p204'. Edit file manually, click."
+            #http://localhost:8000/1623/2023-EBH-01/1623-2023-EBH-01:1623-2023-EBH-01_entrance_edit
+            DataIssue.objects.create(parser="entrances", message=message, url=f"/1623/{slug}/{slug}:{slug}_entrance_edit")
+            print(message)
+            # ent_issues = DataIssue.objects.filter(parser="entrances")
+            # print(f".. We now have  {len(ent_issues)} entrance DataIssues")
+            return None
     ent.save()
     return ent
 
@@ -704,6 +734,10 @@ def readcaves():
         print(" - Reading Entrances from entrance descriptions xml files")
         for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
             read_entrance(filename)
+        
+        # Why is this needed? Without it, we lose these DataIssues!
+        ent_issues = DataIssue.objects.filter(parser="entrances")
+        print(f"__ We now have  {len(ent_issues)} entrance DataIssues")
 
         print(" - Reading Caves from cave descriptions xml files")
         for filename in next(os.walk(CAVEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
author	Philip Sargent <philip.sargent@gmail.com>	2023-07-26 00:14:46 +0300
committer	Philip Sargent <philip.sargent@gmail.com>	2023-07-26 00:14:46 +0300
commit	fab7adf07989c03289336c6f2117696129f82f19 (patch)
tree	81696d9bd31703ef3453ed3a31c37eeb34397a2d /parsers/caves.py
parent	72a6b091e6f53e11798decf2fa4365ccfd4eef50 (diff)
download	troggle-fab7adf07989c03289336c6f2117696129f82f19.tar.gz troggle-fab7adf07989c03289336c6f2117696129f82f19.tar.bz2 troggle-fab7adf07989c03289336c6f2117696129f82f19.zip