bug fix

author: Philip Sargent <philip.sargent@gmail.com> 2024-07-02 12:23:33 +0300
committer: Philip Sargent <philip.sargent@gmail.com> 2024-07-02 12:23:33 +0300
commit: e4d3e33dec40fe9b494c46a486172055a2e80e00 (patch)
tree: 5579c0a87a2a314bed31f95930d3e95008bd04a2 /parsers/caves.py
parent: 208c0ffa1f1d3af3959e2bec8f837d6c03935799 (diff)
download: troggle-e4d3e33dec40fe9b494c46a486172055a2e80e00.tar.gz
troggle-e4d3e33dec40fe9b494c46a486172055a2e80e00.tar.bz2
troggle-e4d3e33dec40fe9b494c46a486172055a2e80e00.zip
1 files changed, 24 insertions, 39 deletions
diff --git a/parsers/caves.py b/parsers/caves.py
index 0bf3520..f2176e0 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -479,12 +479,6 @@ def read_entrance(filename, ent=None):
         contents = f.read()
     context = filename
     
-    # validate filename, check areacode
-    ent_area = filename[:4]
-    if ent_area not in AREACODES:
-        message = f'! BAD AREA CODE in "{filename}". Not recognised.'
-        DataIssue.objects.create(parser="entrances", message=message, url=ent_edit_url)
-        print(message)
         
     
     # Derive the letter, entrance slug and cave slug fromthe filename
@@ -497,10 +491,16 @@ def read_entrance(filename, ent=None):
         letter_fn = ""
         
     cave_name_fn = caveslug_fn[5:] # remove initial "1623-"
+    ent_area = filename[:4]
     
     ent_edit_url = f"/{caveslug_fn}:{entslug_fn}_entrance_edit"
     cave_edit_url = f"/{ent_area}/{cave_name_fn}/{caveslug_fn}_cave_edit"
     
+    # validate filename, check areacode
+    if ent_area not in AREACODES:
+        message = f'! BAD AREA CODE in "{filename}". Not recognised.'
+        DataIssue.objects.create(parser="entrances", message=message, url=ent_edit_url)
+        print(message)
 
     
     entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context)
@@ -726,8 +726,7 @@ def read_cave(filename, mvf=None, cave=None):
     def check_slug(areacode, kataster_number, unofficial_number, url):
         """There is a <caveslug> field in the .html file, but we now ignore it as we use the 
          filename itself to set the slug.
-         However we do check it for sanity, pending its removal eventually."""
-        # context = f"/{cave.areacode}/{slug}_cave_edit/"
+         However we do check it for sanity, if it is there, pending its removal eventually."""
            
         if kataster_number:
             if slug == f"{areacode}-{kataster_number}":
@@ -764,14 +763,20 @@ def read_cave(filename, mvf=None, cave=None):
     
     # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
     fn = settings.CAVEDESCRIPTIONS / filename
-    context = f"/cave_data/{filename}_edit" # the expo html editor, not the specialist cave data editor. oops.FIX THIS
     
-    # print(f" - Reading Cave from cave descriptions file {fn}")
+    #print(f" - Reading Cave from cave descriptions file {fn}")
     if not fn.exists():
         message = f" ! Cave_data file reading problem filename:'cave_data/{filename}'"
-        DataIssue.objects.create(parser="caves", message=message, url=context)
+        DataIssue.objects.create(parser="caves", message=message, url="")
         print(message)
         return None
+
+    # Ignore any <caveslug> tag inside the file: the slug is derived from the filename instead
+    slug = filename[:-5] # strip off the ".html" at the end of the filename
+    slugs = [slug]
+    areacode = slug[:4]
+
+    context = f"/{areacode}/{slug}_cave_edit" 
         
     with open(fn) as f:
         contents = f.read()
@@ -779,32 +784,17 @@ def read_cave(filename, mvf=None, cave=None):
 
     if len(cavecontentslist) != 1:
         message = f'! BAD CAVE DATA in "{filename}". More than one cave. Edit file manually, click.'
-        DataIssue.objects.create(parser="caves", message=message, url=context)
+        DataIssue.objects.create(parser="caves", message=message, url="")
         print(message)
         return None
         
     cavecontents = cavecontentslist[0]
-    
-    # This is ignored, we are using the filename not this <caveslug> field now
-    # New 2024 June 28th.
-    # slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context) 
-    # if len(slugs) > 1:
-        # message = f" ! - More than one slug for a cave: {cave}, slugs: {slugs}."
-        # DataIssue.objects.create(parser="caves", message=message, url=context)
-        # print(message)
-    # slug = slugs[0]
-    
-    # but ignore <caveslug> read from the file
-    slug = filename[:-5] # strip off the ".html" at the end of the filename
-    slugs = [slug]
+ 
     #print(f"{filename=} {slug=}")
-    #print(slug[-1].lower(), slug)
     if slug[-1].lower() in LETTERS:
         message = f" ! Cave name ends in a letter not a number. Fix this! in file {filename} "
         DataIssue.objects.create(parser="caves", message=message, url=context) # url here is for where the file actually is, for editing
         print(message)
-
-    
  
     non_public = getXMLmax1("non_public")
     official_name = getXMLmax1("official_name")
@@ -822,14 +812,11 @@ def read_cave(filename, mvf=None, cave=None):
     survex_file = getXMLmax1("survex_file")
     description_file = getXMLmax1("description_file")
 
+    # Optional, but probably deprecated as we should just derive this from the survex data
     length = getXMLmin0("length")
     depth = getXMLmin0("depth")
     extent = getXMLmin0("extent")
-    
-    areacode = slug[:4]
- 
-    contextguess  = f"/{slug[0:4]}/{slug}_cave_edit/" 
-    
+     
     manual_edit = True
     if not cave:
         # we are parsing using databaseReset.py not an online edit
@@ -843,7 +830,7 @@ def read_cave(filename, mvf=None, cave=None):
             kaves = Cave.objects.all().filter(filename=filename) # replace with slug when CaveSlug tidied up
             for k in kaves:
                 message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug())
-                DataIssue.objects.create(parser="caves", message=message, url=contextguess)
+                DataIssue.objects.create(parser="caves", message=message, url=context)
                 print(message)
             for k in kaves:
                 if k.slug() is not None:
@@ -855,15 +842,13 @@ def read_cave(filename, mvf=None, cave=None):
     do_caveslugstuff() # needs cave!=None
 
     # We no longer need the <area> tag to define 1623 etc as we get that from the filename.
-    areas = getXML(cavecontents, "area", context=contextguess, minItems=0) # can be multiple <area> tags
+    areas = getXML(cavecontents, "area", context=context, minItems=0) # can be multiple <area> tags
     for area_slug in areas:
         if area_slug not in AREACODES: # only detect subareas
             cave.subarea = area_slug
     if not cave.areacode:
          if areacode in AREACODES:
             cave.areacode = areacode
-    context = f"/{cave.areacode}/{slug}_cave_edit/" 
-
          
     cave.non_public=boolify(non_public)
     cave.official_name=official_name[0]
@@ -886,13 +871,13 @@ def read_cave(filename, mvf=None, cave=None):
         cave.extent=extent[0]
     cave.survex_file=survex_file[0]
     cave.description_file=description_file[0]
-    # cave.url=url[0] # set algorithically:
+    # cave.url=url[0] # set algorithmically now:
     cave.url = f"{cave.areacode}/{cave.number()}/{cave.number()}.html"
     check_directory(cave.areacode, cave.number(), cave.url, cave)
 
     slug = check_slug(cave.areacode, cave.kataster_number, cave.unofficial_number, cave.url) #NB cave.slug is not a field on Cave
                
-    # Thsi whole way of doing entrances can be replaced by simply knowing formthe entrance_data filename what the cave is.
+    # TODO: this whole way of doing entrances can be replaced by simply deriving the cave from the entrance_data filename.
     entrances = getXML(cavecontents, "entrance", context=context)
     do_entrances()
     # print(f"- {entrances_xslug=}")
author	Philip Sargent <philip.sargent@gmail.com>	2024-07-02 12:23:33 +0300
committer	Philip Sargent <philip.sargent@gmail.com>	2024-07-02 12:23:33 +0300
commit	e4d3e33dec40fe9b494c46a486172055a2e80e00 (patch)
tree	5579c0a87a2a314bed31f95930d3e95008bd04a2 /parsers/caves.py
parent	208c0ffa1f1d3af3959e2bec8f837d6c03935799 (diff)
download	troggle-e4d3e33dec40fe9b494c46a486172055a2e80e00.tar.gz troggle-e4d3e33dec40fe9b494c46a486172055a2e80e00.tar.bz2 troggle-e4d3e33dec40fe9b494c46a486172055a2e80e00.zip