debugging Cave page links..

author: Philip Sargent <philip.sargent@gmail.com> 2023-09-11 20:38:14 +0300
committer: Philip Sargent <philip.sargent@gmail.com> 2023-09-11 20:38:14 +0300
commit: d323ff270083ead337f8297653bb833100306999 (patch)
tree: cb234cd494092c14f5f878bbc2cf060b8a5f85a9 /parsers/caves.py
parent: 47db19f1a24d22610de2a5782737650f4b0941dc (diff)
download: troggle-d323ff270083ead337f8297653bb833100306999.tar.gz
troggle-d323ff270083ead337f8297653bb833100306999.tar.bz2
troggle-d323ff270083ead337f8297653bb833100306999.zip
1 files changed, 41 insertions, 25 deletions
diff --git a/parsers/caves.py b/parsers/caves.py
index 70f92cb..974f73e 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -27,6 +27,9 @@ So is the first thing that creates tables.
 """
 
 todo = """  
+- When reading cave data, to start off with we do not know the cave id (slug) so we can't give a useful url in
+  the error message, but we do have the filename. Systematize this, and the same thing with reading entrance files.
+  
 - Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file.
    So we will need a separate file-editing capability just for this configuration file ?!
    
@@ -35,15 +38,15 @@ todo = """
 - rewrite archaic regex
   re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
   in modern form and pre-compile it. 
-
-- Semi-automagically import all the 1627- pending caves and create HTML files for them to be
-  edited individually. (These are caves we only know about because we have German survex files.)
    
 - crashes on MariaDB in databasereset.py on server when deleting Caves and complains Area needs a 
   non null parent, But this is not true.   The only solution we have found is to let it crash, then 
   stop and restart MariaDB (requires a logon able to sudo)   and then restart the databasereset.py 
-  again. (status as of July 2022)
+  again. (status as of July 2022). May not happen now that class Area is removed (Sept.2023).
 """
+AREACODES = {"1623", "1624", "1626", "1627"} # NB set not dict
+ARGEAREAS = {"1626", "1627"}
+
 entrances_xslug = {}
 caves_xslug = {}
 areas_xslug = {}
@@ -105,7 +108,7 @@ def create_new_cave(svxpath, msg=None):
     :loser: repo which is not recognised as a known cave.
     ALSO called by survex parser when it finds a cave it doesn't recognise
     """
-    # e.g. svxpath = "caves-1623/666/antig"
+    # e.g. svxpath = "caves-1623/666/beast" .svx
     print(f"Create new cave at {svxpath} - {msg}")
     #
     survex_file = svxpath + ".svx"
@@ -117,16 +120,13 @@ def create_new_cave(svxpath, msg=None):
     if a[0:3] == "162":
         areacode = a[0:4]
         url = f"{areacode}/{caveid}.html"  # Note we are appending the .html as we are believe in backwards compatability.
-        #url = f"{areacode}/{a[5:]}.html"  # This is original code, but a above is only defined as being 4 characters long, so it did not make sense and produced non unique urls
     else: 
         print(f"WARNING: parsers/caves/create_new_cave called with svxpath '{svxpath}'.  Surely it should start 'caves-162*'? {msg}")
         areacode = "1623"
         url = f"1623/{caveid}.html"
-        #url = f"1623/{k}.html" # This is original code, but a above is only defined as being 4 characters long, so it did not make sense and produced non unique urls
-        
+         
     k = f"{areacode}-{caveid}"
 
-
     caves = Cave.objects.filter(unofficial_number=caveid, areacode =areacode) 
     if caves:
         message = f" ! Already exists, caveid:{k} in areacode {areacode} {caves} - {msg}"
@@ -149,14 +149,15 @@ def create_new_cave(svxpath, msg=None):
     cave.save()
     return cave
 
-def do_ARGE_cave(slug, caveid, url, areacode, svxid):
-    """Only called by survex parser. 
+def do_ARGE_cave(slug, caveid, svxurl, areacode, svxid):
+    """Only called by survex parser, NOT the cave parser.
     Creates a new Cave object, but with abbreviated data as the survex file (from ARGE) is all we have.
     We already know the survex file.
-    We already know that it doesn't exist... though there are bugs..
+    We already know that the cave doesn't exist... though there are bugs..
     """
     
     default_note = "This is an ARGE cave where we only have the survex file and no other information"
+    url = f"{areacode}/{caveid}.html"
 
     urltest = Cave.objects.filter(url=url)
     if urltest:
@@ -171,11 +172,21 @@ def do_ARGE_cave(slug, caveid, url, areacode, svxid):
         DataIssue.objects.create(parser="caves", message=message, url=url)
         print(message)
         return numtest[0]
-
+    
+    sv = Path(settings.SURVEX_DATA, svxid + ".svx")
+    if sv.is_file:
+        with open(sv, "r") as s:
+            line1 = s.readline()
+            line2 = s.readline()
+            line3 = s.readline()
+    else:
+        print(f"not correct {sv}", file=sys.stderr)
+        
     cave = Cave( 
-    underground_description="ARGE cave.",
-    survex_file= f"{svxid}.svx", # or is this svxurl ?!
-    # url=url, No, the url spplied is that of the survexfile not of the cave file, e.g. /1626/254/254
+    underground_description="ARGE cave.\n3 lines of the survexfile:\n" + line1 +line2 +line3,
+    unofficial_number="ARGE",
+    survex_file= svxurl,
+    url=url, 
     notes=default_note,
     areacode=areacode,
     )
@@ -427,7 +438,7 @@ def read_entrance(filename, ent=None):
         DataIssue.objects.create(parser="entrances", message=message, url=f"/cave/{slug}/edit/")
         print(message)
         
-    lastvisit = getXML(entrancecontents, "lastvisit", maxItems=1, minItems=0, context=context)
+    lastvisit = getXML(entrancecontents, "lastvisit", maxItems=1, minItems=0, context=f"/cave/{slug}/edit/")
 
     alt = getXMLmax1("alt")
     approach = getXMLmax1("approach")
@@ -611,12 +622,12 @@ def read_cave(filename, cave=None):
     
     # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
     fn = settings.CAVEDESCRIPTIONS / filename
-    context = filename
+    context = f"/cave_data/{filename}_edit"
     
     # print(f" - Reading Cave from cave descriptions file {fn}")
     if not fn.exists():
         message = f" ! Cave_data file reading problem filename:'cave_data/{filename}'"
-        DataIssue.objects.create(parser="caves", message=message, url=f"/cave_data/{filename}_edit")
+        DataIssue.objects.create(parser="caves", message=message, url=context)
         print(message)
         return None
 
@@ -626,7 +637,7 @@ def read_cave(filename, cave=None):
 
     if len(cavecontentslist) != 1:
         message = f'! BAD CAVE DATA in "{filename}". More than one cave. Edit file manually, click.'
-        DataIssue.objects.create(parser="caves", message=message, url=f"/cave_data/{filename}_edit")
+        DataIssue.objects.create(parser="caves", message=message, url=context)
         print(message)
         return None
         
@@ -634,10 +645,11 @@ def read_cave(filename, cave=None):
     slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context)
     if len(slugs) > 1:
         message = f" ! - More than one slug for a cave: {cave}, slugs: {slugs}. Ignoring all except first."
-        DataIssue.objects.create(parser="caves", message=message, url=f"{cave.url}_edit/")
+        DataIssue.objects.create(parser="caves", message=message, url=context)
         print(message)
     slug = slugs[0]
-
+    context = url=f"/{slug[0:4]}/{slug}_cave_edit/"
+ 
     non_public = getXMLmax1("non_public")
     official_name = getXMLmax1("official_name")
     kataster_code = getXMLmax1("kataster_code")
@@ -668,7 +680,7 @@ def read_cave(filename, cave=None):
             kaves = Cave.objects.all().filter(filename=filename) # replace with slug when CaveSlug tidied up
             for k in kaves:
                 message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug())
-                DataIssue.objects.create(parser="caves", message=message)
+                DataIssue.objects.create(parser="caves", message=message, url=context)
                 print(message)
             for k in kaves:
                 if k.slug() is not None:
@@ -679,6 +691,7 @@ def read_cave(filename, cave=None):
     # From here on the code applies to both edited and newly-imported caves (mostly!)
     do_caveslugstuff() # needs cave!=None
     
+    
     cave.non_public=boolify(non_public)
     cave.official_name=official_name[0]
     cave.kataster_code=kataster_code[0]
@@ -701,11 +714,14 @@ def read_cave(filename, cave=None):
             
     areas = getXML(cavecontents, "area", context=context) # can be multiple <area> tags
     for area_slug in areas:
-        if area_slug in ["1623", "1624", "1626", "1627"]: # ignore sub areas which are in another <area> tag
+        if area_slug in AREACODES: # ignore sub areas which are in another <area> tag
             cave.areacode = area_slug 
         else:
             cave.subarea = area_slug
- 
+    if not cave.areacode:
+         if slug[0:4] in AREACODES:
+            cave.areacode = slug[0:4]
+            
     entrances = getXML(cavecontents, "entrance", context=context)
     do_entrances()
     # print(f"- {entrances_xslug=}")
author	Philip Sargent <philip.sargent@gmail.com>	2023-09-11 20:38:14 +0300
committer	Philip Sargent <philip.sargent@gmail.com>	2023-09-11 20:38:14 +0300
commit	d323ff270083ead337f8297653bb833100306999 (patch)
tree	cb234cd494092c14f5f878bbc2cf060b8a5f85a9 /parsers/caves.py
parent	47db19f1a24d22610de2a5782737650f4b0941dc (diff)
download	troggle-d323ff270083ead337f8297653bb833100306999.tar.gz troggle-d323ff270083ead337f8297653bb833100306999.tar.bz2 troggle-d323ff270083ead337f8297653bb833100306999.zip