4 files changed, 135 insertions, 55 deletions
diff --git a/parsers/caves.py b/parsers/caves.py
index 3549c75..9d95f32 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -13,23 +13,16 @@ from troggle.core.models.caves import Area, Cave, Entrance, CaveSlug, EntranceSl
 '''Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html )
 and creating the various Cave, Entrance and necessary Area objects.
 
-This is the first import that happens after the dabase is reinitialised. 
+This is the first import that happens after the database is reinitialised. 
 So is the first thing that creates tables.
 
-BUT in Django 2.0 and later we cannot do any queries on data we have just entered 
-because this is all happening inside one transaction. Bummer.
-
-django.db.transaction.TransactionManagementError: 
-An error occurred in the current transaction. You can't execute queries until the end of the 'atomic' block.
 '''
 
-todo='''- db Update does not work when a cave id is in the pending list but a proper cave description file exists
-   and is being imported. It should work. But currently Django aborts and he file is not read in.
-   
+todo='''  
  - Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file.
    So we will need a separate file-editing capability just for this configuration file ?!
    
-- crashes on MariaDB on server when deleting Caves and complains Area needs a non null parent, But this is not true.
+- crashes on MariaDB in databasereset.py on server when deleting Caves and complains Area needs a non null parent, But this is not true.
   The only solution we have found is to let it crash, then stop and restart MariaDB (requires a logon able to sudo)
   and then restart the databasereset.py again. (status as of July 2022)
 '''
@@ -91,6 +84,15 @@ def do_pending_cave(k, url, area):
     in expoweb/cave_data/1623-"k".html 
     '''
     slug = k
+    
+    g = GetCaveLookup()
+    if slug in g:
+        message = f" ! {k} cave listed in pendingcaves.txt already exists."
+        DataIssue.objects.create(parser='caves', message=message, url=url)
+        print(message)
+        return
+
+    
 
     default_note = f"_Survex file found in loser repo but no description in expoweb <br><br><br>\n" 
     default_note += f"INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then " 
@@ -118,7 +120,7 @@ def do_pending_cave(k, url, area):
     cave = Cave(
             unofficial_number = k, 
             underground_description = "Pending cave write-up - creating as empty object. No XML file available yet.",
-            survex_file = f"caves-{area.short_name}/{k}/{k}.svx",
+            survex_file = f"caves-{area.short_name}/{k[5:]}/{k[5:]}.svx",
             url = url,
             notes = default_note)
     if cave:
@@ -465,27 +467,6 @@ def readcaves():
         print(" - Saving Area 1626")
         area_1626.save()
 
-        print (" - Setting pending caves")
-        # Do this first, so that these empty entries are overwritten as they get properly created.
-
-        for k in pending:
-            
-            area = area_1623
-            areanum = k[0:4]
-            url = areanum + "/" +  k[5:] # Note we are not appending the .htm as we are modern folks now.
-            if areanum == "1623":
-                area = area_1623
-            if areanum == "1624":
-                area = area_1624
-            if areanum == "1626":
-                area = area_1626
-            try:    
-                do_pending_cave(k[5:], url, area)
-            except:
-                message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
-                DataIssue.objects.create(parser='caves', message=message)
-                print(message)
-                raise
 
     with transaction.atomic():
         print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS)
@@ -505,4 +486,27 @@ def readcaves():
 
     print (" - Setting up all the variously useful alias names")
     mycavelookup = GetCaveLookup()
+    
+    print (" - Setting pending caves")
+    # Do this last, so we can detect if they are created and no longer 'pending'
+
+    for k in pending:
+        
+        area = area_1623
+        areanum = k[0:4]
+        url = areanum + "/" +  k[5:] # Note we are not appending the .htm as we are modern folks now.
+        if areanum == "1623":
+            area = area_1623
+        if areanum == "1624":
+            area = area_1624
+        if areanum == "1626":
+            area = area_1626
+        try:    
+            do_pending_cave(k, url, area)
+        except:
+            message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
+            DataIssue.objects.create(parser='caves', message=message)
+            print(message)
+            raise
+
 
diff --git a/parsers/drawings.py b/parsers/drawings.py
index 88a6ca3..4b3e44d 100644
--- a/parsers/drawings.py
+++ b/parsers/drawings.py
@@ -50,7 +50,7 @@ def find_dwg_file(dwgfile, path):
                 scansfile = scansfilel[0]
 
         if wallet:
-            dwgfile.manywallets.add(wallet)
+            dwgfile.dwgwallets.add(wallet)
         if scansfile:
             dwgfile.scans.add(scansfile)
     
diff --git a/parsers/scans.py b/parsers/scans.py
index fdded82..b78f76f 100644
--- a/parsers/scans.py
+++ b/parsers/scans.py
@@ -1,12 +1,15 @@
 import sys
 import os
+import subprocess
 import types
 import stat
 import csv
 import re
 import datetime
+import shutil, filecmp
 
 from functools import reduce
+from pathlib import Path
 
 import settings
 from troggle.core.models.survex import SingleScan, Wallet, DrawingFile
@@ -18,7 +21,9 @@ from troggle.core.utils import save_carefully, GetListDir
 
 contentsjson = "contents.json"
 indexhtml = "walletindex.html"
+git = settings.GIT
 
+# to do: create a 'low priority' field, so that any such wallet does not appear in summary reports
 wallet_blank_json = {
  "cave": "", 
  "date": "", 
@@ -54,6 +59,22 @@ wallet_blank_html = '''<html><body><H1>Wallet WALLET</H1>
 </UL>
 </body></html>
 '''
+                    
+def CheckEmptyDate(wallet):
+    '''If date is not set, get it from a linked survex file. If several, pick the earliest.
+    
+    Maybe also look at filedates for the scans in expofiles/surveyscans/ , but these can be re-set by copying.
+    '''
+    return
+    
+def CheckEmptyPeople(wallet):
+    '''If people list is empty, copy them from the survex files: all of them
+   
+    To be a Troggle model change; a many:many relationship between wallets and people,
+    as well as being a list in the JSON file (which is the permanent repository). We want the many:many
+    relationship so that we can filter wallets based on a person.
+    '''
+    return
 
 def LoadListScansFile(wallet):
     gld = [ ]
@@ -73,7 +94,45 @@ def LoadListScansFile(wallet):
             if c>=10:
                 print(".", end='')
                 c = 0
+def CopyWalletData(wallet):
+    '''Copies all the contents.json to a parallel set of folders in the drawings repo
+    refreshes everything during a full import, but it should all be up to date as every time
+    wallet data gets saved it should also be copied across and committed.
+    '''
+    year = wallet.walletname[0:4]
+    destfolder = Path(settings.DRAWINGS_DATA,'walletjson', year, wallet.walletname)
+    destjson = destfolder / contentsjson
+    sourcejson = Path(wallet.fpath, contentsjson)
+    if not os.path.exists(Path(destfolder)):
+        try:
+            os.makedirs(destfolder)
+            print(f' -  created folder {destfolder}..')
+        except PermissionError:
+            print(f"CANNOT  save this JSON file.\nPERMISSIONS incorrectly set on server for this folder {destfolder}. Ask a nerd to fix this.")
+    if os.path.isfile(sourcejson):
+        try:
+            if not os.path.isfile(destjson) or not filecmp.cmp(sourcejson, destjson):
+                shutil.copy(sourcejson, destjson)
+                print(f' -  Copied {sourcejson} to {destjson}')
+                dr_add = subprocess.run([git, "add", contentsjson], cwd=destfolder, capture_output=True, text=True)
+                if dr_add.returncode != 0:
+                    msgdata = 'Ask a nerd to fix this.\n\n' + dr_add.stderr + '\n\n' + dr_add.stdout  + '\n\nreturn code: ' + str(dr_add.returncode)
+                    message = f'CANNOT git on server for this file {contentsjson}. Edits saved but not added to git.\n\n' + msgdata
+                    print(message)
+                else:
+                    # ideally we would commit many changes to many wallets just once. But most of the time only a couple of files will change.
+                    dr_commit = subprocess.run([git, "commit", "-m", f'Update of {contentsjson} in wallet'], cwd=destfolder, capture_output=True, text=True)
+                    # This produces return code = 1 if it commits OK
+                    if dr_commit.returncode != 0:
+                        msgdata = 'Ask a nerd to fix this.\n\n' + dr_commit.stderr + '\n\n' + dr_commit.stdout  + '\n\nreturn code: ' + str(dr_commit.returncode)
+                        message = f'Error code with git on server for this {contentsjson}. File is copied, added to git, but NOT committed.\n\n' + msgdata
+                        print(message)
+
+        except PermissionError:
+            print(f"CANNOT  copy this JSON file.\nPERMISSIONS incorrectly set on server for this file {destjson}. Ask a nerd to fix this.")
 
+
+ 
         
 # this iterates through the scans directories (either here or on the remote server)
 # and builds up the models we can access later
@@ -109,17 +168,20 @@ def load_all_scans():
                 if fisdir:
                     wallet = Wallet(fpath=fpath, walletname=walletname)
                     # this is where we should load the contents.json for people so we can report on them later
-                    # this is where we shoudl record the year explicitly
+                    # this is where we should record the year explicitly
                     # line 347 of view/uploads.py and needs refactoring for loading contentsjson
                     wallet.save()
                     LoadListScansFile(wallet)
+                    CheckEmptyDate(wallet)
+                    CheckEmptyPeople(wallet)
+                    CopyWalletData(wallet)
         
         # what is this?
-        elif walletname != "thumbs":
-            print(f'\n - Wallet {walletname} - {fpath}')
-            wallet = Wallet(fpath=fpath, walletname=walletname)
-            wallet.save()
-            LoadListScansFile(wallet)
+        # elif walletname != "thumbs":
+            # print(f'\n - Wallet {walletname} - {fpath}')
+            # wallet = Wallet(fpath=fpath, walletname=walletname)
+            # wallet.save()
+            # LoadListScansFile(wallet)
         else:
             print(f'\n - IGNORE {walletname} - {fpath}')
         
diff --git a/parsers/survex.py b/parsers/survex.py
index 7b94005..39d42dc 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -37,7 +37,6 @@ todo = '''Also walk the entire tree in the :loser: repo looking for unconnected
 - LoadSurvexFile() Creates a new current survexfile and valid .survexdirectory
         The survexblock passed-in is not necessarily the parent. FIX THIS.
         
-- rx_qm recognises only simple survey point ids. EXTEND to cover more naming formats and test fully for 2023
 '''
 survexblockroot = None
 ROOTBLOCK = "rootblock"
@@ -131,8 +130,8 @@ class LoadingSurvex():
 
     rx_cave    = re.compile(r'(?i)caves-(\d\d\d\d)/([-\d\w]+|\d\d\d\d-?\w+-\d+)')
     rx_comment = re.compile(r'([^;]*?)\s*(?:;\s*(.*))?\n?$')
-    rx_comminc = re.compile(r'(?i)^\*include[\s]*([-\w/]*).*$') # inserted by linear collate ;*include
-    rx_commcni = re.compile(r'(?i)^\*edulcni[\s]*([-\w/]*).*$') # inserted by linear collate ;*edulcni
+    rx_comminc = re.compile(r'(?i)^\|\*include[\s]*([-\w/]*).*$') # inserted by linear collate ;*include
+    rx_commcni = re.compile(r'(?i)^\|\*edulcni[\s]*([-\w/]*).*$') # inserted by linear collate ;*edulcni
     rx_include = re.compile(r'(?i)^\s*(\*include[\s].*)$')
     rx_commref = re.compile(r'(?i)^\s*ref(?:erence)?[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)')
     rx_wallet  = re.compile(r'(?i)^\s*wallet[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)')
@@ -178,13 +177,14 @@ class LoadingSurvex():
     callcount = 0
     caverncount = 0
     ignoreprefix = ["surface", "kataster", "fixedpts", "gpx"]
-    ignorenoncave = ["caves-1623", "caves-1623/2007-neu"]
+    ignorenoncave = ["caves-1623", "caves-1626", "caves-1623/2007-neu"]
     includedfilename =""
     currentsurvexblock = None
     currentsurvexfile = None
     currentcave = None
     caverndate = None
     currentpersonexped = []
+    pending = []
 
     def __init__(self):
         self.caveslist = GetCaveLookup()
@@ -690,9 +690,7 @@ class LoadingSurvex():
     def IdentifyCave(self, cavepath):
         if cavepath.lower() in self.caveslist:
             return self.caveslist[cavepath.lower()]
-        # TO DO - some of this is already done in generating self.caveslist so simplify this
-        # esp. as it is in a loop.
-        # TO DO recognise cave if different name, e.g. gruenstein == 281
+        # TO DO - this predates the big revision to Gcavelookup so look at this again carefully
         path_match = self.rx_cave.search(cavepath)
         if path_match:
             sluggy = '{}-{}'.format(path_match.group(1), path_match.group(2))
@@ -724,31 +722,46 @@ class LoadingSurvex():
     def ReportNonCaveIncludes(self, headpath, includelabel, depth):
         """Ignore surface, kataser and gpx *include survex files
         """
+        if not self.pending:
+            self.pending = set()
+            fpending = Path(settings.CAVEDESCRIPTIONS, "pendingcaves.txt")
+            if fpending.is_file():
+                with open(fpending, "r") as fo:
+                    cids = fo.readlines()
+                for cid in cids:
+                    self.pending.add(cid.rstrip('\n').upper())
+
         if headpath in self.ignorenoncave:
-            #message = f" - {headpath} is <ignorenoncave> (while creating '{includelabel}' sfile & sdirectory)"
+            message = f" - {headpath} is <ignorenoncave> (while creating '{includelabel}' sfile & sdirectory)"
             #print("\n"+message)
             #print("\n"+message,file=sys.stderr)
             return
         for i in self.ignoreprefix:
             if headpath.startswith(i):
                 message = f" - {headpath} starts with <ignoreprefix> (while creating '{includelabel}' sfile & sdirectory)"
-                #print("\n"+message)
-                #print("\n"+message,file=sys.stderr)
+                # print("\n"+message)
+                # print("\n"+message,file=sys.stderr)
                 return
-        message = f" ! Error: FAILURE '{headpath}' while creating '{includelabel}' at depth:[{depth}]. Not a cave or in the ignore list:'{self.ignoreprefix}'"
-        # getting this triggered for gpx/2018 (cavern error) but not for gpx/2017 (no content).
+        caveid = f'{headpath[6:10]}-{headpath[11:]}'.upper()
+        if caveid in self.pending:
+           # Yes we didn't find this cave, but we know it is a pending one. So not an error.
+           # print(f'! ALREADY PENDING {caveid}',file=sys.stderr)
+           return
+            
+        message = f" ! Error: not a cave nor ignorable. headpath:'{headpath}' while parsing '{includelabel=}.svx' at depth:[{len(depth)}].  ignore prefix list:'{self.ignoreprefix}'"
         print("\n"+message)
         print("\n"+message,file=sys.stderr)
         DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(headpath))
         print(f' # datastack in  LoadSurvexFile:{includelabel} type:', end="",file=sys.stderr)
         for dict in self.datastack:
-            print(f'{dict["type"].upper()}   ', end="",file=sys.stderr)
+            print(f'<{dict["type"].upper()}   >', end="",file=sys.stderr)
         
 
     def LoadSurvexFile(self, svxid):
         """Creates SurvexFile in the database, and SurvexDirectory if needed
         with links to 'cave'
         Creates a new current survexfile and valid .survexdirectory
+        Inspects the parent folder of the survexfile and uses that to decide if this is a cave we know
         The survexblock passed-in is not necessarily the parent. FIX THIS.
         """
         if debugprint:
@@ -780,7 +793,7 @@ class LoadingSurvex():
         if cave:
             newdirectory.cave = cave
             newfile.cave   = cave
-            # print(f"\n - New directory {newdirectory} for cave {newdirectory.cave}",file=sys.stderr)
+            # print(f"\n - New directory '{newdirectory}' for cave '{cave}'",file=sys.stderr)
         else: # probably a surface survey, or a cave in a new area e.g. 1624 not previously managed, and not in the pending list
             self.ReportNonCaveIncludes(headpath, svxid, depth)
             
@@ -862,6 +875,7 @@ class LoadingSurvex():
             
         included = self.rx_comminc.match(comment)
         # ;*include means 'we have been included'; whereas *include means 'proceed to include' 
+        # bug, If the original survex file contains the line ;*include then we pick it up ! So fix our special code to be ;|*include
         if included:
             self.ProcessIncludeLine(included)
 
@@ -1211,7 +1225,7 @@ class LoadingSurvex():
                         #--------------------------------------------------------
                         self.depthinclude += 1
                         fininclude = open(fullpath,'r')
-                        fcollate.write(";*include {}\n".format(includepath))
+                        fcollate.write(";|*include {}\n".format(includepath))
                         flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includepath))
                         push = includepath.lower()
                         self.includestack.append(push)
@@ -1226,7 +1240,7 @@ class LoadingSurvex():
                             print(message,file=sys.stderr)
                             DataIssue.objects.create(parser='survex', message=message,  url=get_offending_filename(path))
                         flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, pop))
-                        fcollate.write(";*edulcni {}\n".format(pop))
+                        fcollate.write(";|*edulcni {}\n".format(pop))
                         fininclude.close()
                         self.depthinclude -= 1
                         #--------------------------------------------------------