Diffstat (limited to 'parsers')
-rw-r--r--  parsers/survex.py  138
1 file changed, 70 insertions(+), 68 deletions(-)
diff --git a/parsers/survex.py b/parsers/survex.py
index d712394..681dc48 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -22,20 +22,19 @@ It also scans the Loser repo for all the svx files, which it loads individually
"""
todo = """
+- Obscure bug in the *team inheritance and rootblock initialization needs tracking down,
+ probably in the team cache which should NOT be global, but should be an instance variable of
+ LoadingSurvex
+
- Lots to do to cut down on unnecessary .save() calls to avoid hitting the db so much. Should
speed it up noticeably.
-
-- Obscure bug in the *team inheritance and rootblock initialization needs tracking down
- Learn to use Django .select_related() and .prefetch_related() to speed things up
https://zerotobyte.com/how-to-use-django-select-related-and-prefetch-related/
- LoadSurvexFile() Creates a new current survexfile
- The survexblock passed-in is not necessarily the parent. FIX THIS.
-
-- Finish writing the parse_one_file() function for survexfiles edited online. Perhaps
- easier if this is a completely new file rather than an existing file.. nasty.
-
+ The survexblock passed-in is not necessarily the survex parent. FIX THIS.
+
- When Olly implements LEG in the 'dump3d --legs' utility, we can use that to get the length of
all the legs in a survex block instead of adding them up ourselves. That means we can
ignore all the Units and offset stuff, and troggle will work with survex files with backsights,
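
The select_related() item in the todo above is the easiest one to illustrate. A minimal sketch, not code from this commit: it assumes SurvexPersonRole reaches its SurvexFile through the survexblock foreign key, which matches the block.survexfile.path usage later in this diff; "blocks" is a hypothetical iterable of SurvexBlock objects.

    # Hedged sketch of the N+1 fix the todo points at. The relation names
    # are assumptions; only SurvexPersonRole and survexfile.path appear
    # in this diff.
    roles = (
        SurvexPersonRole.objects.filter(survexblock__in=blocks)
        .select_related("survexblock__survexfile")  # one JOINed query
    )
    for role in roles:
        # no per-row query: the related rows arrived with the JOIN
        print(role.survexblock.survexfile.path)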
@@ -62,27 +61,10 @@ class SurvexLeg:
compass = 0.0
clino = 0.0
-def IdentifyCave(cavepath):
- """Given a file path for a survex file, or a survex-block path,
- return the cave object
- """
- caveslist = GetCaveLookup()
- if cavepath.lower() in caveslist:
- return caveslist[cavepath.lower()]
- # TO DO - this predates the big revision to Gcavelookup so look at this again carefully
- path_match = LoadingSurvex.rx_cave.search(cavepath) # use as Class method
- if path_match:
- sluggy = f"{path_match.group(1)}-{path_match.group(2)}"
- guesses = [sluggy.lower(), path_match.group(2).lower()]
- for g in guesses:
- if g in caveslist:
- caveslist[cavepath] = caveslist[g]
- return caveslist[g]
- print(f" ! Failed to find cave for {cavepath.lower()}")
- else:
- # not a cave, but that is fine.
- # print(f' ! No regex(standard identifier) cave match for {cavepath.lower()}')
- return None
+
+
+
+
def datewallet(w, earliest):
"""Gets the date of the youngest survexblock associated with the wallet
@@ -141,7 +123,8 @@ def get_offending_filename(path):
"""
return "/survexfile/" + path + ".svx"
-trip_people_cache = {} # per survexblock, so robust wrt PUSH/POP begin/end
+# THIS SHOULD NOT BE GLOBAL! Should be per instance of the file loader.
+trip_people_cache = {} # indexed by survexblock, so never needs cleaning out
def get_team_on_trip(survexblock):
"""Uses a cache to avoid a database query if it doesn't need to.
Only used for complete team."""
@@ -165,8 +148,9 @@ def get_people_on_trip(survexblock):
return list(set(people))
-trip_person_record = {} # per survexblock, so robust wrt PUSH/POP begin/end
-trip_team_cache = {} # per survexblock, so robust wrt PUSH/POP begin/end
+# THIS SHOULD NOT BE GLOBAL! Should be per instance of the file loader.
+trip_person_record = {} # indexed by (survexblock, personexpedition) - so never needs cleaning out
+trip_team_cache = {} # indexed by survexblock, so never needs cleaning out
def put_person_on_trip(survexblock, personexpedition, tm):
"""Uses a cache to avoid a database query if it doesn't need to.
Only used for a single person"""
@@ -206,18 +190,17 @@ def confirm_team_on_trip(survexblock):
SurvexPersonRole.objects.bulk_create(trip_team_cache[survexblock])
trip_team_cache[survexblock] = [] # in database now, so empty cache
-def check_team_cache():
+def check_team_cache(label=None):
global trip_team_cache
-
message = f"! check_team_cache() called.. "
print(message)
-
+ print(message, file=sys.stderr)
for block in trip_team_cache:
- message = f"! *team CACHEFAIL, already created {block.survexfile.path} ({block}) "
+ message = f"! *team CACHEFAIL, trip_team_cache {block.survexfile.path} ({block}). label:{label}"
print(message)
-
+ print(message, file=sys.stderr)
-person_pending_cache = {} # per survexblock, so robust wrt PUSH/POP begin/end
+person_pending_cache = {} # indexed by survexblock, so robust wrt PUSH/POP begin/end
def add_to_pending(survexblock, tm):
"""Collects team names before we have a date so cannot validate against
expo attendance yet"""
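
A minimal sketch of the de-globalising refactor that the "THIS SHOULD NOT BE GLOBAL" comments (and the todo at the top of this diff) call for. The cache names and keys are the ones above; the query inside get_team_on_trip() is an assumption standing in for the real lookup.

    class LoadingSurvex:
        def __init__(self):
            # Caches become instance state, so every parser run starts clean
            # and two runs can never see each other's entries.
            self.trip_people_cache = {}     # indexed by survexblock
            self.trip_person_record = {}    # indexed by (survexblock, personexpedition)
            self.trip_team_cache = {}       # indexed by survexblock
            self.person_pending_cache = {}  # team names seen before any *date

        def get_team_on_trip(self, survexblock):
            """Same caching contract as the module-level function above."""
            if survexblock in self.trip_people_cache:
                return self.trip_people_cache[survexblock]
            qpeople = SurvexPersonRole.objects.filter(survexblock=survexblock)  # assumed query
            self.trip_people_cache[survexblock] = qpeople
            return qpeople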
@@ -1180,7 +1163,45 @@ class LoadingSurvex:
self.svxprim[headpath.lower()] = primary
return self.svxprim[headpath.lower()]
- def ReportNonCaveIncludes(self, headpath, includelabel, depth):
+ def IdentifyCave(self, cavepath, svxid, depth):
+ """Given a file path for a survex file, e.g. /1626/107/107.svx, or a survex-block path,
+ return the cave object
+
+ REWRITE ALL THIS and make a method on the class
+ """
+ caveslist = GetCaveLookup()
+ if cavepath.lower() in caveslist: # will only work after we load in full paths as indexes, see below
+ return caveslist[cavepath.lower()]
+ # rx_cave = re.compile(r"(?i)caves-(\d\d\d\d)/([-\d\w]+|\d\d\d\d-?\w+-\d+)")
+ path_match = self.rx_cave.search(cavepath) # use as Class method.
+ if path_match:
+ sluggy = f"{path_match.group(1)}-{path_match.group(2)}"
+ # guesses = [sluggy.lower(), path_match.group(2).lower()] # this looks for JUST "107" and ignores 1626..
+ guesses = [sluggy.lower()] # full 1626-107 search, don't use short-forms
+ for g in guesses:
+ if g in caveslist:
+ caveslist[cavepath] = caveslist[g] # set "caves-1626/107/107.svx" as index to cave 1626-107
+ return caveslist[g]
+ print(f" ! Failed to find cave for {cavepath.lower()}", file=sys.stderr)
+ else:
+ # not a cave, but that is fine.
+ if self.is_it_already_pending(cavepath, svxid, depth):
+ pass
+ else:
+ # It is too late to add it to the pending caves list here, they were already
+ # processed in parsers/caves.py. So we have to do a bespoke creation.
+ cave = create_new_cave(svxid)
+
+ message = f" ! Warning: cave identifier '{caveid}'or {id} (guessed from file path) is not a known cave. Need to add to expoweb/cave_data/pendingcaves.txt ? In '{includelabel}.svx' at depth:[{len(depth)}]."
+ print("\n" + message)
+ print("\n" + message, file=sys.stderr)
+ print(f"{self.pending}", end="", file=sys.stderr)
+ stash_data_issue(parser="survex", message=message, url=None, sb=(svxid))
+
+ print(f' ! No regex (standard identifier) cave match for {cavepath.lower()}', file=sys.stderr)
+ return None
+
+ def is_it_already_pending(self, headpath, includelabel, depth):
"""Ignore surface, kataser and gpx *include survex files"""
if not self.pending:
self.pending = set()
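
For reference, the path-to-slug matching that IdentifyCave() performs above, as a standalone sketch using the rx_cave pattern quoted verbatim in its comment:

    import re

    # pattern copied from the commented-out rx_cave line in IdentifyCave()
    rx_cave = re.compile(r"(?i)caves-(\d\d\d\d)/([-\d\w]+|\d\d\d\d-?\w+-\d+)")

    m = rx_cave.search("caves-1626/107/107.svx")
    assert m is not None
    assert f"{m.group(1)}-{m.group(2)}" == "1626-107"  # the full area-cave slug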
@@ -1199,7 +1220,7 @@ class LoadingSurvex:
message = f" - {headpath} is <ignorenoncave> (while creating '{includelabel}' sfile & sdirectory)"
# print("\n"+message)
# print("\n"+message,file=sys.stderr)
- return
+ return True
for i in self.ignoreprefix:
if headpath.startswith(i):
message = (
@@ -1207,28 +1228,17 @@ class LoadingSurvex:
)
# print("\n"+message)
# print("\n"+message,file=sys.stderr)
- return
+ return True
caveid = f"{headpath[6:10]}-{headpath[11:]}".upper()
if caveid in self.pending:
# Yes we didn't find this cave, but we know it is a pending one. So not an error.
- # print(f'! ALREADY PENDING {caveid}',file=sys.stderr)
- return
+ print(f"! ALREADY PENDING caveid {caveid}", file=sys.stderr)
+ return True
id = caveid[5:]
if id in self.pending:
- print(f"! ALREADY PENDING {id}", file=sys.stderr)
- return
+ print(f"! ALREADY PENDING id {id}", file=sys.stderr)
+ return True
- # It is too late to add it to the pending caves list here, they were already
- # processed in parsers/caves.py So we have to do a bespoke creation.
- svxpath= includelabel
- cave = create_new_cave(svxpath)
-
- message = f" ! Warning: cave identifier '{caveid}'or {id} (guessed from file path) is not a known cave. Need to add to expoweb/cave_data/pendingcaves.txt ? In '{includelabel}.svx' at depth:[{len(depth)}]."
- print("\n" + message)
- print("\n" + message, file=sys.stderr)
- print(f"{self.pending}", end="", file=sys.stderr)
- stash_data_issue(parser="survex", message=message, url=None, sb=(includelabel))
-
def LoadSurvexFile(self, svxid):
"""Creates SurvexFile in the database, and SurvexDirectory if needed
Creates a new current survexfile and valid .survexdirectory
@@ -1267,15 +1277,9 @@ class LoadingSurvex:
newfile.save() # until we do this there is no internal id so no foreign key works
self.currentsurvexfile = newfile
newfile.primary = self.set_primary(headpath)
-
- # REPLACE all this IdentifyCave() stuff with GCaveLookup ?
- cave = IdentifyCave(headpath) # cave already exists in db
- if not cave:
- # probably a surface survey, or a cave in a new area
- # e.g. 1624 not previously managed, and not in the pending list
- self.ReportNonCaveIncludes(headpath, svxid, depth)
- #try again
- cave = IdentifyCave(headpath)
+
+ # refactor this !
+ cave = self.IdentifyCave(headpath, svxid, depth) # cave already exists in db?
if cave:
newfile.cave = cave
# print(f"\n - New directory '{newdirectory}' for cave '{cave}'",file=sys.stderr)
@@ -1530,7 +1534,7 @@ class LoadingSurvex:
slengthtotal = 0.0
nlegstotal = 0
self.relativefilename = path
- IdentifyCave(path) # this will produce null for survex files which are geographic collections
+ #self.IdentifyCave(path, svxid, depth) # this will produce null for survex files which are geographic collections
self.currentsurvexfile = survexblock.survexfile
self.currentsurvexfile.save() # django insists on this although it is already saved !?
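
The double .save() flagged here is part of the database cost the todo wants to cut. One standard mitigation, sketched under the assumption that only the cave field changed (update_fields is stock Django; nothing else here is from the commit):

    # Narrow the UPDATE to the columns actually touched instead of
    # rewriting every field on the row a second time.
    newfile.cave = cave
    newfile.save(update_fields=["cave"])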
@@ -2198,7 +2202,6 @@ def FindAndLoadSurvex():
)
print(f" -- (but ignoring {len(removals)} of them)", file=sys.stderr)
- check_team_cache()
s_date = date.today().isoformat().replace('-','.')
print(f" -- Now loading the previously-omitted survex files as {UNSEENS} *date {s_date}", file=sys.stderr)
print(f" - (except: {excpts})", file=sys.stderr)
@@ -2250,8 +2253,6 @@ def FindAndLoadSurvex():
flinear.write(f"{omit_scan.depthinclude:2} {indent} *edulcni {unseensroot}\n")
fcollate.write(f";*edulcni {UNSEENS}\n")
-
- check_team_cache()
mem1 = get_process_memory()
flinear.write(f"\n - MEM:{mem1:.2f} MB STOP {UNSEENS} Unseen Oddments\n")
@@ -2294,6 +2295,7 @@ def FindAndLoadSurvex():
# ps = pstats.Stats(pr2, stream=f)
# ps.sort_stats(SortKey.CUMULATIVE)
# ps.print_stats()
+
mem1 = get_process_memory()
print(f"\n - MEM:{mem1:7.2f} MB STOP", file=sys.stderr)
print(f" - MEM:{mem1 - mem0:7.3f} MB ADDITIONALLY USED", file=sys.stderr)