Merge branch 'python3-new' of ssh://expo.survex.com/home/expo/troggle into python3-new

author: Martin Green <martin.speleo@gmail.com> 2022-08-01 16:05:17 +0200
committer: Martin Green <martin.speleo@gmail.com> 2022-08-01 16:05:17 +0200
commit: c1aaf0788524e69872162362b73504d89737c55d (patch)
tree: 25246b317ca209f236440b5d1a70a19baa786d2c /parsers/survex.py
parent: f491264b9ecb9724be99419bbc5331f9ca492bac (diff)
parent: eed35d01a86a365d464a6b1584d12b9cb40652f6 (diff)
download: troggle-c1aaf0788524e69872162362b73504d89737c55d.tar.gz
troggle-c1aaf0788524e69872162362b73504d89737c55d.tar.bz2
troggle-c1aaf0788524e69872162362b73504d89737c55d.zip
1 files changed, 30 insertions, 16 deletions
diff --git a/parsers/survex.py b/parsers/survex.py
index 7b94005..39d42dc 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -37,7 +37,6 @@ todo = '''Also walk the entire tree in the :loser: repo looking for unconnected
 - LoadSurvexFile() Creates a new current survexfile and valid .survexdirectory
         The survexblock passed-in is not necessarily the parent. FIX THIS.
         
-- rx_qm recognises only simple survey point ids. EXTEND to cover more naming formats and test fully for 2023
 '''
 survexblockroot = None
 ROOTBLOCK = "rootblock"
@@ -131,8 +130,8 @@ class LoadingSurvex():
 
     rx_cave    = re.compile(r'(?i)caves-(\d\d\d\d)/([-\d\w]+|\d\d\d\d-?\w+-\d+)')
     rx_comment = re.compile(r'([^;]*?)\s*(?:;\s*(.*))?\n?$')
-    rx_comminc = re.compile(r'(?i)^\*include[\s]*([-\w/]*).*$') # inserted by linear collate ;*include
-    rx_commcni = re.compile(r'(?i)^\*edulcni[\s]*([-\w/]*).*$') # inserted by linear collate ;*edulcni
+    rx_comminc = re.compile(r'(?i)^\|\*include[\s]*([-\w/]*).*$') # inserted by linear collate ;*include
+    rx_commcni = re.compile(r'(?i)^\|\*edulcni[\s]*([-\w/]*).*$') # inserted by linear collate ;*edulcni
     rx_include = re.compile(r'(?i)^\s*(\*include[\s].*)$')
     rx_commref = re.compile(r'(?i)^\s*ref(?:erence)?[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)')
     rx_wallet  = re.compile(r'(?i)^\s*wallet[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)')
@@ -178,13 +177,14 @@ class LoadingSurvex():
     callcount = 0
     caverncount = 0
     ignoreprefix = ["surface", "kataster", "fixedpts", "gpx"]
-    ignorenoncave = ["caves-1623", "caves-1623/2007-neu"]
+    ignorenoncave = ["caves-1623", "caves-1626", "caves-1623/2007-neu"]
     includedfilename =""
     currentsurvexblock = None
     currentsurvexfile = None
     currentcave = None
     caverndate = None
     currentpersonexped = []
+    pending = []
 
     def __init__(self):
         self.caveslist = GetCaveLookup()
@@ -690,9 +690,7 @@ class LoadingSurvex():
     def IdentifyCave(self, cavepath):
         if cavepath.lower() in self.caveslist:
             return self.caveslist[cavepath.lower()]
-        # TO DO - some of this is already done in generating self.caveslist so simplify this
-        # esp. as it is in a loop.
-        # TO DO recognise cave if different name, e.g. gruenstein == 281
+        # TO DO - this predates the big revision to Gcavelookup so look at this again carefully
         path_match = self.rx_cave.search(cavepath)
         if path_match:
             sluggy = '{}-{}'.format(path_match.group(1), path_match.group(2))
@@ -724,31 +722,46 @@ class LoadingSurvex():
     def ReportNonCaveIncludes(self, headpath, includelabel, depth):
         """Ignore surface, kataser and gpx *include survex files
         """
+        if not self.pending:
+            self.pending = set()
+            fpending = Path(settings.CAVEDESCRIPTIONS, "pendingcaves.txt")
+            if fpending.is_file():
+                with open(fpending, "r") as fo:
+                    cids = fo.readlines()
+                for cid in cids:
+                    self.pending.add(cid.rstrip('\n').upper())
+
         if headpath in self.ignorenoncave:
-            #message = f" - {headpath} is <ignorenoncave> (while creating '{includelabel}' sfile & sdirectory)"
+            message = f" - {headpath} is <ignorenoncave> (while creating '{includelabel}' sfile & sdirectory)"
             #print("\n"+message)
             #print("\n"+message,file=sys.stderr)
             return
         for i in self.ignoreprefix:
             if headpath.startswith(i):
                 message = f" - {headpath} starts with <ignoreprefix> (while creating '{includelabel}' sfile & sdirectory)"
-                #print("\n"+message)
-                #print("\n"+message,file=sys.stderr)
+                # print("\n"+message)
+                # print("\n"+message,file=sys.stderr)
                 return
-        message = f" ! Error: FAILURE '{headpath}' while creating '{includelabel}' at depth:[{depth}]. Not a cave or in the ignore list:'{self.ignoreprefix}'"
-        # getting this triggered for gpx/2018 (cavern error) but not for gpx/2017 (no content).
+        caveid = f'{headpath[6:10]}-{headpath[11:]}'.upper()
+        if caveid in self.pending:
+           # Yes we didn't find this cave, but we know it is a pending one. So not an error.
+           # print(f'! ALREADY PENDING {caveid}',file=sys.stderr)
+           return
+            
+        message = f" ! Error: not a cave nor ignorable. headpath:'{headpath}' while parsing '{includelabel=}.svx' at depth:[{len(depth)}].  ignore prefix list:'{self.ignoreprefix}'"
         print("\n"+message)
         print("\n"+message,file=sys.stderr)
         DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(headpath))
         print(f' # datastack in  LoadSurvexFile:{includelabel} type:', end="",file=sys.stderr)
         for dict in self.datastack:
-            print(f'{dict["type"].upper()}   ', end="",file=sys.stderr)
+            print(f'<{dict["type"].upper()}   >', end="",file=sys.stderr)
         
 
     def LoadSurvexFile(self, svxid):
         """Creates SurvexFile in the database, and SurvexDirectory if needed
         with links to 'cave'
         Creates a new current survexfile and valid .survexdirectory
+        Inspects the parent folder of the survexfile and uses that to decide if this is a cave we know
         The survexblock passed-in is not necessarily the parent. FIX THIS.
         """
         if debugprint:
@@ -780,7 +793,7 @@ class LoadingSurvex():
         if cave:
             newdirectory.cave = cave
             newfile.cave   = cave
-            # print(f"\n - New directory {newdirectory} for cave {newdirectory.cave}",file=sys.stderr)
+            # print(f"\n - New directory '{newdirectory}' for cave '{cave}'",file=sys.stderr)
         else: # probably a surface survey, or a cave in a new area e.g. 1624 not previously managed, and not in the pending list
             self.ReportNonCaveIncludes(headpath, svxid, depth)
             
@@ -862,6 +875,7 @@ class LoadingSurvex():
             
         included = self.rx_comminc.match(comment)
         # ;*include means 'we have been included'; whereas *include means 'proceed to include' 
+        # Bug: if the original survex file contains the line ;*include then we pick it up! So our special marker is now ;|*include
         if included:
             self.ProcessIncludeLine(included)
 
@@ -1211,7 +1225,7 @@ class LoadingSurvex():
                         #--------------------------------------------------------
                         self.depthinclude += 1
                         fininclude = open(fullpath,'r')
-                        fcollate.write(";*include {}\n".format(includepath))
+                        fcollate.write(";|*include {}\n".format(includepath))
                         flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includepath))
                         push = includepath.lower()
                         self.includestack.append(push)
@@ -1226,7 +1240,7 @@ class LoadingSurvex():
                             print(message,file=sys.stderr)
                             DataIssue.objects.create(parser='survex', message=message,  url=get_offending_filename(path))
                         flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, pop))
-                        fcollate.write(";*edulcni {}\n".format(pop))
+                        fcollate.write(";|*edulcni {}\n".format(pop))
                         fininclude.close()
                         self.depthinclude -= 1
                         #--------------------------------------------------------
author	Martin Green <martin.speleo@gmail.com>	2022-08-01 16:05:17 +0200
committer	Martin Green <martin.speleo@gmail.com>	2022-08-01 16:05:17 +0200
commit	c1aaf0788524e69872162362b73504d89737c55d (patch)
tree	25246b317ca209f236440b5d1a70a19baa786d2c /parsers/survex.py
parent	f491264b9ecb9724be99419bbc5331f9ca492bac (diff)
parent	eed35d01a86a365d464a6b1584d12b9cb40652f6 (diff)
download	troggle-c1aaf0788524e69872162362b73504d89737c55d.tar.gz troggle-c1aaf0788524e69872162362b73504d89737c55d.tar.bz2 troggle-c1aaf0788524e69872162362b73504d89737c55d.zip