summaryrefslogtreecommitdiffstats
path: root/parsers
diff options
context:
space:
mode:
Diffstat (limited to 'parsers')
-rw-r--r--parsers/caves.py53
-rw-r--r--parsers/survex.py195
2 files changed, 75 insertions, 173 deletions
diff --git a/parsers/caves.py b/parsers/caves.py
index 3c5d98e..ff87bcd 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -1,18 +1,27 @@
-# -*- coding: utf-8 -*-
import os
import re
from django.conf import settings
-import troggle.core.models as models
+from troggle.core.models import DataIssue, get_process_memory
import troggle.core.models_caves as models_caves
def readcaves():
- # Clear the cave data issues as we are reloading
- models.DataIssue.objects.filter(parser='caves').delete()
+ print(" - Deleting Caves and Entrances")
+ models_caves.Cave.objects.all().delete()
+ models_caves.Entrance.objects.all().delete()
+ # Clear the cave data issues and the caves as we are reloading
+ DataIssue.objects.filter(parser='caves').delete()
+ DataIssue.objects.filter(parser='entrances').delete()
+ # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
+ area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None)
+ area_1626 = models_caves.Area.objects.update_or_create(short_name = "1626", parent = None)
+
+ print (" - Setting pending caves")
# Do this first, so that these empty entries are overwritten as they get properly created.
# For those caves which do not have XML files even though they exist and have surveys
+ # also needs to be done *before* entrances so that the entrance-cave links work properly.
forgotten = ["2007-04", "2007-05", "2007-06", "2007-07", "2007-12", "2009-01", "2009-02",
"2010-06", "2010-07", "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06",
"2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888",
@@ -21,32 +30,30 @@ def readcaves():
try:
cave = models_caves.Cave(
unofficial_number = k,
- official_name = "Mislaid cave - created as empty object. No XML available at this time.",
+ official_name = "Pending cave write-up - creating as empty object. No XML file available yet.",
notes="_Survex file found in loser repo but no description in expoweb")
if cave:
- print("{} {}".format(cave.unofficial_number, cave.official_name))
+ cave.save() # must save to have id before foreign keys work
+ cave.area = area_1623
cave.save()
+ message = " ! {} {}".format(cave.unofficial_number, cave.official_name)
+ DataIssue.objects.create(parser='caves', message=message)
+ print(message)
else:
print("Failed to create cave {} ".format(k))
except:
- message = " ! Forgotten cave error, slug: %s forgotten-id: %s" % (slug, k)
- models.DataIssue.objects.create(parser='caves', message=message)
+ message = " ! Error. Cannot create pending cave, pending-id:{}".format(k)
+ DataIssue.objects.create(parser='caves', message=message)
print(message)
-
- # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
- area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None)
- area_1626 = models_caves.Area.objects.update_or_create(short_name = "1626", parent = None)
- print(" - Reading Entrances")
+ raise
+ print(" - Reading Entrances from entrance descriptions xml files")
for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
if filename.endswith('.html'):
readentrance(filename)
- print (" - Reading Caves")
+ print(" - Reading Caves from cave descriptions xml files")
for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
if filename.endswith('.html'):
readcave(filename)
-
-
-
def readentrance(filename):
# Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
@@ -120,7 +127,7 @@ def readentrance(filename):
primary = primary)
for k in kents:
message = " ! - DUPLICATE in db. entrance:"+ str(k.entrance) + ", slug:" + str(k.slug())
- models.DataIssue.objects.create(parser='caves', message=message)
+ DataIssue.objects.create(parser='caves', message=message)
print(message)
for k in kents:
if k.slug() != None:
@@ -189,7 +196,7 @@ def readcave(filename):
kaves = models_caves.Cave.objects.all().filter(kataster_number=kataster_number[0])
for k in kaves:
message = " ! - DUPLICATES in db. kataster:"+ str(k.kataster_number) + ", slug:" + str(k.slug())
- models.DataIssue.objects.create(parser='caves', message=message)
+ DataIssue.objects.create(parser='caves', message=message)
print(message)
for k in kaves:
if k.slug() != None:
@@ -213,7 +220,7 @@ def readcave(filename):
primary = primary)
except:
message = " ! Cave update/create failure: %s, skipping file %s" % (slug, context)
- models.DataIssue.objects.create(parser='caves', message=message)
+ DataIssue.objects.create(parser='caves', message=message)
print(message)
primary = False
@@ -225,7 +232,7 @@ def readcave(filename):
ce = models_caves.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance)
except:
message = " ! Entrance setting failure, slug: %s letter: %s" % (slug, letter)
- models.DataIssue.objects.create(parser='caves', message=message)
+ DataIssue.objects.create(parser='caves', message=message)
print(message)
@@ -235,13 +242,13 @@ def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True,
message = " ! %(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),
"itemname": itemname,
"min": minItems} + context
- models.DataIssue.objects.create(parser='caves', message=message)
+ DataIssue.objects.create(parser='caves', message=message)
print(message)
if maxItems is not None and len(items) > maxItems and printwarnings:
message = " ! %(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items),
"itemname": itemname,
"max": maxItems} + context
- models.DataIssue.objects.create(parser='caves', message=message)
+ DataIssue.objects.create(parser='caves', message=message)
print(message)
return items \ No newline at end of file
diff --git a/parsers/survex.py b/parsers/survex.py
index 8bcbea2..6d266a3 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -68,6 +68,8 @@ class LoadingSurvex():
insp = ""
callcount = 0
stardata ={}
+ ignoreprefix = ["surface", "kataster", "fixedpts", "gpx"]
+ ignorenoncave = ["caves-1623", "caves-1623/2007-neu"]
includedfilename =""
currentsurvexblock = None
currentsurvexfile = None
@@ -308,7 +310,8 @@ class LoadingSurvex():
def IdentifyCave(self, cavepath):
if cavepath.lower() in self.caveslist:
return self.caveslist[cavepath.lower()]
-
+        # TODO - some of this is already done in generating self.caveslist so simplify this
+ # esp. as it is in a loop.
path_match = self.rx_cave.search(cavepath)
if path_match:
sluggy = '{}-{}'.format(path_match.group(1), path_match.group(2))
@@ -329,6 +332,19 @@ class LoadingSurvex():
self.svxdirs[headpath.lower()] = models_survex.SurvexDirectory(path=headpath, primarysurvexfile=self.currentsurvexfile)
return self.svxdirs[headpath.lower()]
+ def ReportNonCaveIncludes(self, headpath, includelabel):
+    """Ignore surface, kataster, fixedpts and gpx *include survex files
+ """
+ if headpath in self.ignorenoncave:
+ return
+ for i in self.ignoreprefix:
+ if headpath.startswith(i):
+ return
+ message = " ! {} is not a cave. (while creating {} sfile & sdirectory)".format(headpath, includelabel)
+ print(message)
+ print(message,file=sys.stderr)
+ models.DataIssue.objects.create(parser='survex', message=message)
+
def LoadSurvexFile(self, includelabel):
"""Creates SurvexFile in the database, and SurvexDirectory if needed
with links to 'cave'
@@ -338,28 +354,37 @@ class LoadingSurvex():
depth = " " * self.depthbegin
print("{:2}{} - NEW survexfile:'{}'".format(self.depthbegin, depth, includelabel))
- newsurvexfile = models_survex.SurvexFile(path=includelabel)
+ newfile = models_survex.SurvexFile(path=includelabel)
headpath, tail = os.path.split(includelabel)
- newsurvexdirectory = self.GetSurvexDirectory(headpath)
- newsurvexfile.survexdirectory = newsurvexdirectory
+ newdirectory = self.GetSurvexDirectory(headpath)
+ if not newdirectory:
+ message = " ! 'None' SurvexDirectory returned from GetSurvexDirectory({})".format(headpath)
+ print(message)
+ print(message,file=sys.stderr)
+ models.DataIssue.objects.create(parser='survex', message=message)
+ newfile.survexdirectory = newdirectory
cave = self.IdentifyCave(headpath)
if cave:
- newsurvexdirectory.cave = cave
- newsurvexfile.cave = cave
- # else:
- # message = " ! Cannot identify cave from {} when creating sfile & sdirectory".format(headpath)
- # print(message)
- # print(message,file=sys.stderr)
- # models.DataIssue.objects.create(parser='survex', message=message)
+ newdirectory.cave = cave
+ newfile.cave = cave
+ else:
+ self.ReportNonCaveIncludes(headpath, includelabel)
+
+
+ if not newfile.survexdirectory:
+ message = " ! SurvexDirectory NOT SET in new SurvexFile {} ".format(includelabel)
+ print(message)
+ print(message,file=sys.stderr)
+ models.DataIssue.objects.create(parser='survex', message=message)
self.currentsurvexfile.save() # django insists on this although it is already saved !?
try:
- newsurvexdirectory.save()
+ newdirectory.save()
except:
- print(newsurvexdirectory, file=sys.stderr)
- print(newsurvexdirectory.primarysurvexfile, file=sys.stderr)
+ print(newdirectory, file=sys.stderr)
+ print(newdirectory.primarysurvexfile, file=sys.stderr)
raise
- self.currentsurvexfile = newsurvexfile
+ self.currentsurvexfile = newfile
def ProcessIncludeLine(self, included):
svxid = included.groups()[0]
@@ -418,129 +443,6 @@ class LoadingSurvex():
if cave:
survexfile.cave = cave
- def RecursiveRecursiveLoad(self, survexblock, survexfile, fin):
- """Follows the *include links in all the survex files from the root file 1623.svx
- and reads in the survex blocks, other data and the wallet references (scansfolder) as it
- goes. This part of the data include process is where the maximum memory is used and where it
- crashes on memory-constrained machines. Begin-end blocks may also be nested.
- """
- # self.LoadSurvexSetup(survexblock, survexfile)
- # insp =self.insp
- # previousnlegs = 0
-
- # svxlines = fin.read().splitlines()
- # # cannot close file now as may be recursively called with the same fin if nested *begin-end
-
- # for svxline in svxlines:
- # self.lineno += 1
- # sline, comment = self.rx_comment.match(svxline.strip()).groups()
- # if comment:
- # self.LoadSurvexComment(survexblock, comment)
- # if not sline:
- # continue # skip blank lines
-
- # # detect the star command
- # mstar = self.rx_star.match(sline)
- # if mstar: # yes we are reading a *cmd
- # cmd, args = mstar.groups()
- # cmd = cmd.lower()
- # if re.match("include$(?i)", cmd):
- # cave = self.IdentifyCave(args)
- # if cave:
- # survexfile.cave = cave
-
- # includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args)))
- # print((insp+' - INCLUDE-go path found, including - ' + args))
-
- # includesurvexfile = models_survex.SurvexFile(path=includepath)
- # includesurvexfile.save()
- # includesurvexfile.SetDirectory()
- # if includesurvexfile.exists():
- # survexblock.save()
- # self.insp += "> "
- # #--------------------------------------------------------
- # fininclude = includesurvexfile.OpenFile()
- # self.RecursiveRecursiveLoad(survexblock, includesurvexfile, fininclude)
- # fininclude.close()
- # #--------------------------------------------------------
- # self.insp = self.insp[2:]
- # insp = self.insp
- # print((insp+' - INCLUDE-return from include - ' + includepath))
- # else:
- # print((insp+' ! ERROR *include file not found for %s' % includesurvexfile))
-
- # elif re.match("begin$(?i)", cmd):
- # # On a *begin statement we start a new survexblock.
- # # There should not be any *include inside a begin-end block, so this is a simple
- # # load not a recursive fileload. But there may be many blocks nested to any depth in one file.
- # if args:
- # newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args))
- # # Try to find the cave in the DB if not use the string as before
- # path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath)
- # if path_match:
- # pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
- # # print(insp+pos_cave)
- # cave = models_caves.getCaveByReference(pos_cave)
- # if cave:
- # survexfile.cave = cave
- # else:
- # print((insp+' - No match (b) for %s' % newsvxpath))
-
- # previousnlegs = self.survexlegsnumber
- # name = args.lower()
- # print(insp+' - Begin found for:{}, creating new SurvexBlock '.format(name))
- # # the recursive call re-reads the entire file. This is wasteful. We should pass in only
- # # the un-parsed part of the file.
- # survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock,
- # survexpath=survexblock.survexpath+"."+name,
- # cave=survexfile.cave, survexfile=survexfile,
- # legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
- # survexblockdown.save()
- # survexblock.save()
- # survexblock = survexblockdown
- # print(insp+" - BLOCK-enter nested *begin/*end block: '{}'".format(name))
- # self.insp += "> "
- # #--------------------------------------------------------
- # self.RecursiveRecursiveLoad(survexblockdown, survexfile, fin)
- # #--------------------------------------------------------
- # # do not close the file as there may be more blocks in this one
- # # and it is re-read afresh with every nested begin-end block.
- # self.insp = self.insp[2:]
- # insp = self.insp
- # else:
- # self.depthbegin += 1
-
- # elif re.match("end$(?i)", cmd):
- # if self.depthbegin:
- # print(insp+" - BLOCK-return from nested *begin/*end block: '{}'".format(args))
- # self.depthbegin -= 1
- # else:
- # legsinblock = self.survexlegsnumber - previousnlegs
- # print(insp+" - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber))
- # survexblock.legsall = legsinblock
- # survexblock.save()
- # return
- # elif cmd == "ref":
- # self.LoadSurvexRef(survexblock, args)
- # elif cmd == "flags":
- # self.LoadSurvexFlags(args, cmd)
- # elif cmd == "data":
- # self.LoadSurvexDataCmd(survexblock, args)
- # elif cmd == "set" and re.match("names(?i)", args):
- # pass
- # elif re.match("date$(?i)", cmd):
- # self.LoadSurvexDate(survexblock, args)
- # elif re.match("team$(?i)", cmd):
- # self.LoadSurvexTeam(survexblock, args)
- # else:
- # self.LoadSurvexIgnore(survexblock, args, cmd)
- # else: # not a *cmd so we are reading data OR rx_comment failed
- # if "from" in self.stardata: # only interested in survey legs
- # self.LoadSurvexLineLeg(survexblock, svxline, sline, comment)
- # else:
- # pass # ignore all other sorts of data
- pass
-
def LinearRecursiveLoad(self, survexblock, path, svxlines):
"""Loads a single survex file. Usually used to import all the survex files which have been collated
into a single file. Loads the begin/end blocks recursively.
@@ -702,7 +604,9 @@ class LoadingSurvex():
flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includesurvexfile.path))
push = includesurvexfile.path.lower()
self.stackinclude.append(push)
+ #-----------------
self.RecursiveScan(survexblock, includesurvexfile, fininclude, flinear, fcollate)
+ #-----------------
pop = self.stackinclude.pop()
if pop != push:
message = "!!!!!!! ERROR pop != push {} != {} {}".format(pop, push, self.stackinclude)
@@ -710,8 +614,9 @@ class LoadingSurvex():
print(message,file=flinear)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
- flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, includesurvexfile.path))
- fcollate.write(";*edulcni {}\n".format(includesurvexfile.path))
+ includesurvexfile = None
+ flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, pop))
+ fcollate.write(";*edulcni {}\n".format(pop))
fininclude.close()
self.depthinclude -= 1
#--------------------------------------------------------
@@ -820,16 +725,6 @@ def FindAndLoadSurvex(survexblockroot):
mem1 = models.get_process_memory()
svx_load = None
- # print('\n - Loading All Survex Blocks (RecursiveRecursive)',file=sys.stderr)
- # svxlrl = LoadingSurvex()
-
- # finroot = survexfileroot.OpenFile()
- # svxlrl.RecursiveRecursiveLoad(survexblockroot, survexfileroot, finroot)
- # finroot.close()
- # survexlegsnumber = svxlrl.survexlegsnumber
- # survexlegsalllength = svxlrl.survexlegsalllength
- # svxlrl = None
-
# Close the logging file, Restore sys.stdout to our old saved file handle
sys.stdout.close()
print("+", file=sys.stderr)