summary refs log tree commit diff stats
path: root/parsers/survex.py
diff options
context:
space:
mode:
Diffstat (limited to 'parsers/survex.py')
-rw-r--r-- parsers/survex.py 208
1 files changed, 152 insertions, 56 deletions
diff --git a/parsers/survex.py b/parsers/survex.py
index d37fea7..2740213 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -6,7 +6,7 @@ import copy
import subprocess
from pathlib import Path
-from datetime import datetime, timedelta, date
+from datetime import datetime, timedelta, date, timezone
from django.utils.timezone import get_current_timezone
from django.utils.timezone import make_aware
@@ -39,9 +39,12 @@ todo = '''Also walk the entire tree in the :loser: repo looking for unconnected
'''
survexblockroot = None
+survexomitsroot = None
ROOTBLOCK = "rootblock"
+OMITBLOCK = "omitblock"
METRESINFEET = 3.28084
+stop_dup_warning = False
debugprint = False # Turns on debug printout for just one *include file
debugprinttrigger = "!"
# debugprinttrigger = "caves-1623/40/old/EisSVH"
@@ -182,7 +185,7 @@ class LoadingSurvex():
callcount = 0
caverncount = 0
ignoreprefix = ["surface", "kataster", "fixedpts", "gpx"]
- ignorenoncave = ["caves-1623", "caves-1626", "caves-1623/2007-neu"]
+ ignorenoncave = ["caves-1623", "caves-1623/2007-NEU","caves-1626", "caves-1624", "caves-1627", "fixedpts/gps/gps00raw", ""]
includedfilename =""
currentsurvexblock = None
currentsurvexfile = None
@@ -344,7 +347,7 @@ class LoadingSurvex():
the rest is discarded after error-checking.
Now skipping the error checking - returns as soon as the leg is not one we count.
- REPLACE ALL THIS by reading the .log output of cavern for the file
+ REPLACE ALL THIS by reading the .log output of cavern for the file. But we need the lengths per Block, not by File. Hmm.
"""
invalid_clino = 180.0
invalid_compass = 720.0
@@ -457,7 +460,7 @@ class LoadingSurvex():
print(("! Clino misread in", survexblock.survexfile.path))
print((" datastar:", datastar))
print((" Line:", ls))
- message = ' ! Value Error: Clino misread in line %s in %s' % (ls, survexblock.survexfile.path)
+ message = f' ! Value Error: Clino misread in line \'{sline.lower()}\' {datastar=} {self.datastar=} {ls=} in\n{survexblock}\n{survexblock.survexfile}\n{survexblock.survexfile.path}'
DataIssue.objects.create(parser='survexleg', message=message, url=get_offending_filename(survexblock.survexfile.path))
lclino = invalid_clino
@@ -668,17 +671,24 @@ class LoadingSurvex():
datastar["tape"] = i-1
self.datastar = copy.deepcopy(datastar)
return
- elif ls[0] == "cartesian" or ls[0] == "nosurvey" or ls[0] == "diving" or ls[0] == "cylpolar" or ls[0] == "passage":
- # message = " ! - *data {} blocks ignored. {}|{}" '{}' .format(ls[0].upper(), survexblock.name, survexblock.survexpath, args)
+ elif ls[0] == "passage" or ls[0] == "nosurvey" or ls[0] == "diving" or ls[0] == "cylpolar":
+ #message = " ! - *data {} blocks ignored. {}|{}" '{}' .format(ls[0].upper(), survexblock.name, survexblock.survexpath, args)
# print(message)
- # print(message,file=sys.stderr)
- # DataIssue.objects.create(parser='survex', message=message)
+ #print(message,file=sys.stderr)
+ #DataIssue.objects.create(parser='survex', message=message)
+ self.datastar["type"] = ls[0]
+ elif ls[0] == "cartesian": # We should not ignore this ?! Default for Germans ?
+ #message = " ! - *data {} blocks ignored. {}|{}" '{}' .format(ls[0].upper(), survexblock.name, survexblock.survexpath, args)
+ # print(message)
+ #print(message,file=sys.stderr)
+ #DataIssue.objects.create(parser='survex', message=message)
self.datastar["type"] = ls[0]
else:
message = " ! - Unrecognised *data statement '{}' {}|{}".format(args, survexblock.name, survexblock.survexpath)
print(message)
print(message,file=sys.stderr)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path))
+ self.datastar["type"] = ls[0]
def LoadSurvexFlags(self, args):
# Valid flags are DUPLICATE, SPLAY, and SURFACE, and a flag may be preceded with NOT to turn it off.
@@ -779,11 +789,11 @@ class LoadingSurvex():
# Yes we didn't find this cave, but we know it is a pending one. So not an error.
# print(f'! ALREADY PENDING {caveid}',file=sys.stderr)
return
-
- message = f" ! Error: {caveid} not a cave nor ignorable. headpath:'{headpath}' while parsing '{includelabel=}.svx' at depth:[{len(depth)}]. ignore prefix list:'{self.ignoreprefix}'"
+
+ message = f" ! Warning: cave identifier '{caveid}' (guessed from file path) is not a known cave. Need to add to expoweb/cave_data/pending.txt ? In '{includelabel}.svx' at depth:[{len(depth)}]."
print("\n"+message)
print("\n"+message,file=sys.stderr)
- DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(headpath))
+ DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(includelabel))
print(f' # datastack in LoadSurvexFile:{includelabel} type:', end="",file=sys.stderr)
for dict in self.datastack:
print(f'<{dict["type"].upper()} >', end="",file=sys.stderr)
@@ -1190,6 +1200,7 @@ class LoadingSurvex():
and reads only the *include and *begin and *end statements. It produces a linearised
list of the include tree and detects blocks included more than once.
"""
+ global stop_dup_warning
thissvxline = 0
indent = " " * self.depthinclude
sys.stderr.flush();
@@ -1202,18 +1213,23 @@ class LoadingSurvex():
if path in self.svxfileslist:
# We have already used os.normpath() so this is OK. "/../" and "//" have been simplified already.
- message = f" * Warning. Duplicate detected. We have already seen this *include '{path}' from another survex file. Detected at callcount:{self.callcount} depth:{self.depthinclude}"
- print(message)
- print(message,file=flinear)
- print("\n"+message,file=sys.stderr)
- DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path))
- if self.svxfileslist.count(path) > 20:
- message = " ! ERROR. Survex file already *included 20x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(path)
+ if stop_dup_warning:
+ #print("D",end="", file=sys.stderr)
+ pass
+ else:
+ message = f" * Warning. Duplicate detected. We have already seen this *include '{path}' from another survex file. Detected at callcount:{self.callcount} depth:{self.depthinclude}"
print(message)
print(message,file=flinear)
- print(message,file=sys.stderr)
+ #print(message,file=sys.stderr)
+ DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path))
+ if self.svxfileslist.count(path) > 2:
+ message = " ! ERROR. Should have been caught before this. Survex file already *included 2x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(path)
+ print(message)
+ print(message,file=flinear)
+ #print(message,file=sys.stderr)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path))
return
+ return
self.svxfileslist.append(path)
try:
@@ -1254,7 +1270,7 @@ class LoadingSurvex():
includepath = os.path.normpath(os.path.join(os.path.split(path)[0], re.sub(r"\.svx$", "", args)))
fullpath = os.path.join(settings.SURVEX_DATA, includepath + ".svx")
- self.RunSurvexIfNeeded(os.path.join(settings.SURVEX_DATA, includepath))
+ self.RunSurvexIfNeeded(os.path.join(settings.SURVEX_DATA, includepath), path)
self.checkUniqueness(os.path.join(settings.SURVEX_DATA, includepath))
if os.path.isfile(fullpath):
#--------------------------------------------------------
@@ -1280,7 +1296,7 @@ class LoadingSurvex():
self.depthinclude -= 1
#--------------------------------------------------------
else:
- message = " ! ERROR *include file not found for:'{}'".format(includepath)
+ message = f" ! ERROR *include file '{includepath}' not found, listed in '{fin.name}'"
print(message)
print(message,file=sys.stderr)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path))
@@ -1329,7 +1345,7 @@ class LoadingSurvex():
print(message)
- def RunSurvexIfNeeded(self,fullpath):
+ def RunSurvexIfNeeded(self,fullpath, calledpath):
now = time.time()
cav_t = now - 365*24*3600
log_t = now - 365*24*3600
@@ -1368,7 +1384,7 @@ class LoadingSurvex():
outputdir = Path(svxpath).parent
if not svxpath.is_file():
- message = f' ! BAD survex file "{fullpath}" specified in *include (somewhere).. '
+ message = f' ! BAD survex file "{fullpath}" specified in *include in {calledpath} '
DataIssue.objects.create(parser='entrances', message=message)
print(message)
return
@@ -1410,6 +1426,7 @@ class LoadingSurvex():
def FindAndLoadSurvex(survexblockroot):
"""Follows the *include links successively to find files in the whole include tree
"""
+ global stop_dup_warning
print(' - redirecting stdout to svxblks.log...')
stdout_orig = sys.stdout
# Redirect sys.stdout to the file
@@ -1419,22 +1436,21 @@ def FindAndLoadSurvex(survexblockroot):
survexfileroot = survexblockroot.survexfile # i.e. SURVEX_TOPNAME only
collatefilename = "_" + survexfileroot.path + ".svx"
+
svx_scan = LoadingSurvex()
svx_scan.callcount = 0
svx_scan.depthinclude = 0
fullpathtotop = os.path.join(survexfileroot.survexdirectory.path, survexfileroot.path)
- # Rather than do this check for the presence of the .log and .3d files synchronously here,
- # we should instead run this in a separate thread asynchronously.
print(" - RunSurvexIfNeeded cavern on '{}'".format(fullpathtotop), file=sys.stderr)
- svx_scan.RunSurvexIfNeeded(fullpathtotop)
+ svx_scan.RunSurvexIfNeeded(fullpathtotop, fullpathtotop)
svx_scan.checkUniqueness(fullpathtotop)
indent=""
fcollate = open(collatefilename, 'w')
mem0 = get_process_memory()
- print(" - MEM:{:7.2f} MB START".format(mem0),file=sys.stderr)
+ print(" - MEM:{:7.2f} MB START".format(mem0),file=sys.stderr)
flinear = open('svxlinear.log', 'w')
flinear.write(" - MEM:{:7.2f} MB START {}\n".format(mem0,survexfileroot.path))
print(" ", file=sys.stderr,end='')
@@ -1447,6 +1463,7 @@ def FindAndLoadSurvex(survexblockroot):
from pstats import SortKey
pr = cProfile.Profile()
pr.enable()
+ #print(f"###{survexblockroot=} {survexfileroot.path=}",file=sys.stderr)
#----------------------------------------------------------------
svx_scan.PushdownStackScan(survexblockroot, survexfileroot.path, finroot, flinear, fcollate)
#----------------------------------------------------------------
@@ -1455,47 +1472,113 @@ def FindAndLoadSurvex(survexblockroot):
ps = pstats.Stats(pr, stream=f)
ps.sort_stats(SortKey.CUMULATIVE)
ps.print_stats()
-
+
flinear.write("{:2} {} *edulcni {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path))
fcollate.write(";*edulcni {}\n".format(survexfileroot.path))
mem1 = get_process_memory()
flinear.write("\n - MEM:{:.2f} MB STOP {}\n".format(mem1,survexfileroot.path))
- flinear.write(" - MEM:{:.3f} MB USED\n".format(mem1-mem0))
- svxfileslist = svx_scan.svxfileslist
- flinear.write(" - {:,} survex files in linear include list \n".format(len(svxfileslist)))
- flinear.close()
- fcollate.close()
-
- print("\n - {:,} runs of survex 'cavern' refreshing .3d files \n".format(svx_scan.caverncount),file=sys.stderr)
-
- svx_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.?
- print("\n - {:,} survex files in linear include list \n".format(len(svxfileslist)),file=sys.stderr)
+ flinear.write(" - MEM:{:.3f} MB ADDITIONALLY USED\n".format(mem1-mem0))
+ flinear.write(" - {:,} survex files in linear include list \n".format(len(svx_scan.svxfileslist)))
+
+ print(" - {:,} runs of survex 'cavern' refreshing .3d files".format(svx_scan.caverncount),file=sys.stderr)
+ print(" - {:,} survex files from tree in linear include list".format(len(svx_scan.svxfileslist)),file=sys.stderr)
mem1 = get_process_memory()
- print(" - MEM:{:7.2f} MB END ".format(mem0),file=sys.stderr)
- print(" - MEM:{:7.3f} MB USED".format(mem1-mem0),file=sys.stderr)
+ print(" - MEM:{:7.2f} MB END ".format(mem1),file=sys.stderr)
+ print(" - MEM:{:7.3f} MB ADDITIONALLY USED".format(mem1-mem0),file=sys.stderr)
- a = []
+ #
+ # Process all the omitted files in :loser: with some exceptions
+ #
+ unseens = set()
b=[]
for p in Path(settings.SURVEX_DATA).rglob('*.svx'):
if p.is_file():
po = p.relative_to(Path(settings.SURVEX_DATA))
pox = po.with_suffix('')
- if str(pox) not in svxfileslist:
- print(f"[{pox}]", file=sys.stderr)
- a.append(pox)
+ if str(pox) not in svx_scan.svxfileslist:
+ # print(f"[{pox}]", file=sys.stderr)
+ unseens.add(pox)
else:
- print("'", end=" ", file=sys.stderr)
b.append(pox)
+
+ if len(b) != len(svx_scan.svxfileslist):
+ print(f" ! Mismatch. {len(b)} survex files found which should be {len(svx_scan.svxfileslist)} in main tree)", file=sys.stderr)
+
+ excpts = ["surface/terrain", "kataster/kataster-boundaries", "template", "docs", "_unseens"]
+ removals = []
+ for x in unseens:
+ for o in excpts:
+ if str(x).strip().startswith(o):
+ removals.append(x)
+ for x in removals:
+ unseens.remove(x)
+ print(f" - {len(unseens)} survex files found which were not included in main tree. ({len(svx_scan.svxfileslist)} in main tree)", file=sys.stderr)
+ print(f" -- Now loading the previously-omitted survex files.", file=sys.stderr)
+
+ with open(Path(settings.SURVEX_DATA, '_unseens.svx'), 'w') as u:
+ u.write(f"; {len(unseens):,} survex files not *included by {settings.SURVEX_TOPNAME} (which are {len(svx_scan.svxfileslist):,} files)\n")
+ u.write(f"; autogenerated by parser/survex.py from databasereset.py on '{datetime.now(timezone.utc)}'\n")
+ u.write(f"; omitting any file beginning with {excpts}\n\n")
+ u.write(f"*begin unseens\n")
+ for x in sorted(unseens):
+ u.write(f" *include {x}\n")
+ u.write(f"*end unseens\n")
- print("=>", len(a), len(b), len(svxfileslist), file=sys.stderr)
+ survexfileroot = survexblockroot.survexfile # i.e. SURVEX_TOPNAME only
+
+ omit_scan = LoadingSurvex()
+ omit_scan.callcount = 0
+ omit_scan.depthinclude = 0
+ fullpathtotop = os.path.join(survexfileroot.survexdirectory.path, '_unseens.svx')
- for i in [0,1,2,3,4,5]:
- print(f"==> [{svxfileslist[i]}]", file=sys.stderr)
+ # copy the list to prime the next pass through the files
+ omit_scan.svxfileslist = svx_scan.svxfileslist[:]
+ svx_scan.svxfileslist = [] # free memory
+ svx_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.?
+ print(" - RunSurvexIfNeeded cavern on '{}'".format(fullpathtotop), file=sys.stderr)
+ omit_scan.RunSurvexIfNeeded(fullpathtotop, fullpathtotop)
+ omit_scan.checkUniqueness(fullpathtotop)
+
+ mem0 = get_process_memory()
+ print(" - MEM:{:7.2f} MB START '_unseens'".format(mem0),file=sys.stderr)
+ #flinear = open('svxlinear.log', 'w')
+ flinear.write(f" - MEM:{mem0:7.2f} MB START '_unseens'\n")
+ print(" ", file=sys.stderr,end='')
+
+ finroot = open(fullpathtotop)
+ fcollate.write(";*include {}\n".format('_unseens.svx'))
+ flinear.write("{:2} {} *include {}\n".format(omit_scan.depthinclude, indent, '_unseens'))
+ stop_dup_warning = True
+ #----------------------------------------------------------------
+ omit_scan.PushdownStackScan(survexblockroot, '_unseens', finroot, flinear, fcollate)
+ #----------------------------------------------------------------
+ stop_dup_warning = False
+
+ flinear.write("{:2} {} *edulcni {}\n".format(omit_scan.depthinclude, indent, '_unseens'))
+ fcollate.write(";*edulcni {}\n".format('_unseens.svx'))
+ mem1 = get_process_memory()
+ flinear.write("\n - MEM:{:.2f} MB STOP {} OMIT\n".format(mem1,'_unseens.svx'))
+ flinear.write(" - MEM:{:.3f} MB ADDITIONALLY USED OMIT\n".format(mem1-mem0))
+ flinear.write(" - {:,} survex files in linear include list OMIT \n".format(len(omit_scan.svxfileslist)))
+
+ flinear.close()
+ fcollate.close()
- svxfileslist = [] # free memory
+ print("\n - {:,} runs of survex 'cavern' refreshing .3d files in the unseen list \n".format(omit_scan.caverncount),file=sys.stderr)
+
+ print("\n - {:,} survex files in linear include list including previously unseen ones \n".format(len(omit_scan.svxfileslist)),file=sys.stderr)
+ omit_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.?
+
+ mem1 = get_process_memory()
+ print(" - MEM:{:7.2f} MB END ".format(mem1),file=sys.stderr)
+ print(" - MEM:{:7.3f} MB ADDITIONALLY USED".format(mem1-mem0),file=sys.stderr)
+
+
+
+
# Before doing this, it would be good to identify the *equate and *entrance we need that are relevant to the
# entrance locations currently loaded after this by LoadPos(), but could better be done before ?
@@ -1503,6 +1586,8 @@ def FindAndLoadSurvex(survexblockroot):
print('\n - Loading All Survex Blocks (LinearLoad)',file=sys.stderr)
svx_load = LoadingSurvex()
+ mem1 = get_process_memory()
+ print(" - MEM:{:7.2f} MB after creating empty loading object.".format(mem1),file=sys.stderr)
svx_load.survexdict[survexfileroot.survexdirectory] = []
svx_load.survexdict[survexfileroot.survexdirectory].append(survexfileroot)
@@ -1525,7 +1610,7 @@ def FindAndLoadSurvex(survexblockroot):
# ps.print_stats()
print("\n - MEM:{:7.2f} MB STOP".format(mem1),file=sys.stderr)
- print(" - MEM:{:7.3f} MB USED".format(mem1-mem0),file=sys.stderr)
+ print(" - MEM:{:7.3f} MB ADDITIONALLY USED".format(mem1-mem0),file=sys.stderr)
# Close the logging file, Restore sys.stdout to our old saved file handle
sys.stdout.close()
@@ -1536,12 +1621,12 @@ def FindAndLoadSurvex(survexblockroot):
legsnumber = svx_load.legsnumber
mem1 = get_process_memory()
- print(" - Number of SurvexDirectories: {}".format(len(svx_load.survexdict)))
+ print(" - Number of SurvexDirectories: {:,}".format(len(svx_load.survexdict)))
tf=0
for d in svx_load.survexdict:
tf += len(svx_load.survexdict[d])
- print(" - Number of SurvexFiles: {}".format(tf))
- print(f" - Number of Survex legs: {legsnumber}")
+ print(f" - Number of SurvexFiles: {tf:,}")
+ print(f" - Number of Survex legs: {legsnumber:,}")
svx_load = None
return legsnumber
@@ -1562,6 +1647,14 @@ def MakeSurvexFileRoot():
fileroot.survexdirectory = directoryroot # i.e. SURVEX_DATA/SURVEX_TOPNAME
fileroot.save() # mutually dependent objects need a double-save like this
return fileroot
+
+def MakeOmitFileRoot(fn):
+ """Returns a file_object.path = _unseens.svx associated with directory_object.path = SURVEX_DATA
+ """
+ fileroot = SurvexFile(path=fn, cave=None)
+ fileroot.survexdirectory = SurvexDirectory.objects.get(path=settings.SURVEX_DATA)
+ fileroot.save()
+ return fileroot
def LoadSurvexBlocks():
@@ -1571,7 +1664,7 @@ def LoadSurvexBlocks():
SurvexDirectory.objects.all().delete()
SurvexPersonRole.objects.all().delete()
SurvexStation.objects.all().delete()
- print(" - survex Data Issues flushed")
+ print(" - survex Data Issues flushed")
DataIssue.objects.filter(parser='survex').delete()
DataIssue.objects.filter(parser='svxdate').delete()
DataIssue.objects.filter(parser='survexleg').delete()
@@ -1588,9 +1681,12 @@ def LoadSurvexBlocks():
# fix by restarting db on server
# sudo service mariadb stop
# sudo service mariadb start
-
-
survexblockroot.save()
+
+ omitsfileroot = MakeOmitFileRoot("_unseens.svx")
+ survexomitsroot = SurvexBlock(name=OMITBLOCK, survexpath="", cave=None, survexfile=omitsfileroot,
+ legsall=0, legslength=0.0)
+ survexomitsroot.save()
print(' - Loading Survex Blocks...')
memstart = get_process_memory()