summaryrefslogtreecommitdiffstats
path: root/parsers
diff options
context:
space:
mode:
Diffstat (limited to 'parsers')
-rw-r--r--parsers/caves.py53
-rw-r--r--parsers/survex.py195
2 files changed, 75 insertions, 173 deletions
diff --git a/parsers/caves.py b/parsers/caves.py
index 3c5d98e..ff87bcd 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -1,18 +1,27 @@
-# -*- coding: utf-8 -*-
import os
import re
from django.conf import settings
-import troggle.core.models as models
+from troggle.core.models import DataIssue, get_process_memory
import troggle.core.models_caves as models_caves
def readcaves():
- # Clear the cave data issues as we are reloading
- models.DataIssue.objects.filter(parser='caves').delete()
+ print(" - Deleting Caves and Entrances")
+ models_caves.Cave.objects.all().delete()
+ models_caves.Entrance.objects.all().delete()
+ # Clear the cave data issues and the caves as we are reloading
+ DataIssue.objects.filter(parser='caves').delete()
+ DataIssue.objects.filter(parser='entrances').delete()
+ # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
+ area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None)
+ area_1626 = models_caves.Area.objects.update_or_create(short_name = "1626", parent = None)
+
+ print (" - Setting pending caves")
# Do this first, so that these empty entries are overwritten as they get properly created.
# For those caves which do not have XML files even though they exist and have surveys
+ # also needs to be done *before* entrances so that the entrance-cave links work properly.
forgotten = ["2007-04", "2007-05", "2007-06", "2007-07", "2007-12", "2009-01", "2009-02",
"2010-06", "2010-07", "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06",
"2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888",
@@ -21,32 +30,30 @@ def readcaves():
try:
cave = models_caves.Cave(
unofficial_number = k,
- official_name = "Mislaid cave - created as empty object. No XML available at this time.",
+ official_name = "Pending cave write-up - creating as empty object. No XML file available yet.",
notes="_Survex file found in loser repo but no description in expoweb")
if cave:
- print("{} {}".format(cave.unofficial_number, cave.official_name))
+ cave.save() # must save to have id before foreign keys work
+ cave.area = area_1623
cave.save()
+ message = " ! {} {}".format(cave.unofficial_number, cave.official_name)
+ DataIssue.objects.create(parser='caves', message=message)
+ print(message)
else:
print("Failed to create cave {} ".format(k))
except:
- message = " ! Forgotten cave error, slug: %s forgotten-id: %s" % (slug, k)
- models.DataIssue.objects.create(parser='caves', message=message)
+ message = " ! Error. Cannot create pending cave, pending-id:{}".format(k)
+ DataIssue.objects.create(parser='caves', message=message)
print(message)
-
- # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
- area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None)
- area_1626 = models_caves.Area.objects.update_or_create(short_name = "1626", parent = None)
- print(" - Reading Entrances")
+ raise
+ print(" - Reading Entrances from entrance descriptions xml files")
for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
if filename.endswith('.html'):
readentrance(filename)
- print (" - Reading Caves")
+ print(" - Reading Caves from cave descriptions xml files")
for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
if filename.endswith('.html'):
readcave(filename)
-
-
-
def readentrance(filename):
# Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
@@ -120,7 +127,7 @@ def readentrance(filename):
primary = primary)
for k in kents:
message = " ! - DUPLICATE in db. entrance:"+ str(k.entrance) + ", slug:" + str(k.slug())
- models.DataIssue.objects.create(parser='caves', message=message)
+ DataIssue.objects.create(parser='caves', message=message)
print(message)
for k in kents:
if k.slug() != None:
@@ -189,7 +196,7 @@ def readcave(filename):
kaves = models_caves.Cave.objects.all().filter(kataster_number=kataster_number[0])
for k in kaves:
message = " ! - DUPLICATES in db. kataster:"+ str(k.kataster_number) + ", slug:" + str(k.slug())
- models.DataIssue.objects.create(parser='caves', message=message)
+ DataIssue.objects.create(parser='caves', message=message)
print(message)
for k in kaves:
if k.slug() != None:
@@ -213,7 +220,7 @@ def readcave(filename):
primary = primary)
except:
message = " ! Cave update/create failure: %s, skipping file %s" % (slug, context)
- models.DataIssue.objects.create(parser='caves', message=message)
+ DataIssue.objects.create(parser='caves', message=message)
print(message)
primary = False
@@ -225,7 +232,7 @@ def readcave(filename):
ce = models_caves.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance)
except:
message = " ! Entrance setting failure, slug: %s letter: %s" % (slug, letter)
- models.DataIssue.objects.create(parser='caves', message=message)
+ DataIssue.objects.create(parser='caves', message=message)
print(message)
@@ -235,13 +242,13 @@ def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True,
message = " ! %(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),
"itemname": itemname,
"min": minItems} + context
- models.DataIssue.objects.create(parser='caves', message=message)
+ DataIssue.objects.create(parser='caves', message=message)
print(message)
if maxItems is not None and len(items) > maxItems and printwarnings:
message = " ! %(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items),
"itemname": itemname,
"max": maxItems} + context
- models.DataIssue.objects.create(parser='caves', message=message)
+ DataIssue.objects.create(parser='caves', message=message)
print(message)
return items \ No newline at end of file
diff --git a/parsers/survex.py b/parsers/survex.py
index 8bcbea2..6d266a3 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -68,6 +68,8 @@ class LoadingSurvex():
insp = ""
callcount = 0
stardata ={}
+ ignoreprefix = ["surface", "kataster", "fixedpts", "gpx"]
+ ignorenoncave = ["caves-1623", "caves-1623/2007-neu"]
includedfilename =""
currentsurvexblock = None
currentsurvexfile = None
@@ -308,7 +310,8 @@ class LoadingSurvex():
def IdentifyCave(self, cavepath):
if cavepath.lower() in self.caveslist:
return self.caveslist[cavepath.lower()]
-
+        # TODO - some of this is already done in generating self.caveslist so simplify this
+ # esp. as it is in a loop.
path_match = self.rx_cave.search(cavepath)
if path_match:
sluggy = '{}-{}'.format(path_match.group(1), path_match.group(2))
@@ -329,6 +332,19 @@ class LoadingSurvex():
self.svxdirs[headpath.lower()] = models_survex.SurvexDirectory(path=headpath, primarysurvexfile=self.currentsurvexfile)
return self.svxdirs[headpath.lower()]
+ def ReportNonCaveIncludes(self, headpath, includelabel):
+    """Ignore surface, kataster, fixedpts and gpx *include survex files
+ """
+ if headpath in self.ignorenoncave:
+ return
+ for i in self.ignoreprefix:
+ if headpath.startswith(i):
+ return
+ message = " ! {} is not a cave. (while creating {} sfile & sdirectory)".format(headpath, includelabel)
+ print(message)
+ print(message,file=sys.stderr)
+ models.DataIssue.objects.create(parser='survex', message=message)
+
def LoadSurvexFile(self, includelabel):
"""Creates SurvexFile in the database, and SurvexDirectory if needed
with links to 'cave'
@@ -338,28 +354,37 @@ class LoadingSurvex():
depth = " " * self.depthbegin
print("{:2}{} - NEW survexfile:'{}'".format(self.depthbegin, depth, includelabel))
- newsurvexfile = models_survex.SurvexFile(path=includelabel)
+ newfile = models_survex.SurvexFile(path=includelabel)
headpath, tail = os.path.split(includelabel)
- newsurvexdirectory = self.GetSurvexDirectory(headpath)
- newsurvexfile.survexdirectory = newsurvexdirectory
+ newdirectory = self.GetSurvexDirectory(headpath)
+ if not newdirectory:
+ message = " ! 'None' SurvexDirectory returned from GetSurvexDirectory({})".format(headpath)
+ print(message)
+ print(message,file=sys.stderr)
+ models.DataIssue.objects.create(parser='survex', message=message)
+ newfile.survexdirectory = newdirectory
cave = self.IdentifyCave(headpath)
if cave:
- newsurvexdirectory.cave = cave
- newsurvexfile.cave = cave
- # else:
- # message = " ! Cannot identify cave from {} when creating sfile & sdirectory".format(headpath)
- # print(message)
- # print(message,file=sys.stderr)
- # models.DataIssue.objects.create(parser='survex', message=message)
+ newdirectory.cave = cave
+ newfile.cave = cave
+ else:
+ self.ReportNonCaveIncludes(headpath, includelabel)
+
+
+ if not newfile.survexdirectory:
+ message = " ! SurvexDirectory NOT SET in new SurvexFile {} ".format(includelabel)
+ print(message)
+ print(message,file=sys.stderr)
+ models.DataIssue.objects.create(parser='survex', message=message)
self.currentsurvexfile.save() # django insists on this although it is already saved !?
try:
- newsurvexdirectory.save()
+ newdirectory.save()
except:
- print(newsurvexdirectory, file=sys.stderr)
- print(newsurvexdirectory.primarysurvexfile, file=sys.stderr)
+ print(newdirectory, file=sys.stderr)
+ print(newdirectory.primarysurvexfile, file=sys.stderr)
raise
- self.currentsurvexfile = newsurvexfile
+ self.currentsurvexfile = newfile
def ProcessIncludeLine(self, included):
svxid = included.groups()[0]
@@ -418,129 +443,6 @@ class LoadingSurvex():
if cave:
survexfile.cave = cave
- def RecursiveRecursiveLoad(self, survexblock, survexfile, fin):
- """Follows the *include links in all the survex files from the root file 1623.svx
- and reads in the survex blocks, other data and the wallet references (scansfolder) as it
- goes. This part of the data include process is where the maximum memory is used and where it
- crashes on memory-constrained machines. Begin-end blocks may also be nested.
- """
- # self.LoadSurvexSetup(survexblock, survexfile)
- # insp =self.insp
- # previousnlegs = 0
-
- # svxlines = fin.read().splitlines()
- # # cannot close file now as may be recursively called with the same fin if nested *begin-end
-
- # for svxline in svxlines:
- # self.lineno += 1
- # sline, comment = self.rx_comment.match(svxline.strip()).groups()
- # if comment:
- # self.LoadSurvexComment(survexblock, comment)
- # if not sline:
- # continue # skip blank lines
-
- # # detect the star command
- # mstar = self.rx_star.match(sline)
- # if mstar: # yes we are reading a *cmd
- # cmd, args = mstar.groups()
- # cmd = cmd.lower()
- # if re.match("include$(?i)", cmd):
- # cave = self.IdentifyCave(args)
- # if cave:
- # survexfile.cave = cave
-
- # includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args)))
- # print((insp+' - INCLUDE-go path found, including - ' + args))
-
- # includesurvexfile = models_survex.SurvexFile(path=includepath)
- # includesurvexfile.save()
- # includesurvexfile.SetDirectory()
- # if includesurvexfile.exists():
- # survexblock.save()
- # self.insp += "> "
- # #--------------------------------------------------------
- # fininclude = includesurvexfile.OpenFile()
- # self.RecursiveRecursiveLoad(survexblock, includesurvexfile, fininclude)
- # fininclude.close()
- # #--------------------------------------------------------
- # self.insp = self.insp[2:]
- # insp = self.insp
- # print((insp+' - INCLUDE-return from include - ' + includepath))
- # else:
- # print((insp+' ! ERROR *include file not found for %s' % includesurvexfile))
-
- # elif re.match("begin$(?i)", cmd):
- # # On a *begin statement we start a new survexblock.
- # # There should not be any *include inside a begin-end block, so this is a simple
- # # load not a recursive fileload. But there may be many blocks nested to any depth in one file.
- # if args:
- # newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args))
- # # Try to find the cave in the DB if not use the string as before
- # path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath)
- # if path_match:
- # pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
- # # print(insp+pos_cave)
- # cave = models_caves.getCaveByReference(pos_cave)
- # if cave:
- # survexfile.cave = cave
- # else:
- # print((insp+' - No match (b) for %s' % newsvxpath))
-
- # previousnlegs = self.survexlegsnumber
- # name = args.lower()
- # print(insp+' - Begin found for:{}, creating new SurvexBlock '.format(name))
- # # the recursive call re-reads the entire file. This is wasteful. We should pass in only
- # # the un-parsed part of the file.
- # survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock,
- # survexpath=survexblock.survexpath+"."+name,
- # cave=survexfile.cave, survexfile=survexfile,
- # legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
- # survexblockdown.save()
- # survexblock.save()
- # survexblock = survexblockdown
- # print(insp+" - BLOCK-enter nested *begin/*end block: '{}'".format(name))
- # self.insp += "> "
- # #--------------------------------------------------------
- # self.RecursiveRecursiveLoad(survexblockdown, survexfile, fin)
- # #--------------------------------------------------------
- # # do not close the file as there may be more blocks in this one
- # # and it is re-read afresh with every nested begin-end block.
- # self.insp = self.insp[2:]
- # insp = self.insp
- # else:
- # self.depthbegin += 1
-
- # elif re.match("end$(?i)", cmd):
- # if self.depthbegin:
- # print(insp+" - BLOCK-return from nested *begin/*end block: '{}'".format(args))
- # self.depthbegin -= 1
- # else:
- # legsinblock = self.survexlegsnumber - previousnlegs
- # print(insp+" - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber))
- # survexblock.legsall = legsinblock
- # survexblock.save()
- # return
- # elif cmd == "ref":
- # self.LoadSurvexRef(survexblock, args)
- # elif cmd == "flags":
- # self.LoadSurvexFlags(args, cmd)
- # elif cmd == "data":
- # self.LoadSurvexDataCmd(survexblock, args)
- # elif cmd == "set" and re.match("names(?i)", args):
- # pass
- # elif re.match("date$(?i)", cmd):
- # self.LoadSurvexDate(survexblock, args)
- # elif re.match("team$(?i)", cmd):
- # self.LoadSurvexTeam(survexblock, args)
- # else:
- # self.LoadSurvexIgnore(survexblock, args, cmd)
- # else: # not a *cmd so we are reading data OR rx_comment failed
- # if "from" in self.stardata: # only interested in survey legs
- # self.LoadSurvexLineLeg(survexblock, svxline, sline, comment)
- # else:
- # pass # ignore all other sorts of data
- pass
-
def LinearRecursiveLoad(self, survexblock, path, svxlines):
"""Loads a single survex file. Usually used to import all the survex files which have been collated
into a single file. Loads the begin/end blocks recursively.
@@ -702,7 +604,9 @@ class LoadingSurvex():
flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includesurvexfile.path))
push = includesurvexfile.path.lower()
self.stackinclude.append(push)
+ #-----------------
self.RecursiveScan(survexblock, includesurvexfile, fininclude, flinear, fcollate)
+ #-----------------
pop = self.stackinclude.pop()
if pop != push:
message = "!!!!!!! ERROR pop != push {} != {} {}".format(pop, push, self.stackinclude)
@@ -710,8 +614,9 @@ class LoadingSurvex():
print(message,file=flinear)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
- flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, includesurvexfile.path))
- fcollate.write(";*edulcni {}\n".format(includesurvexfile.path))
+ includesurvexfile = None
+ flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, pop))
+ fcollate.write(";*edulcni {}\n".format(pop))
fininclude.close()
self.depthinclude -= 1
#--------------------------------------------------------
@@ -820,16 +725,6 @@ def FindAndLoadSurvex(survexblockroot):
mem1 = models.get_process_memory()
svx_load = None
- # print('\n - Loading All Survex Blocks (RecursiveRecursive)',file=sys.stderr)
- # svxlrl = LoadingSurvex()
-
- # finroot = survexfileroot.OpenFile()
- # svxlrl.RecursiveRecursiveLoad(survexblockroot, survexfileroot, finroot)
- # finroot.close()
- # survexlegsnumber = svxlrl.survexlegsnumber
- # survexlegsalllength = svxlrl.survexlegsalllength
- # svxlrl = None
-
# Close the logging file, Restore sys.stdout to our old saved file handle
sys.stdout.close()
print("+", file=sys.stderr)