path: root/parsers
Diffstat (limited to 'parsers')
-rw-r--r--  parsers/caves.py     |  9
-rw-r--r--  parsers/imports.py   | 12
-rw-r--r--  parsers/logbooks.py  |  3
-rw-r--r--  parsers/survex.py    | 90
4 files changed, 65 insertions, 49 deletions
diff --git a/parsers/caves.py b/parsers/caves.py
index d1e7406..2bb2ccc 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -192,6 +192,8 @@ def readcave(filename):
url = url[0],
filename = filename)
except:
+ # this slow db query happens on every cave, but on import we have all this in memory
+ # and don't need to do a db query. Fix this to speed it up!
# need to cope with duplicates
print(" ! FAILED to get only one CAVE when updating using: "+filename)
kaves = models_caves.Cave.objects.all().filter(kataster_number=kataster_number[0])
@@ -206,6 +208,8 @@ def readcave(filename):
c = k
for area_slug in areas:
+ # this slow db query happens on every cave, but on import we have all this in memory
+ # and don't need to do a db query. Fix this to speed it up!
area = models_caves.Area.objects.filter(short_name = area_slug)
if area:
newArea = area[0]
@@ -216,6 +220,8 @@ def readcave(filename):
primary = True
for slug in slugs:
try:
+ # this slow db query happens on every cave, but on import we have all this in memory
+ # and don't need to do a db query. Fix this to speed it up!
cs = models_caves.CaveSlug.objects.update_or_create(cave = c,
slug = slug,
primary = primary)
@@ -225,10 +231,13 @@ def readcave(filename):
print(message)
primary = False
+
for entrance in entrances:
slug = getXML(entrance, "entranceslug", maxItems = 1, context = context)[0]
letter = getXML(entrance, "letter", maxItems = 1, context = context)[0]
try:
+ # this slow db query happens on every entrance, but on import we have all this in memory
+ # and don't need to do a db query. Fix this to speed it up!
entrance = models_caves.Entrance.objects.get(entranceslug__slug = slug)
ce = models_caves.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance)
except:
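
The four comments added above all flag the same pattern: one database round trip per cave, area, slug or entrance during import. A minimal sketch of the in-memory caching they call for, using a hypothetical get_area_cached() helper (none of this is in the commit; it reuses the models_caves name already imported by parsers/caves.py):

    # Hypothetical sketch, not part of this commit: load every Area once
    # per import run, then serve readcave() lookups from a dict.
    _area_cache = {}   # short_name -> Area

    def get_area_cached(short_name):
        if not _area_cache:
            # one query here replaces one Area query per cave
            for area in models_caves.Area.objects.all():
                _area_cache[area.short_name] = area
        return _area_cache.get(short_name)

readcave() would then call get_area_cached(area_slug) in place of the models_caves.Area.objects.filter(short_name=area_slug) query; the same pattern applies to the Cave, CaveSlug and Entrance lookups flagged above.
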
diff --git a/parsers/imports.py b/parsers/imports.py
index c2965c2..f8c98a3 100644
--- a/parsers/imports.py
+++ b/parsers/imports.py
@@ -16,21 +16,21 @@ import troggle.parsers.logbooks
import troggle.parsers.QMs
def import_caves():
- print("Importing Caves to ",end="")
+ print("-- Importing Caves to ",end="")
print(django.db.connections.databases['default']['NAME'])
troggle.parsers.caves.readcaves()
def import_people():
- print("Importing People (folk.csv) to ",end="")
+ print("-- Importing People (folk.csv) to ",end="")
print(django.db.connections.databases['default']['NAME'])
troggle.parsers.people.LoadPersonsExpos()
def import_surveyscans():
- print("Importing Survey Scans")
+ print("-- Importing Survey Scans")
troggle.parsers.surveys.LoadListScans()
def import_logbooks():
- print("Importing Logbooks")
+ print("-- Importing Logbooks")
troggle.parsers.logbooks.LoadLogbooks()
def import_QMs():
@@ -40,7 +40,7 @@ def import_QMs():
def import_survex():
# when this import is moved to the top with the rest it all crashes horribly
import troggle.parsers.survex
- print("Importing Survex Blocks")
+ print("-- Importing Survex Blocks")
print(" - Survex Blocks")
troggle.parsers.survex.LoadSurvexBlocks()
print(" - Survex entrances x/y/z Positions")
@@ -53,6 +53,6 @@ def import_loadpos():
troggle.parsers.survex.LoadPos()
def import_drawingsfiles():
- print("Importing Drawings files")
+ print("-- Importing Drawings files")
troggle.parsers.surveys.LoadDrawingFiles()
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index c4f2c9c..b18e839 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -114,6 +114,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
expeditionday = expedition.get_expedition_day(date)
lookupAttribs={'date':date, 'title':title}
# 'cave' is converted to a string doing this, which renders as the cave slug.
+ # but it is a db query which we should try to avoid - rewrite this
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug':slugify(title)[:50], 'entry_type':entry_type}
lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
@@ -356,6 +357,8 @@ def SetDatesFromLogbookEntries(expedition):
Sets the date_from and date_to field for an expedition based on persontrips.
Then sets the expedition date_from and date_to based on the personexpeditions.
"""
+    # There is probably a faster way to do this. It uses a lot of db queries, but we have all this
+    # in memory already.
for personexpedition in expedition.personexpedition_set.all():
persontrips = personexpedition.persontrip_set.order_by('logbook_entry__date')
# sequencing is difficult to do
diff --git a/parsers/survex.py b/parsers/survex.py
index da0395d..7db8af0 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -324,7 +324,8 @@ class LoadingSurvex():
return self.caveslist[g]
print(' ! Failed to find cave for {}'.format(cavepath.lower()))
else:
- print(' ! No regex cave match for %s' % cavepath.lower())
+ # not a cave, but that is fine.
+ # print(' ! No regex(standard identifier) cave match for %s' % cavepath.lower())
return None
def GetSurvexDirectory(self, headpath):
@@ -353,17 +354,17 @@ class LoadingSurvex():
print("\n"+message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
- def LoadSurvexFile(self, includelabel):
+ def LoadSurvexFile(self, svxid):
"""Creates SurvexFile in the database, and SurvexDirectory if needed
with links to 'cave'
- Creates a new current survexblock with valid .survexfile and valid .survexdirectory
+ Creates a new current survexfile and valid .survexdirectory
The survexblock passed-in is not necessarily the parent. FIX THIS.
"""
depth = " " * self.depthbegin
- print("{:2}{} - NEW survexfile:'{}'".format(self.depthbegin, depth, includelabel))
- headpath, tail = os.path.split(includelabel)
+ print("{:2}{} - NEW survexfile:'{}'".format(self.depthbegin, depth, svxid))
+ headpath = os.path.dirname(svxid)
- newfile = models_survex.SurvexFile(path=includelabel)
+ newfile = models_survex.SurvexFile(path=svxid)
newfile.save() # until we do this there is no internal id so no foreign key works
self.currentsurvexfile = newfile
newdirectory = self.GetSurvexDirectory(headpath)
@@ -383,10 +384,10 @@ class LoadingSurvex():
newfile.cave = cave
#print("\n"+str(newdirectory.cave),file=sys.stderr)
else:
- self.ReportNonCaveIncludes(headpath, includelabel)
+ self.ReportNonCaveIncludes(headpath, svxid)
if not newfile.survexdirectory:
- message = " ! SurvexDirectory NOT SET in new SurvexFile {} ".format(includelabel)
+ message = " ! SurvexDirectory NOT SET in new SurvexFile {} ".format(svxid)
print(message)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
@@ -401,7 +402,7 @@ class LoadingSurvex():
def ProcessIncludeLine(self, included):
svxid = included.groups()[0]
#depth = " " * self.depthbegin
- #print("{:2}{} - Include survexfile:'{}'".format(self.depthbegin, depth, svxid))
+ #print("{:2}{} - Include survexfile:'{}' {}".format(self.depthbegin, depth, svxid, included))
self.LoadSurvexFile(svxid)
self.stacksvxfiles.append(self.currentsurvexfile)
@@ -426,8 +427,10 @@ class LoadingSurvex():
self.LoadSurvexQM(survexblock, qmline)
included = self.rx_comminc.match(comment)
- # ;*include means we have been included; not 'proceed to include' which *include means
+ # ;*include means 'we have been included'; whereas *include means 'proceed to include'
if included:
+ #depth = " " * self.depthbegin
+ #print("{:2}{} - Include comment:'{}' {}".format(self.depthbegin, depth, comment, included))
self.ProcessIncludeLine(included)
edulcni = self.rx_commcni.match(comment)
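
For context on the ;*include / ;*edulcni pair handled here: the collation pass writes those comment markers around each included file ('edulcni' is 'include' reversed), and LinearLoad replays them to track file boundaries. A standalone sketch of that convention, with guessed regexes standing in for the real rx_comminc and rx_commcni:

    import re

    # guessed stand-ins for LoadingSurvex.rx_comminc / rx_commcni; they
    # see the comment text after the leading ';' has been stripped
    rx_comminc = re.compile(r'(?i)^\*include\s+(\S+)')
    rx_commcni = re.compile(r'(?i)^\*edulcni\s+(\S+)')

    def classify_comment(comment):
        if rx_comminc.match(comment):
            return 'entering included file'
        if rx_commcni.match(comment):
            return 'leaving included file'
        return 'ordinary comment'
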
@@ -457,7 +460,7 @@ class LoadingSurvex():
def LinearLoad(self, survexblock, path, svxlines):
"""Loads a single survex file. Usually used to import all the survex files which have been collated
- into a single file. Loads the begin/end blocks recursively.
+ into a single file. Loads the begin/end blocks using a stack for labels.
"""
self.relativefilename = path
cave = self.IdentifyCave(path) # this will produce null for survex files which are geographic collections
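
The reworded docstring is the key design note in this commit: begin/end nesting is now tracked with an explicit label stack rather than recursion. A minimal standalone illustration of the technique (not the troggle code, which also creates SurvexBlock rows as it goes):

    import re

    rx_begin = re.compile(r'(?i)^\s*\*begin\s*(\S*)')
    rx_end = re.compile(r'(?i)^\s*\*end\s*(\S*)')

    def walk(svxlines):
        stack = []  # one label per currently-open *begin
        for line in svxlines:
            m = rx_begin.match(line)
            if m:
                stack.append(m.group(1).lower())
                continue
            m = rx_end.match(line)
            if m:
                if not stack:
                    print("!! ERROR *end with no matching *begin:", line.strip())
                    continue
                popped = stack.pop()
                if popped != m.group(1).lower():
                    print("!! ERROR mismatch in BEGIN/END labels", popped, m.group(1))
        if stack:
            print("!! ERROR unclosed *begin blocks:", stack)
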
@@ -466,19 +469,25 @@ class LoadingSurvex():
self.currentsurvexfile.save() # django insists on this although it is already saved !?
blockcount = 0
+ lineno = 0
def tickle():
nonlocal blockcount
blockcount +=1
if blockcount % 10 ==0 :
print(".", file=sys.stderr,end='')
- if blockcount % 500 ==0 :
+ if blockcount % 200 ==0 :
print("\n", file=sys.stderr,end='')
- sys.stderr.flush();
+ print(" - MEM:{:7.3f} MB in use".format(models.get_process_memory()),file=sys.stderr)
+ sys.stderr.flush()
for svxline in svxlines:
- sline, comment = self.rx_comment.match(svxline.strip()).groups()
+ lineno += 1
+ sline, comment = self.rx_comment.match(svxline).groups()
if comment:
+ depth = " " * self.depthbegin
+ print("{:4} {:2}{} - Include comment:'{}' {}".format(lineno, self.depthbegin, depth, comment, sline))
self.LoadSurvexComment(survexblock, comment) # this catches the ;*include and ;*edulcni lines too
+
if not sline:
continue # skip blank lines
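
models.get_process_memory(), used in the new progress output, is troggle's own helper and its body is not shown in this diff. One plausible Linux-only implementation, reading the current resident set size from /proc/self/statm (an assumption, not the actual troggle code):

    import os

    def get_process_memory():
        """Current resident set size of this process, in MB (Linux only)."""
        page_size = os.sysconf('SC_PAGE_SIZE')       # bytes per page
        with open('/proc/self/statm') as f:
            rss_pages = int(f.read().split()[1])     # second field: resident pages
        return rss_pages * page_size / (1024 * 1024)
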
@@ -503,10 +512,10 @@ class LoadingSurvex():
pathlist += "." + id
newsurvexblock = models_survex.SurvexBlock(name=blockid, parent=survexblock,
survexpath=pathlist,
- title = survexblock.title, # copy parent inititally
cave=self.currentcave, survexfile=self.currentsurvexfile,
legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
newsurvexblock.save()
+                newsurvexblock.title = "("+survexblock.title+")" # copy parent initially
survexblock = newsurvexblock
# survexblock.survexfile.save()
survexblock.save() # django insists on this , but we want to save at the end !
@@ -564,7 +573,7 @@ class LoadingSurvex():
else:
pass # ignore all other sorts of data
- def RecursiveScan(self, survexblock, survexfile, fin, flinear, fcollate):
+ def RecursiveScan(self, survexblock, path, fin, flinear, fcollate):
"""Follows the *include links in all the survex files from the root file 1623.svx
and reads only the *include and *begin and *end statements. It produces a linearised
list of the include tree
@@ -577,27 +586,27 @@ class LoadingSurvex():
if self.callcount % 500 ==0 :
print("\n", file=sys.stderr,end='')
- if survexfile in self.svxfileslist:
- message = " * Warning. Survex file already seen: {}".format(survexfile.path)
+ if path in self.svxfileslist:
+ message = " * Warning. Duplicate in *include list at:{} depth:{} file:{}".format(self.callcount, self.depthinclude, path)
print(message)
print(message,file=flinear)
- print(message,file=sys.stderr)
+ print("\n"+message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
- if self.svxfileslist.count(survexfile) > 20:
- message = " ! ERROR. Survex file already seen 20x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(survexfile.path)
+ if self.svxfileslist.count(path) > 20:
+ message = " ! ERROR. Survex file already seen 20x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(path)
print(message)
print(message,file=flinear)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
return
- self.svxfileslist.append(survexfile)
+ self.svxfileslist.append(path)
svxlines = fin.read().splitlines()
for svxline in svxlines:
self.lineno += 1
includestmt =self.rx_include.match(svxline)
if not includestmt:
- fcollate.write("{}\n".format(svxline))
+ fcollate.write("{}\n".format(svxline.strip()))
sline, comment = self.rx_comment.match(svxline.strip()).groups()
mstar = self.rx_star.match(sline)
@@ -605,40 +614,35 @@ class LoadingSurvex():
cmd, args = mstar.groups()
cmd = cmd.lower()
if re.match("(?i)include$", cmd):
- includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args)))
- path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
-
- includesurvexfile = models_survex.SurvexFile(path=includepath)
- includesurvexfile.save()
+ includepath = os.path.normpath(os.path.join(os.path.split(path)[0], re.sub(r"\.svx$", "", args)))
+ #path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
- if includesurvexfile.exists():
- # do not create SurvexFile in DB here by doing includesurvexfile.save(). Do it when reading data.
+ fullpath = os.path.join(settings.SURVEX_DATA, includepath + ".svx")
+ if os.path.isfile(fullpath):
#--------------------------------------------------------
self.depthinclude += 1
- fininclude = includesurvexfile.OpenFile()
- fcollate.write(";*include {}\n".format(includesurvexfile.path))
- flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includesurvexfile.path))
- push = includesurvexfile.path.lower()
+ fininclude = open(fullpath,'r')
+ fcollate.write(";*include {}\n".format(includepath))
+ flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includepath))
+ push = includepath.lower()
self.stackinclude.append(push)
#-----------------
- self.RecursiveScan(survexblock, includesurvexfile, fininclude, flinear, fcollate)
+ self.RecursiveScan(survexblock, includepath, fininclude, flinear, fcollate)
#-----------------
pop = self.stackinclude.pop()
if pop != push:
- message = "!!!!!!! ERROR pop != push {} != {} {}".format(pop, push, self.stackinclude)
+                message = "!! ERROR mismatch *include pop!=push {}!={} {}".format(pop, push, self.stackinclude)
print(message)
print(message,file=flinear)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
- includesurvexfile.path += "-TEMP"
- includesurvexfile = None
flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, pop))
fcollate.write(";*edulcni {}\n".format(pop))
fininclude.close()
self.depthinclude -= 1
#--------------------------------------------------------
else:
- message = " ! ERROR *include file not found for [{}]:'{}'".format(includesurvexfile, includepath)
+ message = " ! ERROR *include file not found for:'{}'".format(includepath)
print(message)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
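
RecursiveScan now keys everything on plain path strings: a duplicate in svxfileslist produces a warning, and more than 20 sightings of one path aborts that branch as a probable *include cycle. A standalone sketch of that guard, with a hypothetical read_includes() standing in for the real file parsing:

    def read_includes(path, tree):
        """Hypothetical stand-in: the *include targets of one file."""
        return tree.get(path, [])

    def scan(path, seen, tree):
        if path in seen:
            print(" * Warning. Duplicate in *include list:", path)
        if seen.count(path) > 20:
            print(" ! ERROR. Probable infinite *include loop. Aborting:", path)
            return
        seen.append(path)
        for child in read_includes(path, tree):
            scan(child, seen, tree)

    # 'a' includes '1623' again, so the scan warns and eventually aborts
    scan('1623', [], {'1623': ['a', 'b'], 'a': ['1623']})
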
@@ -659,7 +663,7 @@ class LoadingSurvex():
args = " "
popargs = self.stackbegin.pop()
if popargs != args.lower():
- message = "!!!!!!! ERROR BEGIN/END pop != push {} != {}\n{}".format(popargs, args, self. stackbegin)
+                message = "!! ERROR mismatch in BEGIN/END labels pop!=push '{}'!='{}'\n{}".format(popargs, args, self.stackbegin)
print(message)
print(message,file=flinear)
print(message,file=sys.stderr)
@@ -701,7 +705,7 @@ def FindAndLoadSurvex(survexblockroot):
fcollate.write(";*include {}\n".format(survexfileroot.path))
flinear.write("{:2} {} *include {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path))
#----------------------------------------------------------------
- svx_scan.RecursiveScan(survexblockroot, survexfileroot, finroot, flinear, fcollate)
+ svx_scan.RecursiveScan(survexblockroot, survexfileroot.path, finroot, flinear, fcollate)
#----------------------------------------------------------------
flinear.write("{:2} {} *edulcni {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path))
fcollate.write(";*edulcni {}\n".format(survexfileroot.path))
@@ -712,7 +716,7 @@ def FindAndLoadSurvex(survexblockroot):
flinear.write(" - {:,} survex files in linear include list \n".format(len(svxfileslist)))
flinear.close()
fcollate.close()
- svx_scan = None
+ svx_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.?
print("\n - {:,} survex files in linear include list \n".format(len(svxfileslist)),file=sys.stderr)
mem1 = models.get_process_memory()
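
On the question in the new comment: in CPython, rebinding svx_scan to None drops the last strong reference to the LoadingSurvex instance, and the instance plus everything reachable only through it (its lists, dicts and their contents) is reclaimed at once by reference counting; gc.collect() only matters if the object graph contains reference cycles. A small demonstration:

    import gc, weakref

    class Holder:
        def __init__(self):
            self.biglist = list(range(1_000_000))  # stand-in for svxfileslist etc.

    h = Holder()
    probe = weakref.ref(h)
    h = None           # like 'svx_scan = None' above
    gc.collect()       # only needed when reference cycles are involved
    print(probe())     # None: the Holder and its list have been freed
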
@@ -724,7 +728,7 @@ def FindAndLoadSurvex(survexblockroot):
# entrance locations currently loaded after this by LoadPos(), but could better be done before ?
# look in MapLocations() for how we find the entrances
- print('\n - Loading All Survex Blocks (LinearRecursive)',file=sys.stderr)
+ print('\n - Loading All Survex Blocks (LinearLoad)',file=sys.stderr)
svx_load = LoadingSurvex()
svx_load.svxdirs[""] = survexfileroot.survexdirectory