summary refs log tree commit diff stats
path: root/parsers
diff options
context:
space:
mode:
Diffstat (limited to 'parsers')
-rw-r--r-- parsers/QMs.py 2
-rw-r--r-- parsers/survex.py 183
-rw-r--r-- parsers/surveys.py 23
3 files changed, 80 insertions, 128 deletions
diff --git a/parsers/QMs.py b/parsers/QMs.py
index 025438e..d96b665 100644
--- a/parsers/QMs.py
+++ b/parsers/QMs.py
@@ -1,7 +1,7 @@
import csv
import os
import re
-#from datetime import *
+from datetime import date
from django.conf import settings
diff --git a/parsers/survex.py b/parsers/survex.py
index f236ec8..a290a83 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -2,6 +2,8 @@ import sys
import os
import re
import time
+import resource
+
from datetime import datetime, timedelta
from subprocess import call, Popen, PIPE
@@ -29,14 +31,12 @@ ROOTBLOCK = "rootblock"
def LoadSurvexLineLeg(survexblock, stardata, sline, comment, cave):
+ """This reads compass, clino and tape data but only keeps the tape lengths,
+ the rest is discarded after error-checking.
+ """
global survexlegsalllength
global survexlegsnumber
- # The try catches here need replacing as they are relatively expensive
ls = sline.lower().split()
- #ssfrom = survexblock.MakeSurvexStation(ls[stardata["from"]])
- #ssto = survexblock.MakeSurvexStation(ls[stardata["to"]])
-
-# survexleg = models_survex.SurvexLeg(block=survexblock, stationfrom=ssfrom, stationto=ssto)
survexleg = models_survex.SurvexLeg()
# this next fails for two surface survey svx files which use / for decimal point
# e.g. '29/09' in the tape measurement, or use decimals but in brackets, e.g. (06.05)
@@ -97,11 +97,9 @@ def LoadSurvexLineLeg(survexblock, stardata, sline, comment, cave):
if cave:
survexleg.cave = cave
- # only save proper legs
# No need to save as we are measuring lengths only on parsing now.
# delete the object so that django autosaving doesn't save it.
survexleg = None
- #survexleg.save()
itape = stardata.get("tape")
if itape:
@@ -111,7 +109,6 @@ def LoadSurvexLineLeg(survexblock, stardata, sline, comment, cave):
except ValueError:
print("! Length not added")
# No need to save as we are measuring lengths only on parsing now.
- #survexblock.save()
def LoadSurvexEquate(survexblock, sline):
@@ -146,12 +143,12 @@ rx_starref = re.compile(r'(?i)^\s*\*ref[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*
# $(?i)""", re.X) # the end (do the whole thing case insensitively)
rx_team = re.compile(r"(?i)(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$")
-rx_team_member = re.compile(r"(?i) and | / |, | & | \+ |^both$|^none$")
+rx_person = re.compile(r"(?i) and | / |, | & | \+ |^both$|^none$")
rx_qm = re.compile(r'^\s*QM(\d)\s+?([a-dA-DxX])\s+([\w\-]+)\.(\d+)\s+(([\w\-]+)\.(\d+)|\-)\s+(.+)$')
insp = ""
callcount = 0
-def RecursiveLoad(survexblock, survexfile, fin, textlines):
+def RecursiveLoad(survexblock, survexfile, fin):
"""Follows the *include links in all the survex files from the root file 1623.svx
and reads in the survex blocks, other data and the wallet references (survexscansfolder) as it
goes. This part of the data import process is where the maximum memory is used and where it
@@ -165,7 +162,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
global callcount
global survexlegsnumber
- print(insp+" - Reading file: " + survexblock.survexfile.path + " <> " + survexfile.path)
+ print(insp+" - MEM:{} Reading. parent:{} <> {} ".format(get_process_memory(),survexblock.survexfile.path,survexfile.path))
stamp = datetime.now()
lineno = 0
@@ -199,12 +196,12 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
if len(wallet)<2:
wallet = "0" + wallet
refscan = "%s#%s%s" % (yr, letterx, wallet )
- survexscansfolders = models_survex.SurvexScansFolder.objects.filter(walletname=refscan)
- if survexscansfolders:
- survexblock.survexscansfolder = survexscansfolders[0]
+ manyscansfolders = models_survex.ScansFolder.objects.filter(walletname=refscan)
+ if manyscansfolders:
+ survexblock.survexscansfolder = manyscansfolders[0]
survexblock.save()
else:
- message = ' ! Wallet ; ref {} - NOT found in survexscansfolders {}'.format(refscan, survexblock.survexfile.path)
+ message = ' ! Wallet ; ref {} - NOT found in manyscansfolders {}'.format(refscan, survexblock.survexfile.path)
print((insp+message))
models.DataIssue.objects.create(parser='survex', message=message)
@@ -265,12 +262,12 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
assert (int(yr)>1960 and int(yr)<2039), "Wallet year out of bounds: %s" % yr
assert (int(wallet)<100), "Wallet number more than 100: %s" % wallet
refscan = "%s#%s%s" % (yr, letterx, wallet)
- survexscansfolders = models_survex.SurvexScansFolder.objects.filter(walletname=refscan)
- if survexscansfolders:
- survexblock.survexscansfolder = survexscansfolders[0]
+ manyscansfolders = models_survex.ScansFolder.objects.filter(walletname=refscan)
+ if manyscansfolders:
+ survexblock.survexscansfolder = manyscansfolders[0]
survexblock.save()
else:
- message = ' ! Wallet *REF {} - NOT found in survexscansfolders {}'.format(refscan, survexblock.survexfile.path)
+ message = ' ! Wallet *REF {} - NOT found in manyscansfolders {}'.format(refscan, survexblock.survexfile.path)
print((insp+message))
models.DataIssue.objects.create(parser='survex', message=message)
continue
@@ -291,17 +288,18 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
cmd = cmd.lower()
if re.match("include$(?i)", cmd):
includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line)))
- print((insp+' - Include path found including - ' + includepath))
- # Try to find the cave in the DB if not use the string as before
+ print((insp+' - Include path found, including - ' + includepath))
+ # Try to find the cave in the DB; if not found, use the string as before
path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
if path_match:
pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
- # print(insp+pos_cave)
+ print((insp+' - Match in DB (i) for cave {}.'.format(pos_cave)))
cave = models_caves.getCaveByReference(pos_cave)
if cave:
survexfile.cave = cave
else:
- print((insp+' - No match in DB (i) for %s, so loading..' % includepath))
+ print((insp+' - NO Match in DB (i) for a cave for {}'.format(includepath)))
+
includesurvexfile = models_survex.SurvexFile(path=includepath)
includesurvexfile.save()
includesurvexfile.SetDirectory()
@@ -309,10 +307,17 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
survexblock.save()
fininclude = includesurvexfile.OpenFile()
insp += "> "
- RecursiveLoad(survexblock, includesurvexfile, fininclude, textlines)
+ RecursiveLoad(survexblock, includesurvexfile, fininclude)
+ #--------------------------------------------------------
+ fininclude.close()
insp = insp[2:]
+ else:
+ print((insp+' ! ERROR *include file not found for %s' % includesurvexfile))
elif re.match("begin$(?i)", cmd):
+ # On a *begin statement we start a new survexblock.
+ # There should not be any *include inside a begin-end block, so this is a simple
+ # load not a recursive load. But there may be many blocks in one file.
if line:
newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line))
# Try to find the cave in the DB if not use the string as before
@@ -328,16 +333,16 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
previousnlegs = survexlegsnumber
name = line.lower()
- print((insp+' - Begin found for: ' + name))
-# survexblockdown = models_survex.SurvexBlock(name=name, begin_char=fin.tell(), parent=survexblock, survexpath=survexblock.survexpath+"."+name, cave=survexfile.cave, survexfile=survexfile, totalleglength=0.0)
+ print(insp+' - Begin found for:{}, creating new SurvexBlock '.format(name))
survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock, survexpath=survexblock.survexpath+"."+name,
cave=survexfile.cave, survexfile=survexfile, legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
survexblockdown.save()
survexblock.save()
survexblock = survexblockdown
- textlinesdown = [ ]
insp += "> "
- RecursiveLoad(survexblockdown, survexfile, fin, textlinesdown)
+ RecursiveLoad(survexblockdown, survexfile, fin)
+ #--------------------------------------------------------
+ # do not close the file as there may be more blocks in this one
insp = insp[2:]
else:
iblankbegins += 1
@@ -346,7 +351,6 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
if iblankbegins:
iblankbegins -= 1
else:
- # .text not used, using it for number of legs per block
legsinblock = survexlegsnumber - previousnlegs
print(insp+"LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,survexlegsnumber))
survexblock.legsall = legsinblock
@@ -370,7 +374,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
# print(insp+' - Team found: ')
mteammember = rx_team.match(line)
if mteammember:
- for tm in rx_team_member.split(mteammember.group(2)):
+ for tm in rx_person.split(mteammember.group(2)):
if tm:
personexpedition = survexblock.expedition and GetPersonExpeditionNameLookup(survexblock.expedition).get(tm.lower())
if (personexpedition, tm) not in teammembers:
@@ -431,6 +435,31 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
timetaken = endstamp - stamp
# print(insp+' - Time to process: ' + str(timetaken))
+def get_process_memory():
+    """Return this process's maximum resident set size (ru_maxrss) divided by 1024.0; units are platform-dependent."""
+    return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0  # ru_maxrss is field [2] of the rusage result
+
+
+def FindAndLoadAllSurvex(survexblockroot, survexfileroot):
+    """Open survexfileroot and RecursiveLoad it (following *include links) under
+    survexblockroot, with stdout redirected to loadsurvexblks.log. The files are
+    closed and stdout is restored even if loading raises."""
+    print(' - redirecting stdout to loadsurvexblks.log...')
+    stdout_orig = sys.stdout
+    sys.stdout = open('loadsurvexblks.log', 'w')  # every print() below lands in the log file
+    try:
+        finroot = survexfileroot.OpenFile()
+        try:
+            RecursiveLoad(survexblockroot, survexfileroot, finroot)
+        finally:
+            finroot.close()
+    finally:  # always close the log and put the saved stdout back, even on error
+        sys.stdout.close()
+        sys.stdout = stdout_orig
+    print("+", file=sys.stderr)
+    sys.stderr.flush()
+
+
def LoadAllSurvexBlocks():
global survexlegsalllength
global survexlegsnumber
@@ -441,47 +470,28 @@ def LoadAllSurvexBlocks():
models_survex.SurvexFile.objects.all().delete()
models_survex.SurvexDirectory.objects.all().delete()
models_survex.SurvexEquate.objects.all().delete()
- #models_survex.SurvexLeg.objects.all().delete()
models_survex.SurvexTitle.objects.all().delete()
models_survex.SurvexPersonRole.objects.all().delete()
models_survex.SurvexStation.objects.all().delete()
print(" - Data flushed")
- # Clear the data issues as we are reloading
models.DataIssue.objects.filter(parser='survex').delete()
- print(' - Loading All Survex Blocks...')
-
- print(' - redirecting stdout to loadsurvexblks.log...')
- stdout_orig = sys.stdout
- # Redirect sys.stdout to the file
- sys.stdout = open('loadsurvexblks.log', 'w')
-
- survexfile = models_survex.SurvexFile(path=settings.SURVEX_TOPNAME, cave=None)
- survexfile.save()
- survexfile.SetDirectory()
-
- #Load all
- # this is the first so id=1
- survexblockroot = models_survex.SurvexBlock(name=ROOTBLOCK, survexpath="", cave=None, survexfile=survexfile,
+
+ survexfileroot = models_survex.SurvexFile(path=settings.SURVEX_TOPNAME, cave=None)
+ survexfileroot.save()
+ survexfileroot.SetDirectory()
+ survexblockroot = models_survex.SurvexBlock(name=ROOTBLOCK, survexpath="", cave=None, survexfile=survexfileroot,
legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
+ # this is the first so id=1
survexblockroot.save()
- fin = survexfile.OpenFile()
- textlines = [ ]
- # The real work starts here
- RecursiveLoad(survexblockroot, survexfile, fin, textlines)
- fin.close()
+
+ print(' - Loading All Survex Blocks...')
+ FindAndLoadAllSurvex(survexblockroot, survexfileroot)
+
survexblockroot.totalleglength = survexlegsalllength
survexblockroot.legsall = survexlegsnumber
- #survexblockroot.text = "".join(textlines) these are all blank
survexblockroot.save()
- # Close the file
- sys.stdout.close()
- print("+", file=sys.stderr)
- sys.stderr.flush();
-
- # Restore sys.stdout to our old saved file handler
- sys.stdout = stdout_orig
print(" - total number of survex legs: {}m".format(survexlegsnumber))
print(" - total leg lengths loaded: {}m".format(survexlegsalllength))
print(' - Loaded All Survex Blocks.')
@@ -500,48 +510,6 @@ def LoadPos():
topdata = settings.SURVEX_DATA + settings.SURVEX_TOPNAME
print((' - Generating a list of Pos from %s.svx and then loading...' % (topdata)))
- # TO DO - remove the cache file apparatus. Not needed. Only laser points and entrances loaded now.
-
- # Be careful with the cache file.
- # If LoadPos has been run before,
- # but without cave import being run before,
- # then *everything* may be in the fresh 'not found' cache file.
-
- # cachefile = settings.SURVEX_DATA + "posnotfound.cache"
- # notfoundbefore = {}
- # if os.path.isfile(cachefile):
- # # this is not a good test. 1623.svx may never change but *included files may have done.
- # # When the *include is unrolled, we will be able to get a proper timestamp to use
- # # and can increase the timeout from 3 days to 30 days.
- # updtsvx = os.path.getmtime(topdata + ".svx")
- # updtcache = os.path.getmtime(cachefile)
- # age = updtcache - updtsvx
- # print((' svx: %s cache: %s not-found cache is fresher by: %s' % (updtsvx, updtcache, str(timedelta(seconds=age) ))))
-
- # now = time.time()
- # if now - updtcache > 3*24*60*60:
- # print(" cache is more than 3 days old. Deleting.")
- # os.remove(cachefile)
- # elif age < 0 :
- # print(" cache is stale. Deleting.")
- # os.remove(cachefile)
- # else:
- # print(" cache is fresh. Reading...")
- # try:
- # with open(cachefile, "r") as f:
- # for line in f:
- # l = line.rstrip()
- # if l in notfoundbefore:
- # notfoundbefore[l] +=1 # should not be duplicates
- # print(" DUPLICATE ", line, notfoundbefore[l])
- # else:
- # notfoundbefore[l] =1
- # except:
- # print(" FAILURE READ opening cache file %s" % (cachefile))
- # raise
-
-
-# notfoundnow =[]
found = 0
skip = {}
print("\n") # extra line because cavern overwrites the text buffer somehow
@@ -615,18 +583,5 @@ def LoadPos():
models.DataIssue.objects.create(parser='survex', message=message)
raise
- #print(" - %s failed lookups of SurvexStation.objects. %s found. %s skipped." % (len(notfoundnow),found, len(skip)))
print(" - {} SurvexStation entrances found.".format(found))
- # if found > 10: # i.e. a previous cave import has been done
- # try:
- # with open(cachefile, "w") as f:
- # c = len(notfoundnow)+len(skip)
- # for i in notfoundnow:
- # pass #f.write("%s\n" % i)
- # for j in skip:
- # pass #f.write("%s\n" % j) # NB skip not notfoundbefore
- # print((' Not-found cache file written: %s entries' % c))
- # except:
- # print(" FAILURE WRITE opening cache file %s" % (cachefile))
- # raise \ No newline at end of file
diff --git a/parsers/surveys.py b/parsers/surveys.py
index baa8725..f3edebe 100644
--- a/parsers/surveys.py
+++ b/parsers/surveys.py
@@ -1,6 +1,3 @@
-from __future__ import (absolute_import, division,
- print_function, unicode_literals)
-
import sys
import os
import types
@@ -17,7 +14,7 @@ from functools import reduce
import settings
#from troggle.core.models import *
#from troggle.core.models_caves import *
-#from troggle.core.models_survex import *
+from troggle.core.models_survex import SurvexScanSingle, ScansFolder, TunnelFile
def get_or_create_placeholder(year):
""" All surveys must be related to a logbookentry. We don't have a way to
@@ -83,15 +80,15 @@ def LoadListScans():
print(' - Loading Survey Scans')
SurvexScanSingle.objects.all().delete()
- SurvexScansFolder.objects.all().delete()
+ ScansFolder.objects.all().delete()
print(' - deleting all scansFolder and scansSingle objects')
# first do the smkhs (large kh survey scans) directory
- survexscansfoldersmkhs = SurvexScansFolder(fpath=os.path.join(settings.SURVEY_SCANS, "../surveys/smkhs"), walletname="smkhs")
+ manyscansfoldersmkhs = ScansFolder(fpath=os.path.join(settings.SURVEY_SCANS, "../surveys/smkhs"), walletname="smkhs")
print("smkhs", end=' ')
- if os.path.isdir(survexscansfoldersmkhs.fpath):
- survexscansfoldersmkhs.save()
- LoadListScansFile(survexscansfoldersmkhs)
+ if os.path.isdir(manyscansfoldersmkhs.fpath):
+ manyscansfoldersmkhs.save()
+ LoadListScansFile(manyscansfoldersmkhs)
# iterate into the surveyscans directory
@@ -106,13 +103,13 @@ def LoadListScans():
for fy, ffy, fisdiry in GetListDir(ff):
if fisdiry:
assert fisdiry, ffy
- survexscansfolder = SurvexScansFolder(fpath=ffy, walletname=fy)
+ survexscansfolder = ScansFolder(fpath=ffy, walletname=fy)
survexscansfolder.save()
LoadListScansFile(survexscansfolder)
# do the
elif f != "thumbs":
- survexscansfolder = SurvexScansFolder(fpath=ff, walletname=f)
+ survexscansfolder = ScansFolder(fpath=ff, walletname=f)
survexscansfolder.save()
LoadListScansFile(survexscansfolder)
@@ -121,7 +118,7 @@ def FindTunnelScan(tunnelfile, path):
scansfolder, scansfile = None, None
mscansdir = re.search(rb"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg))$", path)
if mscansdir:
- scansfolderl = SurvexScansFolder.objects.filter(walletname=mscansdir.group(1))
+ scansfolderl = ScansFolder.objects.filter(walletname=mscansdir.group(1))
if len(scansfolderl):
assert len(scansfolderl) == 1
scansfolder = scansfolderl[0]
@@ -136,7 +133,7 @@ def FindTunnelScan(tunnelfile, path):
scansfile = scansfilel[0]
if scansfolder:
- tunnelfile.survexscansfolders.add(scansfolder)
+ tunnelfile.manyscansfolders.add(scansfolder)
if scansfile:
tunnelfile.survexscans.add(scansfile)