author     Philip Sargent <philip@Muscogee.localdomain>  2020-04-30 23:15:57 +0100
committer  Philip Sargent <philip@Muscogee.localdomain>  2020-04-30 23:15:57 +0100
commit     39c622d5bfab0ddd4c75b9d643ecbe10d724e022 (patch)
tree       88f55f5a90ade94bccaa35693dd1a5ed86d7cece
parent     76a6b501f3fdebe370c9ad37679c97681c47af67 (diff)
download   troggle-39c622d5bfab0ddd4c75b9d643ecbe10d724e022.tar.gz
           troggle-39c622d5bfab0ddd4c75b9d643ecbe10d724e022.tar.bz2
           troggle-39c622d5bfab0ddd4c75b9d643ecbe10d724e022.zip
databaseReset now loads into an in-memory database first (fast error checking), then into the real db
-rw-r--r--   .gitignore             1
-rw-r--r--   core/views_other.py    5
-rw-r--r--   databaseReset.py     126
-rw-r--r--   parsers/survex.py     28
4 files changed, 123 insertions(+), 37 deletions(-)
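
The core change is that databaseReset.py now runs the whole import job queue against an in-memory sqlite database first, where mistakes in the input data surface quickly, before touching the configured database. A standalone sketch of that idea, using plain sqlite3 rather than Django and a made-up table name, is below; note that the commit itself currently re-runs the imports against the real database rather than replaying the dumped SQL, which the in-line comments flag as a possible future speed-up.

# Standalone sketch (plain sqlite3, not Django; table and file names are illustrative)
# of the two-pass idea: run the imports into an in-memory database first, where
# errors show up quickly, then replay the resulting SQL into an on-disk database.
import sqlite3

def run_imports(conn):
    # stand-in for the real import jobs (caves, people, logbooks, ...)
    conn.execute("CREATE TABLE cave (id INTEGER PRIMARY KEY, name TEXT)")
    conn.executemany("INSERT INTO cave (name) VALUES (?)", [("204",), ("258",)])
    conn.commit()

mem = sqlite3.connect(":memory:")          # fast, throwaway first pass
run_imports(mem)

dump_sql = "\n".join(mem.iterdump())       # same role as memdumpsql() in this commit

disk = sqlite3.connect("troggle-sketch.sqlite")
disk.executescript(dump_sql)               # replay the checked data into a file db
disk.commit()
print(disk.execute("SELECT count(*) FROM cave").fetchone()[0], "caves loaded")
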
diff --git a/.gitignore b/.gitignore
index 39d4835..ea7063e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -33,3 +33,4 @@ ignored-files.log
tunnel-import.log
posnotfound
troggle.sqlite-journal
+loadsurvexblks.log
diff --git a/core/views_other.py b/core/views_other.py
index 1297e7f..cc9782b 100644
--- a/core/views_other.py
+++ b/core/views_other.py
@@ -55,8 +55,9 @@ def controlPanel(request):
#importlist is mostly here so that things happen in the correct order.
#http post data seems to come in an unpredictable order, so we do it this way.
- importlist=['reload_db', 'import_people', 'import_cavetab', 'import_logbooks', 'import_surveys', 'import_QMs']
- databaseReset.make_dirs()
+ importlist=['reinit_db', 'import_people', 'import_caves', 'import_logbooks',
+ 'import_survexblks', 'import_QMs', 'import_survexpos', 'import_surveyscans', 'import_tunnelfiles']
+ databaseReset.dirsredirect()
for item in importlist:
if item in request.POST:
print("running"+ " databaseReset."+item+"()")
diff --git a/databaseReset.py b/databaseReset.py
index 6c03509..2387a44 100644
--- a/databaseReset.py
+++ b/databaseReset.py
@@ -5,7 +5,7 @@ import settings
os.environ['PYTHONPATH'] = settings.PYTHON_PATH
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings')
from django.core import management
-from django.db import connection
+from django.db import connection, close_old_connections
from django.contrib.auth.models import User
from django.http import HttpResponse
from django.core.urlresolvers import reverse
@@ -13,6 +13,9 @@ from troggle.core.models import Cave, Entrance
import troggle.flatpages.models
import json
+# NOTE databaseReset.py is *imported* by views_other.py as it is used in the control panel
+# presented there.
+
databasename=settings.DATABASES['default']['NAME']
expouser=settings.EXPOUSER
expouserpass=settings.EXPOUSERPASS
@@ -22,17 +25,18 @@ def reinit_db():
"""Rebuild database from scratch. Deletes the file first if sqlite is used,
otherwise it drops the database and creates it.
"""
+ currentdbname = settings.DATABASES['default']['NAME']
if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.sqlite3':
try:
- os.remove(databasename)
+ os.remove(currentdbname)
except OSError:
pass
else:
cursor = connection.cursor()
- cursor.execute("DROP DATABASE %s" % databasename)
- cursor.execute("CREATE DATABASE %s" % databasename)
- cursor.execute("ALTER DATABASE %s CHARACTER SET=utf8" % databasename)
- cursor.execute("USE %s" % databasename)
+ cursor.execute("DROP DATABASE %s" % currentdbname)
+ cursor.execute("CREATE DATABASE %s" % currentdbname)
+ cursor.execute("ALTER DATABASE %s CHARACTER SET=utf8" % currentdbname)
+ cursor.execute("USE %s" % currentdbname)
syncuser()
def syncuser():
@@ -73,7 +77,7 @@ def import_logbooks():
def import_QMs():
print("Importing QMs (old caves)")
import parsers.QMs
- # import process itself runs on qm.csv in only 3 caves, not 264!
+ # import process itself runs on qm.csv in only 3 old caves, not the modern ones!
def import_survexblks():
import parsers.survex
@@ -159,7 +163,7 @@ def dumplogbooks():
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class JobQueue():
- """A list of import operations to run. Always reports times
+ """A list of import operations to run. Always reports profile times
in the same order.
"""
def __init__(self,run):
@@ -173,7 +177,7 @@ class JobQueue():
for k in self.results_order:
self.results[k]=[]
self.tfile = "import_profile.json"
- self.htmlfile = "profile.html"
+ self.htmlfile = "profile.html" # for HTML results table. Not yet done.
#Adding elements to queue - enqueue
def enq(self,label,func):
@@ -186,7 +190,9 @@ class JobQueue():
# return self.queue.pop()
# return ("Queue Empty!")
- def run(self):
+ def loadprofiles(self):
+ """Load timings for previous runs from file
+ """
if os.path.isfile(self.tfile):
try:
f = open(self.tfile, "r")
@@ -197,9 +203,26 @@ class JobQueue():
print "FAILURE parsing JSON file %s" % (self.tfile)
# Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12
f.close()
-
for j in self.results_order:
self.results[j].append(None) # append a placeholder
+ return True
+
+ def saveprofiles(self):
+ with open(self.tfile, 'w') as f:
+ json.dump(self.results, f)
+ return True
+
+ def memdumpsql(self):
+ djconn = django.db.connection
+ from dump import _iterdump
+ with open('memdump.sql', 'w') as f:
+ for line in _iterdump(djconn):
+ f.write('%s\n' % line.encode("utf8"))
+ return True
+
+ def runqonce(self):
+ """Run all the jobs in the queue provided once
+ """
print "** Running job ", self.runlabel
jobstart = time.time()
@@ -216,26 +239,68 @@ class JobQueue():
self.results[i[0]].pop() # the null item
self.results[i[0]].append(duration)
- with open(self.tfile, 'w') as f:
- json.dump(self.results, f)
jobend = time.time()
jobduration = jobend-jobstart
- print "** Ended all jobs. %.1f seconds" % jobduration
+ print "** Ended job %s - %.1f seconds total." % (self.runlabel,jobduration)
+
+ return True
+
+
+ def run(self):
+ self.loadprofiles()
- # currently uses django db whatever it was. CHANGE this to explicitly use
- # a new sqlite3 db and then import the sql dump of that into the troggle db
- # instead of loading directly into the troggle sqlite db.
- # in-memory ":memory:" sqlite is ~ 7x faster and all of troggle can be
- # loaded in 6 minutes that way
- djconn = django.db.connection
- from dump import _iterdump
- with open('memdump.sql', 'w') as f:
- for line in _iterdump(djconn):
- f.write('%s\n' % line.encode("utf8"))
+ dbengine = settings.DATABASES['default']['ENGINE']
+ dbname = settings.DATABASES['default']['NAME']
+
+ if dbname ==":memory:":
+ # just run, and save the sql file
+ print "-- ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE']
+ self.runqonce()
+ self.memdumpsql()
+ self.saveprofiles()
+ else:
+ # run all the imports through :memory: first
+ settings.DATABASES['default']['ENGINE'] = 'django.db.backends.sqlite3'
+ settings.DATABASES['default']['NAME'] = ":memory:"
+ print "-- ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE']
+
+ # but because the user may be expecting to add this to a db with lots of tables already there,
+ # the job queue may not start from scratch, so we need to initialise the db properly first.
+ # But initialising twice crashes, so be sure to do it once only.
+ if ("reinit",reinit_db) not in self.queue:
+ reinit_db()
+ if ("dirsredirect",dirsredirect) not in self.queue:
+ dirsredirect()
+ if ("caves",import_caves) not in self.queue:
+ import_caves()
+ if ("people",import_people) not in self.queue:
+ import_people()
+
+ django.db.close_old_connections() # maybe not needed here
+
+ self.runqonce()
+ self.memdumpsql()
+ self.showprofile()
+
+ # restore the original db and import again
+ # if we wanted to, we could re-import the SQL generated in the first pass to be
+ # blazing fast. But for the present just re-import the lot.
+ settings.DATABASES['default']['ENGINE'] = dbengine
+ settings.DATABASES['default']['NAME'] = dbname
+ print "-- ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE']
+
+ for j in self.results_order:
+ self.results[j].pop() # throw away results from :memory: run
+ self.results[j].append(None) # append a placeholder
+
+ django.db.close_old_connections() # magic rune. works. found by looking in django/db/__init__.py
+ #django.setup() # should this be needed?
- # now import the memory image sql into
- ####(to do)
+
+ self.runqonce() # crashes because it thinks it has no migrations to apply, when it does.
+ self.saveprofiles()
+
return True
def showprofile(self):
@@ -277,9 +342,10 @@ class JobQueue():
percen = 100* (r[i] - r[i-1])/r[i-1]
if abs(percen) >0.1:
print '%8.1f%%' % percen,
- else:
- print " - ",
+ else:
+ print " - ",
print ""
+ print "\n"
return True
@@ -333,8 +399,8 @@ if __name__ == "__main__":
jq.enq("reinit",reinit_db)
jq.enq("dirsredirect",dirsredirect)
jq.enq("caves",import_caves)
- jq.enq("survexblks",import_survexblks)
- jq.enq("survexpos",import_survexpos)
+ jq.enq("people",import_people)
+ jq.enq("scans",import_surveyscans)
elif "caves" in sys.argv:
jq.enq("caves",import_caves)
elif "logbooks" in sys.argv:
diff --git a/parsers/survex.py b/parsers/survex.py
index 5720b11..6fb7c62 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -10,7 +10,9 @@ from django.utils.timezone import make_aware
import re
import os
+import time
from datetime import datetime, timedelta
+import sys
line_leg_regex = re.compile(r"[\d\-+.]+$")
@@ -179,7 +181,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
# print('QM res station %s' % qm_resolve_station)
# print('QM notes %s' % qm_notes)
- # If the QM isn't resolved (has a resolving station) thn load it
+ # If the QM isn't resolved (has a resolving station) then load it
if not qm_resolve_section or qm_resolve_section is not '-' or qm_resolve_section is not 'None':
from_section = models.SurvexBlock.objects.filter(name=qm_from_section)
# If we can find a section (survex note chunck, named)
@@ -364,6 +366,11 @@ def LoadAllSurvexBlocks():
print(" - Data flushed")
print(' - Loading All Survex Blocks...')
+
+ print(' - redirecting stdout to loadsurvexblks.log ...')
+ stdout_orig = sys.stdout
+ # Redirect sys.stdout to the file
+ sys.stdout = open('loadsurvexblks.log', 'w')
survexfile = models.SurvexFile(path=settings.SURVEX_TOPNAME, cave=None)
survexfile.save()
@@ -379,6 +386,11 @@ def LoadAllSurvexBlocks():
fin.close()
survexblockroot.text = "".join(textlines)
survexblockroot.save()
+
+ # Close the file
+ sys.stdout.close()
+ # Restore sys.stdout to our old saved file handler
+ sys.stdout = stdout_orig
print(' - Loaded All Survex Blocks.')
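
The LoadAllSurvexBlocks() change above diverts the very chatty per-block output into loadsurvexblks.log (hence the new .gitignore entry) and restores normal printing afterwards. A minimal sketch of that save/redirect/restore pattern follows; contextlib.redirect_stdout would be a tidier alternative, but this mirrors the explicit style used here.

# Sketch: temporarily send print output to a log file, then restore the console.
import sys

stdout_orig = sys.stdout
sys.stdout = open('loadsurvexblks.log', 'w')   # everything printed now goes to the log
try:
    print('...thousands of per-block progress lines go here...')
finally:
    sys.stdout.close()        # flush and close the log file
    sys.stdout = stdout_orig  # restore normal console output
print(' - Loaded All Survex Blocks.')
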
@@ -399,13 +411,18 @@ def LoadPos():
# but without cave import being run before,
# then *everything* may be in the fresh 'not found' cache file.
- cachefile = settings.SURVEX_DATA + "posnotfound"
+ cachefile = settings.SURVEX_DATA + "posnotfound.cache"
notfoundbefore = {}
if os.path.isfile(cachefile):
updtsvx = os.path.getmtime(topdata + ".svx")
updtcache = os.path.getmtime(cachefile)
age = updtcache - updtsvx
- print(' svx: %s cache: %s cache age: %s' % (updtsvx, updtcache, str(timedelta(seconds=age) )))
+ print(' svx: %s cache: %s not-found cache is fresher by: %s' % (updtsvx, updtcache, str(timedelta(seconds=age) )))
+
+ now = time.time()
+ if now - updtcache > 30*24*60*60:
+ print " cache is more than 30 days old. Deleting."
+ os.remove(cachefile)
if age < 0 :
print " cache is stale."
os.remove(cachefile)
@@ -432,6 +449,8 @@ def LoadPos():
# cavern defaults to using same cwd as supplied input file
call([settings.CAVERN, "--output=%s.3d" % (topdata), "%s.svx" % (topdata)])
call([settings.THREEDTOPOS, '%s.3d' % (topdata)], cwd = settings.SURVEX_DATA)
+ print " - This next bit takes a while. Matching ~32,000 survey positions. Be patient..."
+
posfile = open("%s.pos" % (topdata))
posfile.readline() #Drop header
for line in posfile.readlines():
@@ -449,9 +468,8 @@ def LoadPos():
ss.save()
found += 1
except:
- #print "%s in %s.pos not found in lookup of SurvexStation.objects" % (name, settings.SURVEX_TOPNAME)
notfoundnow.append(name)
- print " - %s stations NOT found in lookup of SurvexStation.objects. %s found. %s skipped." % (len(notfoundnow),found, len(skip))
+ print " - %s stations not found in lookup of SurvexStation.objects. %s found. %s skipped." % (len(notfoundnow),found, len(skip))
if found > 10: # i.e. a previous cave import has been done
try:
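
LoadPos() now treats the "not found" cache as disposable: it is renamed posnotfound.cache, deleted if it is older than the survex source file, and also deleted if it is more than 30 days old. A hedged, self-contained sketch of that staleness rule (file names are illustrative, and the two checks are folded into one helper) is:

# Sketch of the cache-staleness rules added to LoadPos() above.
import os, time
from datetime import timedelta

def notfound_cache_is_usable(cachefile, svxfile, max_age_days=30):
    """Return True if the 'not found' cache can be trusted, deleting it if stale."""
    if not os.path.isfile(cachefile):
        return False
    updtsvx = os.path.getmtime(svxfile)
    updtcache = os.path.getmtime(cachefile)
    age = updtcache - updtsvx
    print(' svx: %s  cache: %s  not-found cache is fresher by: %s'
          % (updtsvx, updtcache, timedelta(seconds=age)))
    if age < 0:                                    # survex data edited after the cache was written
        os.remove(cachefile)
        return False
    if time.time() - updtcache > max_age_days * 24 * 60 * 60:
        os.remove(cachefile)                       # too old to trust
        return False
    return True
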