 databaseReset.py  | 136 ++++++++++++++++++++++++++++++------------------------
 dump.py           |  69 ++++++++++++++++++++++++++++
 parsers/survex.py |   2 +-
 3 files changed, 136 insertions(+), 71 deletions(-)
diff --git a/databaseReset.py b/databaseReset.py
index 4b45f36..9adfa4a 100644
--- a/databaseReset.py
+++ b/databaseReset.py
@@ -11,6 +11,7 @@ from django.http import HttpResponse
from django.core.urlresolvers import reverse
from troggle.core.models import Cave, Entrance
import troggle.flatpages.models
+import json
databasename=settings.DATABASES['default']['NAME']
expouser=settings.EXPOUSER
@@ -61,7 +62,7 @@ def import_survex():
def import_QMs():
import parsers.QMs
- # import process runs on qm.csv in only 3 caves, not 264!
+ # import process itself runs on qm.csv in only 3 caves, not 264!
def import_surveys():
import parsers.surveys
@@ -80,21 +81,6 @@ def pageredirects():
f = troggle.flatpages.models.Redirect(originalURL = oldURL, newURL = newURL)
f.save()
-def reset(): # unused now that we have a jobqueue
- """ Wipe the troggle database and import everything from legacy data
- """
- reload_db()
- make_dirs()
- pageredirects()
- import_caves()
- import_people()
- import_surveyscans()
- import_logbooks()
- import_QMs()
- import_survex()
- import_tunnelfiles()
- import_surveys()
-
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def import_auto_logbooks():
import parsers.logbooks
@@ -149,12 +135,11 @@ def dumplogbooks():
f.write(unicode(output).encode( "utf-8" ))
f.close()
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-class JobQueue():
- """ A list of import operations to run. Always reports times
- in the same order """
-
- #Constructor creates a list
+class JobQueue():
+ """A list of import operations to run. Always reports times
+ in the same order.
+ """
def __init__(self,run):
self.runlabel = run
self.queue = [] # tuples of (jobname, jobfunction)
@@ -165,81 +150,88 @@ class JobQueue():
"tunnel", "surveys", "test", "makedirs", "redirect" ]
for k in self.results_order:
self.results[k]=[]
+ self.tfile = "import_profile.json"
+ self.htmlfile = "profile.html"
- #Adding elements to queue
+ #Adding elements to queue - enqueue
def enq(self,label,func):
self.queue.append((label,func))
return True
- #Removing the last element from the queue
- def deq(self):
- if len(self.queue)>0:
- return self.queue.pop()
- return ("Queue Empty!")
-
- def size(self):
- return len(self.queue)
+ #Removing the last element from the queue - dequeue
+ # def deq(self):
+ # if len(self.queue)>0:
+ # return self.queue.pop()
+ # return ("Queue Empty!")
def run(self):
- import json
- tfile = "import_profile.json"
- if os.path.isfile(tfile):
+ if os.path.isfile(self.tfile):
try:
- f = open(tfile, "r")
+ f = open(self.tfile, "r")
data = json.load(f)
for j in data:
self.results[j] = data[j]
except:
- print "FAILURE parsing JSON file %s" % (tfile)
+ print "FAILURE parsing JSON file %s" % (self.tfile)
# Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12
f.close()
- for i in self.queue:
- print i, self.results[i[0]]
- self.results[i[0]].append(1.0)
-
print "** Running job ", self.runlabel
+ jobstart = time.time()
+ self.results["date"].append(jobstart)
+ self.results["runlabel"].append(self.runlabel)
+
for i in self.queue:
- #print "*- Running \"", i[0], "\""
start = time.time()
- i[1]()
+ i[1]() # looks ugly, but this invokes the function passed as the second item of the tuple
duration = time.time()-start
print "\n*- Ended \"", i[0], "\" %.1f seconds" % duration
self.results[i[0]].append(duration)
- self.results["date"].append(start)
- self.results["runlabel"].append(self.runlabel)
- print "** Ended all jobs."
- #print self.results
- with open(tfile, 'w') as f:
- json.dump(self.results, f)
-
+ with open(self.tfile, 'w') as f:
+ json.dump(self.results, f)
+
+ jobend = time.time()
+ jobduration = jobend-jobstart
+ print "** Ended all jobs. %.1f seconds" % jobduration
+
+ # currently uses whichever db django is configured with. CHANGE this to explicitly use
+ # a new sqlite3 db and then import the sql dump of that into the troggle db
+ # instead of loading directly into the troggle sqlite db.
+ # in-memory ":memory:" sqlite is ~7x faster and all of troggle can be
+ # loaded in 6 minutes that way
+ djconn = django.db.connection
+ from dump import _iterdump
+ with open('memdump.sql', 'w') as f:
+ for line in _iterdump(djconn):
+ f.write('%s\n' % line.encode("utf8"))
+
+ # now import the memory image sql into the troggle db (to do)
+ return True
+
+ def showprofile(self):
+ """Prints out the time it took to run the jobqueue"""
for k in self.results_order:
percen=0
- if k == "runlabel":
- pass
- if k =="date":
+ lst = self.results[k]
+ if k == "runlabel":
+ r = lst[len(lst)-1]
+ print '%15s %s' % (k,r)
+ elif k =="date":
# Calculate dates as days before present to one decimal place
- pass
- elif len(self.results[k])>3:
- lst = self.results[k]
+ r = lst[len(lst)-1]
+ if len(lst)>2:
+ days = (r - lst[len(lst)-2])/(24*60*60) # age of the previous run, in days before this one
+ print '%15s %8.1f days ago' % (k,days)
+ elif len(lst)>2:
e = len(lst)-1
percen = 100* (lst[e] - lst[e-1])/lst[e-1]
- if abs(percen) >0.1:
- print '%15s %8.1f%%' % (k, percen)
- else:
- print '%15s ' % (k)
-
+ if abs(percen) >0.1:
+ print '%15s %8.1f%%' % (k, percen)
+ else:
+ print '%15s ' % (k)
return True
-def importtest():
- from random import randrange
- k = 0
- for i in range(5+randrange(15)):
- for j in range(i):
- k += i
- #print k,
- return True
def usage():
print("""Usage is 'python databaseReset.py <command> [runlabel]'
@@ -277,9 +269,12 @@ if __name__ == "__main__":
jq = JobQueue(runlabel)
if "test" in sys.argv:
- jq.enq("test",importtest)
- jq.enq("caves",importtest)
- jq.enq("people",importtest)
+ jq.enq("reload",reload_db)
+ jq.enq("makedirs",make_dirs)
+ jq.enq("caves",import_caves)
+ jq.enq("survex",import_survex)
+ jq.enq("surveys",import_surveys)
+
elif "caves" in sys.argv:
jq.enq("caves",import_caves)
elif "logbooks" in sys.argv:
@@ -335,3 +330,4 @@ if __name__ == "__main__":
usage()
jq.run()
+ jq.showprofile()
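
The comment in run() above leaves the actual re-import as a to-do: memdump.sql gets written, but nothing reads it back yet. A minimal sketch of that replay step, assuming a plain sqlite3 target rather than Django's connection wrapper (the filename below is illustrative, not troggle's real database path):

    # Sketch only: replay memdump.sql into a sqlite database.
    import sqlite3

    with open("memdump.sql") as f:
        script = f.read().decode("utf8")

    conn = sqlite3.connect("troggle.sqlite")   # illustrative filename
    conn.executescript(script)                 # the dump carries its own BEGIN TRANSACTION/COMMIT
    conn.close()

Running the importers against a ":memory:" database and replaying the dump once means the many small ORM writes hit RAM and the disk sees a single large transaction, which is presumably where the ~7x figure quoted in the comment comes from.
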
diff --git a/dump.py b/dump.py
new file mode 100644
index 0000000..9206690
--- /dev/null
+++ b/dump.py
@@ -0,0 +1,69 @@
+# Mimic the sqlite3 console shell's .dump command
+# Author: Paul Kippes <kippesp@gmail.com>
+
+# Every identifier in sql is quoted based on a comment in sqlite
+# documentation "SQLite adds new keywords from time to time when it
+# takes on new features. So to prevent your code from being broken by
+# future enhancements, you should normally quote any identifier that
+# is an English language word, even if you do not have to."
+
+def _iterdump(connection):
+ """
+ Returns an iterator to the dump of the database in an SQL text format.
+ Used to produce an SQL dump of the database. Useful to save an in-memory
+ database for later restoration. This function should not be called
+ directly but instead called from the Connection method, iterdump().
+ """
+
+ cu = connection.cursor()
+ yield('BEGIN TRANSACTION;')
+
+ # sqlite_master table contains the SQL CREATE statements for the database.
+ q = """
+ SELECT "name", "type", "sql"
+ FROM "sqlite_master"
+ WHERE "sql" NOT NULL AND
+ "type" == 'table'
+ ORDER BY "name"
+ """
+ schema_res = cu.execute(q)
+ for table_name, type, sql in schema_res.fetchall():
+ if table_name == 'sqlite_sequence':
+ yield('DELETE FROM "sqlite_sequence";')
+ elif table_name == 'sqlite_stat1':
+ yield('ANALYZE "sqlite_master";')
+ elif table_name.startswith('sqlite_'):
+ continue
+ # NOTE: Virtual table support not implemented
+ #elif sql.startswith('CREATE VIRTUAL TABLE'):
+ # qtable = table_name.replace("'", "''")
+ # yield("INSERT INTO sqlite_master(type,name,tbl_name,rootpage,sql)"\
+ # "VALUES('table','{0}','{0}',0,'{1}');".format(
+ # qtable,
+ # sql.replace("''")))
+ else:
+ yield('{0};'.format(sql))
+
+ # Build the insert statement for each row of the current table
+ table_name_ident = table_name.replace('"', '""')
+ res = cu.execute('PRAGMA table_info("{0}")'.format(table_name_ident))
+ column_names = [str(table_info[1]) for table_info in res.fetchall()]
+ q = """SELECT 'INSERT INTO "{0}" VALUES({1})' FROM "{0}";""".format(
+ table_name_ident,
+ ",".join("""'||quote("{0}")||'""".format(col.replace('"', '""')) for col in column_names))
+ query_res = cu.execute(q)
+ for row in query_res:
+ yield(row[0]) # '{0}'.format(row[0]) had unicode errors
+
+ # Now when the type is 'index', 'trigger', or 'view'
+ q = """
+ SELECT "name", "type", "sql"
+ FROM "sqlite_master"
+ WHERE "sql" NOT NULL AND
+ "type" IN ('index', 'trigger', 'view')
+ """
+ schema_res = cu.execute(q)
+ for name, type, sql in schema_res.fetchall():
+ yield('{0};'.format(sql))
+
+ yield('COMMIT;')
\ No newline at end of file
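
A quick usage sketch of _iterdump(): it yields one complete SQL statement per iteration, opening with BEGIN TRANSACTION; and closing with COMMIT;, so the output can be written line by line to a file (as databaseReset.py now does) or replayed with executescript(). The table below is illustrative:

    import sqlite3
    from dump import _iterdump

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE cave (id INTEGER PRIMARY KEY, name TEXT)")
    conn.execute("INSERT INTO cave (name) VALUES ('example')")
    conn.commit()

    for statement in _iterdump(conn):
        print statement   # 'BEGIN TRANSACTION;', 'CREATE TABLE ...;', 'INSERT ...;', 'COMMIT;'
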
diff --git a/parsers/survex.py b/parsers/survex.py
index 3cda53a..3d78c59 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -401,4 +401,4 @@ def LoadPos():
ss.z = float(z)
ss.save()
except:
- print("%s not parsed in survex %s" % (name, pos))
+ print "%s not parsed in survex %s.pos" % (name, settings.SURVEX_TOPNAME) \ No newline at end of file