author     Philip Sargent <philip.sargent@gmail.com>    2020-04-16 20:36:42 +0100
committer  Philip Sargent <philip.sargent@gmail.com>    2020-04-16 20:36:42 +0100
commit     b123f6ada7ef78526878ac84c259c93df4bfa9e3 (patch)
tree       dfe2303df01a61bfdddcf810d48c96058240d461
parent     e5c288c7641e5d381babb3c5a800d640df9aeb96 (diff)
Dumps loaded data into a .sql file
-rw-r--r--  databaseReset.py   136
-rw-r--r--  dump.py             69
-rw-r--r--  parsers/survex.py    2
3 files changed, 136 insertions, 71 deletions
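
The heart of the change is the new dump.py module: a generator, _iterdump(), that walks a SQLite connection and yields the schema and row data as SQL statements, which JobQueue.run() writes out to memdump.sql once all import jobs have finished. Below is a minimal standalone sketch of that idea; the ":memory:" database, the example table and its contents are illustrative only and not part of the commit (the commit itself dumps django.db.connection).

    import sqlite3
    from dump import _iterdump   # the generator added by this commit

    # Illustrative throwaway database; the commit dumps django.db.connection instead.
    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE cave (id INTEGER PRIMARY KEY, name TEXT)")
    conn.execute("INSERT INTO cave (name) VALUES ('Kaninchenhoehle')")
    conn.commit()

    # Write each yielded SQL statement on its own line, as JobQueue.run() does.
    # (The .encode("utf8") mirrors the commit's Python 2 idiom.)
    with open("memdump.sql", "w") as f:
        for line in _iterdump(conn):
            f.write("%s\n" % line.encode("utf8"))
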
diff --git a/databaseReset.py b/databaseReset.py
index 4b45f36..9adfa4a 100644
--- a/databaseReset.py
+++ b/databaseReset.py
@@ -11,6 +11,7 @@ from django.http import HttpResponse
 from django.core.urlresolvers import reverse
 from troggle.core.models import Cave, Entrance
 import troggle.flatpages.models
+import json
 
 databasename=settings.DATABASES['default']['NAME']
 expouser=settings.EXPOUSER
@@ -61,7 +62,7 @@ def import_survex():
 
 def import_QMs():
     import parsers.QMs
-    # import process runs on qm.csv in only 3 caves, not 264!
+    # import process itself runs on qm.csv in only 3 caves, not 264!
 
 def import_surveys():
     import parsers.surveys
@@ -80,21 +81,6 @@ def pageredirects():
         f = troggle.flatpages.models.Redirect(originalURL = oldURL, newURL = newURL)
         f.save()
 
-def reset(): # unused now that we have a jobqueue
-    """ Wipe the troggle database and import everything from legacy data
-    """
-    reload_db()
-    make_dirs()
-    pageredirects()
-    import_caves()
-    import_people()
-    import_surveyscans()
-    import_logbooks()
-    import_QMs()
-    import_survex()
-    import_tunnelfiles()
-    import_surveys()
-
 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 def import_auto_logbooks():
     import parsers.logbooks
@@ -149,12 +135,11 @@ def dumplogbooks():
         f.write(unicode(output).encode( "utf-8" ))
         f.close()
 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-class JobQueue():
-    """ A list of import operations to run. Always reports times
-    in the same order """
-
-    #Constructor creates a list
+class JobQueue():
+    """A list of import operations to run. Always reports times
+    in the same order.
+    """
     def __init__(self,run):
         self.runlabel = run
         self.queue = [] # tuples of (jobname, jobfunction)
@@ -165,81 +150,88 @@ class JobQueue():
                            "tunnel", "surveys", "test", "makedirs", "redirect" ]
         for k in self.results_order:
             self.results[k]=[]
+        self.tfile = "import_profile.json"
+        self.htmlfile = "profile.html"
 
-    #Adding elements to queue
+    #Adding elements to queue - enqueue
     def enq(self,label,func):
         self.queue.append((label,func))
         return True
 
-    #Removing the last element from the queue
-    def deq(self):
-        if len(self.queue)>0:
-            return self.queue.pop()
-        return ("Queue Empty!")
-
-    def size(self):
-        return len(self.queue)
+    #Removing the last element from the queue - dequeue
+    # def deq(self):
+    #     if len(self.queue)>0:
+    #         return self.queue.pop()
+    #     return ("Queue Empty!")
 
     def run(self):
-        import json
-        tfile = "import_profile.json"
-        if os.path.isfile(tfile):
+        if os.path.isfile(self.tfile):
             try:
-                f = open(tfile, "r")
+                f = open(self.tfile, "r")
                 data = json.load(f)
                 for j in data:
                     self.results[j] = data[j]
             except:
-                print "FAILURE parsing JSON file %s" % (tfile)
+                print "FAILURE parsing JSON file %s" % (self.tfile)
                 # Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12
             f.close()
-        for i in self.queue:
-            print i, self.results[i[0]]
-            self.results[i[0]].append(1.0)
-
         print "** Running job ", self.runlabel
+        jobstart = time.time()
+        self.results["date"].append(jobstart)
+        self.results["runlabel"].append(self.runlabel)
+
         for i in self.queue:
-            #print "*- Running \"", i[0], "\""
             start = time.time()
-            i[1]()
+            i[1]()    # looks ugly but invokes function passed in the second item in the tuple
             duration = time.time()-start
             print "\n*- Ended \"", i[0], "\" %.1f seconds" % duration
             self.results[i[0]].append(duration)
-            self.results["date"].append(start)
-            self.results["runlabel"].append(self.runlabel)
-        print "** Ended all jobs."
-        #print self.results
-        with open(tfile, 'w') as f:
-            json.dump(self.results, f)
-
+        with open(self.tfile, 'w') as f:
+            json.dump(self.results, f)
+
+        jobend = time.time()
+        jobduration = jobend-jobstart
+        print "** Ended all jobs. %.1f seconds" % jobduration
+
+        # currently uses django db whatever it was. CHANGE this to explicitly use
+        # a new sqlite3 db and then import the sql dump of that into the troggle db
+        # instead of loading directly into the troggle sqlite db.
+        # in-memory ":memory:" sqlite is ~ 7x faster and all of troggle can be
+        # loaded in 6 minutes that way
+        djconn = django.db.connection
+        from dump import _iterdump
+        with open('memdump.sql', 'w') as f:
+            for line in _iterdump(djconn):
+                f.write('%s\n' % line.encode("utf8"))
+
+        # now import the memory image sql into     (to do)
+        return True
+
+    def showprofile(self):
+        """Prints out the time it took to run the jobqueue"""
         for k in self.results_order:
             percen=0
-            if k == "runlabel":
-                pass
-            if k =="date":
+            lst = self.results[k]
+            if k == "runlabel":
+                r = lst[len(lst)-1]
+                print '%15s %s' % (k,r)
+            elif k =="date":
                 # Calculate dates as days before present to one decimal place
-                pass
-            elif len(self.results[k])>3:
-                lst = self.results[k]
+                r = lst[len(lst)-1]
+                if len(lst)>2:
+                    days = (lst[len(lst)-2]-r)/(24*60*60)
+                    print '%15s %8.1f days ago' % (k,days)
+            elif len(lst)>2:
                 e = len(lst)-1
                 percen = 100* (lst[e] - lst[e-1])/lst[e-1]
-                if abs(percen) >0.1:
-                    print '%15s %8.1f%%' % (k, percen)
-                else:
-                    print '%15s ' % (k)
-
+                if abs(percen) >0.1:
+                    print '%15s %8.1f%%' % (k, percen)
+            else:
+                print '%15s ' % (k)
         return True
 
-def importtest():
-    from random import randrange
-    k = 0
-    for i in range(5+randrange(15)):
-        for j in range(i):
-            k += i
-    #print k,
-    return True
 
 def usage():
     print("""Usage is 'python databaseReset.py <command> [runlabel]'
@@ -277,9 +269,12 @@ if __name__ == "__main__":
     jq = JobQueue(runlabel)
 
     if "test" in sys.argv:
-        jq.enq("test",importtest)
-        jq.enq("caves",importtest)
-        jq.enq("people",importtest)
+        jq.enq("reload",reload_db)
+        jq.enq("makedirs",make_dirs)
+        jq.enq("caves",import_caves)
+        jq.enq("survex",import_survex)
+        jq.enq("surveys",import_surveys)
+
     elif "caves" in sys.argv:
         jq.enq("caves",import_caves)
     elif "logbooks" in sys.argv:
@@ -335,3 +330,4 @@ if __name__ == "__main__":
         usage()
 
     jq.run()
+    jq.showprofile()
diff --git a/dump.py b/dump.py
new file mode 100644
--- /dev/null
+++ b/dump.py
@@ -0,0 +1,69 @@
+# Mimic the sqlite3 console shell's .dump command
+# Author: Paul Kippes <kippesp@gmail.com>
+
+# Every identifier in sql is quoted based on a comment in sqlite
+# documentation "SQLite adds new keywords from time to time when it
+# takes on new features. So to prevent your code from being broken by
+# future enhancements, you should normally quote any identifier that
+# is an English language word, even if you do not have to."
+
+def _iterdump(connection):
+ """
+ Returns an iterator to the dump of the database in an SQL text format.
+ Used to produce an SQL dump of the database. Useful to save an in-memory
+ database for later restoration. This function should not be called
+ directly but instead called from the Connection method, iterdump().
+ """
+
+ cu = connection.cursor()
+ yield('BEGIN TRANSACTION;')
+
+ # sqlite_master table contains the SQL CREATE statements for the database.
+ q = """
+ SELECT "name", "type", "sql"
+ FROM "sqlite_master"
+ WHERE "sql" NOT NULL AND
+ "type" == 'table'
+ ORDER BY "name"
+ """
+ schema_res = cu.execute(q)
+ for table_name, type, sql in schema_res.fetchall():
+ if table_name == 'sqlite_sequence':
+ yield('DELETE FROM "sqlite_sequence";')
+ elif table_name == 'sqlite_stat1':
+ yield('ANALYZE "sqlite_master";')
+ elif table_name.startswith('sqlite_'):
+ continue
+ # NOTE: Virtual table support not implemented
+ #elif sql.startswith('CREATE VIRTUAL TABLE'):
+ # qtable = table_name.replace("'", "''")
+ # yield("INSERT INTO sqlite_master(type,name,tbl_name,rootpage,sql)"\
+ # "VALUES('table','{0}','{0}',0,'{1}');".format(
+ # qtable,
+ # sql.replace("''")))
+ else:
+ yield('{0};'.format(sql))
+
+ # Build the insert statement for each row of the current table
+ table_name_ident = table_name.replace('"', '""')
+ res = cu.execute('PRAGMA table_info("{0}")'.format(table_name_ident))
+ column_names = [str(table_info[1]) for table_info in res.fetchall()]
+ q = """SELECT 'INSERT INTO "{0}" VALUES({1})' FROM "{0}";""".format(
+ table_name_ident,
+ ",".join("""'||quote("{0}")||'""".format(col.replace('"', '""')) for col in column_names))
+ query_res = cu.execute(q)
+ for row in query_res:
+ yield(row[0]) # '{0}'.format(row[0]) had unicode errors
+
+ # Now when the type is 'index', 'trigger', or 'view'
+ q = """
+ SELECT "name", "type", "sql"
+ FROM "sqlite_master"
+ WHERE "sql" NOT NULL AND
+ "type" IN ('index', 'trigger', 'view')
+ """
+ schema_res = cu.execute(q)
+ for name, type, sql in schema_res.fetchall():
+ yield('{0};'.format(sql))
+
+ yield('COMMIT;')
\ No newline at end of file
diff --git a/parsers/survex.py b/parsers/survex.py
index 3cda53a..3d78c59 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -401,4 +401,4 @@ def LoadPos():
             ss.z = float(z)
             ss.save()
         except:
-            print("%s not parsed in survex %s" % (name, pos))
+            print "%s not parsed in survex %s.pos" % (name, settings.SURVEX_TOPNAME)
\ No newline at end of file
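
For context, the reworked __main__ block now drives every import through the JobQueue and always ends with a timing profile. The sketch below shows the rough flow for the "test" command; it assumes it is running inside databaseReset.py, where reload_db, make_dirs and the import_* functions are defined, and "testrun" is just an example runlabel.

    # Sketch of the post-commit flow for `python databaseReset.py test testrun`.
    jq = JobQueue("testrun")            # runlabel, taken from the optional second argument
    jq.enq("reload", reload_db)         # wipe and recreate the database
    jq.enq("makedirs", make_dirs)
    jq.enq("caves", import_caves)
    jq.enq("survex", import_survex)
    jq.enq("surveys", import_surveys)
    jq.run()          # times each job, appends the results to import_profile.json,
                      # then dumps the Django db to memdump.sql via _iterdump()
    jq.showprofile()  # prints each job's runtime and the change against the previous run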