author | Philip Sargent <philip.sargent@gmail.com> | 2020-04-16 20:36:42 +0100 |
---|---|---|
committer | Philip Sargent <philip.sargent@gmail.com> | 2020-04-16 20:36:42 +0100 |
commit | b123f6ada7ef78526878ac84c259c93df4bfa9e3 (patch) | |
tree | dfe2303df01a61bfdddcf810d48c96058240d461 /databaseReset.py | |
parent | e5c288c7641e5d381babb3c5a800d640df9aeb96 (diff) | |
download | troggle-b123f6ada7ef78526878ac84c259c93df4bfa9e3.tar.gz troggle-b123f6ada7ef78526878ac84c259c93df4bfa9e3.tar.bz2 troggle-b123f6ada7ef78526878ac84c259c93df4bfa9e3.zip |
Dumps loaded data into a .sql file
Diffstat (limited to 'databaseReset.py')
-rw-r--r-- | databaseReset.py | 136 |
1 file changed, 66 insertions(+), 70 deletions(-)
```diff
diff --git a/databaseReset.py b/databaseReset.py
index 4b45f36..9adfa4a 100644
--- a/databaseReset.py
+++ b/databaseReset.py
@@ -11,6 +11,7 @@ from django.http import HttpResponse
 from django.core.urlresolvers import reverse
 from troggle.core.models import Cave, Entrance
 import troggle.flatpages.models
+import json
 
 databasename=settings.DATABASES['default']['NAME']
 expouser=settings.EXPOUSER
@@ -61,7 +62,7 @@ def import_survex():
 
 def import_QMs():
     import parsers.QMs
-    # import process runs on qm.csv in only 3 caves, not 264!
+    # import process itself runs on qm.csv in only 3 caves, not 264!
 
 def import_surveys():
     import parsers.surveys
@@ -80,21 +81,6 @@ def pageredirects():
         f = troggle.flatpages.models.Redirect(originalURL = oldURL, newURL = newURL)
         f.save()
 
-def reset(): # unused now that we have a jobqueue
-    """ Wipe the troggle database and import everything from legacy data
-    """
-    reload_db()
-    make_dirs()
-    pageredirects()
-    import_caves()
-    import_people()
-    import_surveyscans()
-    import_logbooks()
-    import_QMs()
-    import_survex()
-    import_tunnelfiles()
-    import_surveys()
-
 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 def import_auto_logbooks():
     import parsers.logbooks
@@ -149,12 +135,11 @@ def dumplogbooks():
         f.write(unicode(output).encode( "utf-8" ))
         f.close()
 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-class JobQueue():
-    """ A list of import operations to run. Always reports times
-    in the same order """
-
-    #Constructor creates a list
+class JobQueue():
+    """A list of import operations to run. Always reports times
+    in the same order.
+    """
     def __init__(self,run):
         self.runlabel = run
         self.queue = [] # tuples of (jobname, jobfunction)
@@ -165,81 +150,88 @@ class JobQueue():
            "tunnel", "surveys", "test", "makedirs", "redirect" ]
         for k in self.results_order:
             self.results[k]=[]
+        self.tfile = "import_profile.json"
+        self.htmlfile = "profile.html"
 
-    #Adding elements to queue
+    #Adding elements to queue - enqueue
     def enq(self,label,func):
         self.queue.append((label,func))
         return True
 
-    #Removing the last element from the queue
-    def deq(self):
-        if len(self.queue)>0:
-            return self.queue.pop()
-        return ("Queue Empty!")
-
-    def size(self):
-        return len(self.queue)
+    #Removing the last element from the queue - dequeue
+    # def deq(self):
+    #     if len(self.queue)>0:
+    #         return self.queue.pop()
+    #     return ("Queue Empty!")
 
     def run(self):
-        import json
-        tfile = "import_profile.json"
-        if os.path.isfile(tfile):
+        if os.path.isfile(self.tfile):
             try:
-                f = open(tfile, "r")
+                f = open(self.tfile, "r")
                 data = json.load(f)
                 for j in data:
                     self.results[j] = data[j]
             except:
-                print "FAILURE parsing JSON file %s" % (tfile)
+                print "FAILURE parsing JSON file %s" % (self.tfile)
                 # Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12
             f.close()
-        for i in self.queue:
-            print i, self.results[i[0]]
-            self.results[i[0]].append(1.0)
 
-        print "** Running job ", self.runlabel
+        print "** Running job ", self.runlabel
+        jobstart = time.time()
+        self.results["date"].append(jobstart)
+        self.results["runlabel"].append(self.runlabel)
+
         for i in self.queue:
-            #print "*- Running \"", i[0], "\""
             start = time.time()
-            i[1]()
+            i[1]()   # looks ugly but invokes function passed in the second item in the tuple
            duration = time.time()-start
             print "\n*- Ended \"", i[0], "\" %.1f seconds" % duration
             self.results[i[0]].append(duration)
-            self.results["date"].append(start)
-            self.results["runlabel"].append(self.runlabel)
-        print "** Ended all jobs."
-        #print self.results
-        with open(tfile, 'w') as f:
-            json.dump(self.results, f)
-
+        with open(self.tfile, 'w') as f:
+            json.dump(self.results, f)
+
+        jobend = time.time()
+        jobduration = jobend-jobstart
+        print "** Ended all jobs. %.1f seconds" % jobduration
+
+        # currently uses django db whatever it was. CHANGE this to explicitly use
+        # a new sqlite3 db and then import the sql dump of that into the troggle db
+        # instead of loading directly into the troggle sqlite db.
+        # in-memory ":memory:" sqlite is ~ 7x faster and all of troggle can be
+        # loaded in 6 minutes that way
+        djconn = django.db.connection
+        from dump import _iterdump
+        with open('memdump.sql', 'w') as f:
+            for line in _iterdump(djconn):
+                f.write('%s\n' % line.encode("utf8"))
+
+        # now import the memory image sql into (to do)
+        return True
+
+    def showprofile(self):
+        """Prints out the time it took to run the jobqueue"""
         for k in self.results_order:
             percen=0
-            if k == "runlabel":
-                pass
-            if k =="date":
+            lst = self.results[k]
+            if k == "runlabel":
+                r = lst[len(lst)-1]
+                print '%15s %s' % (k,r)
+            elif k =="date":
                 # Calculate dates as days before present to one decimal place
-                pass
-            elif len(self.results[k])>3:
-                lst = self.results[k]
+                r = lst[len(lst)-1]
+                if len(lst)>2:
+                    days = (lst[len(lst)-2]-r)/(24*60*60)
+                    print '%15s %8.1f days ago' % (k,days)
+            elif len(lst)>2:
                 e = len(lst)-1
                 percen = 100* (lst[e] - lst[e-1])/lst[e-1]
-                if abs(percen) >0.1:
-                    print '%15s %8.1f%%' % (k, percen)
-            else:
-                print '%15s ' % (k)
-
+                if abs(percen) >0.1:
+                    print '%15s %8.1f%%' % (k, percen)
+            else:
+                print '%15s ' % (k)
         return True
 
-def importtest():
-    from random import randrange
-    k = 0
-    for i in range(5+randrange(15)):
-        for j in range(i):
-            k += i
-    #print k,
-    return True
 
 def usage():
     print("""Usage is 'python databaseReset.py <command> [runlabel]'
@@ -277,9 +269,12 @@ if __name__ == "__main__":
     jq = JobQueue(runlabel)
 
     if "test" in sys.argv:
-        jq.enq("test",importtest)
-        jq.enq("caves",importtest)
-        jq.enq("people",importtest)
+        jq.enq("reload",reload_db)
+        jq.enq("makedirs",make_dirs)
+        jq.enq("caves",import_caves)
+        jq.enq("survex",import_survex)
+        jq.enq("surveys",import_surveys)
+
     elif "caves" in sys.argv:
         jq.enq("caves",import_caves)
     elif "logbooks" in sys.argv:
@@ -335,3 +330,4 @@ if __name__ == "__main__":
         usage()
 
     jq.run()
+    jq.showprofile()
```
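The comment block in `run()` ends with the step still to be written: "now import the memory image sql into (to do)". A minimal sketch of what that missing step could look like, assuming only the standard-library `sqlite3` module; the target filename `troggle-copy.sqlite` and the helper name `load_dump` are hypothetical, and passing `":memory:"` instead gives the in-memory variant the comment estimates is ~7x faster:

```python
# Sketch only -- not part of this commit. Replays the SQL dump written by
# JobQueue.run() (memdump.sql) into a fresh SQLite database.
import sqlite3

def load_dump(sqlfile="memdump.sql", dbfile="troggle-copy.sqlite"):
    # dbfile is a hypothetical target; use ":memory:" for an in-memory db
    conn = sqlite3.connect(dbfile)
    with open(sqlfile) as f:
        conn.executescript(f.read())  # runs every statement in the dump
    conn.commit()
    conn.close()
    return dbfile

if __name__ == "__main__":
    load_dump()
```

`executescript()` handles the whole dump in one call, including the `BEGIN TRANSACTION`/`COMMIT` statements that `_iterdump` emits, so no statement-by-statement splitting is needed.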