diff options
-rw-r--r-- | core/TESTS/tests.py | 11
-rw-r--r-- | core/views_other.py | 4
-rw-r--r-- | databaseReset.py | 157
-rw-r--r-- | parsers/QMs.py | 12
-rw-r--r-- | parsers/caves.py | 4
-rw-r--r-- | parsers/logbooks.py | 28
-rw-r--r-- | parsers/survex.py | 2
-rw-r--r-- | settings.py | 3
-rw-r--r-- | urls.py | 4
9 files changed, 136 insertions(+), 89 deletions(-)
diff --git a/core/TESTS/tests.py b/core/TESTS/tests.py index 5a4dd8d..c3ab815 100644 --- a/core/TESTS/tests.py +++ b/core/TESTS/tests.py @@ -51,7 +51,16 @@ class SimpleTest(SimpleTestCase): from troggle.core.models_caves import CaveSlug, Cave, CaveAndEntrance, QM, CaveDescription, EntranceSlug, Entrance, Area, SurvexStation from troggle.core.forms import CaveForm, CaveAndEntranceFormSet, VersionControlCommentForm, EntranceForm, EntranceLetterForm from troggle.helper import login_required_if_public - + def test_import_parses_mix(self): + import troggle.parsers.survex + import troggle.parsers.caves + import troggle.settings + import troggle.flatpages.models + import troggle.logbooksdump + import troggle.parsers.people + import troggle.parsers.surveys + import troggle.parsers.logbooks + import troggle.parsers.QMs __test__ = {"doctest": """ Another way to test that 1 + 1 is equal to 2. diff --git a/core/views_other.py b/core/views_other.py index 0b56786..c31777a 100644 --- a/core/views_other.py +++ b/core/views_other.py @@ -15,6 +15,7 @@ from troggle.core.models_survex import SurvexLeg from troggle.helper import login_required_if_public from troggle.core.forms import UploadFileForm +print("** importing troggle/core/views_other.py") def showrequest(request): return HttpResponse(request.GET) @@ -57,11 +58,12 @@ def controlPanel(request): jobs_completed=[] if request.method=='POST': if request.user.is_superuser: + # NONE of this works now that databaseReset has been so extensively rewritten #importlist is mostly here so that things happen in the correct order. #http post data seems to come in an unpredictable order, so we do it this way. 
importlist=['reinit_db', 'import_people', 'import_caves', 'import_logbooks', - 'import_survexblks', 'import_QMs', 'import_survexpos', 'import_surveyscans', 'import_tunnelfiles'] + 'import_survexblks', 'import_QMs', 'import_surveyscans', 'import_tunnelfiles'] databaseReset.dirsredirect() for item in importlist: if item in request.POST: diff --git a/databaseReset.py b/databaseReset.py index 2319827..0d5d5c2 100644 --- a/databaseReset.py +++ b/databaseReset.py @@ -1,5 +1,4 @@ -from __future__ import (absolute_import, division, - print_function) +import sys import os import time import timeit @@ -16,9 +15,18 @@ from django.http import HttpResponse from django.core.urlresolvers import reverse from troggle.core.models_caves import Cave, Entrance -import troggle.settings +import troggle.parsers.caves +#import troggle.settings import troggle.flatpages.models import troggle.logbooksdump +import troggle.parsers.people +import troggle.parsers.surveys +import troggle.parsers.logbooks +import troggle.parsers.QMs + +import troggle.core.models +import troggle.core.models_survex +import django # NOTE databaseReset.py is *imported* by views_other.py as it is used in the control panel # presented there. @@ -31,9 +39,9 @@ if os.geteuid() == 0: expouser=settings.EXPOUSER expouserpass=settings.EXPOUSERPASS expouseremail=settings.EXPOUSER_EMAIL +print(" - SETTINGS: {} ({:.5}...) <{}> on module loading".format(expouser, expouserpass, expouseremail)) + -def call_django_tests(n): - management.call_command('test', verbosity=n) def reinit_db(): """Rebuild database from scratch. Deletes the file first if sqlite is used, @@ -51,6 +59,7 @@ def reinit_db(): cursor.execute("CREATE DATABASE %s" % currentdbname) cursor.execute("ALTER DATABASE %s CHARACTER SET=utf8" % currentdbname) cursor.execute("USE %s" % currentdbname) + print(" - SETTINGS: {} ({:.5}...) 
<{}> before calling syncuser()".format(expouser, expouserpass, expouseremail)) syncuser() def syncuser(): @@ -74,37 +83,32 @@ def dirsredirect(): f.save() def import_caves(): - import troggle.parsers.caves print("Importing Caves") troggle.parsers.caves.readcaves() def import_people(): - import troggle.parsers.people print("Importing People (folk.csv)") troggle.parsers.people.LoadPersonsExpos() +def import_surveyscans(): + print("Importing Survey Scans") + troggle.parsers.surveys.LoadListScans() + def import_logbooks(): - import troggle.parsers.logbooks print("Importing Logbooks") troggle.parsers.logbooks.LoadLogbooks() def import_QMs(): print("Importing QMs (old caves)") - import troggle.parsers.QMs - # import process itself runs on qm.csv in only 3 old caves, not the modern ones! + troggle.parsers.QMs.Load_QMs() -def import_surveyscans(): - import troggle.parsers.surveys - print("Importing Survey Scans") - troggle.parsers.surveys.LoadListScans() - def import_survexblks(): - import troggle.parsers.survex + # when this import is moved to the top with the rest it all crashes horribly + import troggle.parsers.survex print("Importing Survex Blocks") troggle.parsers.survex.LoadAllSurvexBlocks() def import_survexpos(): - import troggle.parsers.survex print("Importing Survex x/y/z Positions") troggle.parsers.survex.LoadPos() @@ -117,7 +121,6 @@ def import_surveyimgs(): #troggle.parsers.surveys.parseSurveys(logfile=settings.LOGFILE) def import_tunnelfiles(): - import troggle.parsers.surveys print("Importing Tunnel files") troggle.parsers.surveys.LoadTunnelFiles() @@ -140,6 +143,10 @@ class JobQueue(): """A list of import operations to run. Always reports profile times in the same order. """ + dbengine = "" + dbname = "" + dbdefault ="" + def __init__(self,run): self.runlabel = run self.queue = [] # tuples of (jobname, jobfunction) @@ -217,10 +224,54 @@ class JobQueue(): jobend = time.time() jobduration = jobend-jobstart print("** Ended job %s - %.1f seconds total." 
% (self.runlabel,jobduration)) - return True - - + + def store_dbsettings(self): + self.dbengine = settings.DATABASES['default']['ENGINE'] + self.dbname = settings.DATABASES['default']['NAME'] + self.dbdefault = settings.DATABASES['default'] + + def restore_dbsettings(self): + settings.DATABASES['default'] = self.dbdefault + settings.DATABASES['default']['ENGINE'] = self.dbengine + settings.DATABASES['default']['NAME'] = self.dbname + + def set_in_memory_dbsettings(self): + django.db.close_old_connections() # needed if MySQL running? + settings.DATABASES['default'] = {'ENGINE': 'django.db.backends.sqlite3', + 'AUTOCOMMIT': True, + 'ATOMIC_REQUESTS': False, + 'NAME': ':memory:', + 'CONN_MAX_AGE': 0, + 'TIME_ZONE': 'UTC', + 'OPTIONS': {}, + 'HOST': '', + 'USER': '', + 'TEST': {'COLLATION': None, 'CHARSET': None, 'NAME': None, 'MIRROR': None}, + 'PASSWORD': '', + 'PORT': ''} + settings.DATABASES['default']['ENGINE'] = 'django.db.backends.sqlite3' + settings.DATABASES['default']['NAME'] = ":memory:" + + def append_placeholders(self): + for j in self.results_order: + self.results[j].append(None) # append a placeholder + + def run_now_django_tests(self,n): + self.store_dbsettings() + # this leaves the db set to :memory: whatever it was initially + management.call_command('test', verbosity=n) + django.db.close_old_connections() + self.restore_dbsettings() + + def skip_memory_phase(self): + if not self.runlabel: + return True + else: + if self.runlabel == "" or self.runlabel[0:2] == "F-": + return True + return False + def run(self): """First runs all the jobs in the queue against a scratch in-memory db then re-runs the import against the db specified in settings.py @@ -229,51 +280,24 @@ class JobQueue(): relinquish some kind of db connection (not fixed yet) """ self.loadprofiles() - # save db settings for later - dbengine = settings.DATABASES['default']['ENGINE'] - dbname = settings.DATABASES['default']['NAME'] - dbdefault = settings.DATABASES['default'] - - skipmem = 
False - if self.runlabel: - if self.runlabel == "": - skipmem = True - elif self.runlabel[0:2] == "F-": - skipmem = True - else: - skipmem = True - - print("-- ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE']) + self.store_dbsettings() + + print("-- phase 0 ", settings.DATABASES['default']['ENGINE'], settings.DATABASES['default']['NAME']) #print "-- DATABASES.default", settings.DATABASES['default'] - if dbname ==":memory:": + if self.dbname ==":memory:": # just run, and save the sql file self.runqonce() self.memdumpsql() # saved contents of scratch db, could be imported later.. self.saveprofiles() - elif skipmem: + elif self.skip_memory_phase(): self.runqonce() self.saveprofiles() else: - django.db.close_old_connections() # needed if MySQL running? # run all the imports through :memory: first - settings.DATABASES['default']['ENGINE'] = 'django.db.backends.sqlite3' - settings.DATABASES['default']['NAME'] = ":memory:" - settings.DATABASES['default'] = {'ENGINE': 'django.db.backends.sqlite3', - 'AUTOCOMMIT': True, - 'ATOMIC_REQUESTS': False, - 'NAME': ':memory:', - 'CONN_MAX_AGE': 0, - 'TIME_ZONE': 'UTC', - 'OPTIONS': {}, - 'HOST': '', - 'USER': '', - 'TEST': {'COLLATION': None, 'CHARSET': None, 'NAME': None, 'MIRROR': None}, - 'PASSWORD': '', - 'PORT': ''} - - - print("-- ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE']) + self.set_in_memory_dbsettings() + + print("-- phase 1 ", settings.DATABASES['default']['ENGINE'], settings.DATABASES['default']['NAME']) #print("-- DATABASES.default", settings.DATABASES['default']) # but because the user may be expecting to add this to a db with lots of tables already there, @@ -281,7 +305,6 @@ class JobQueue(): # because we are using an empty :memory: database # But initiating twice crashes it; so be sure to do it once only. - # Damn. syncdb() is still calling MySQL somehow **conn_params not sqlite3. So crashes on expo server. 
if ("reinit",reinit_db) not in self.queue: reinit_db() @@ -301,20 +324,18 @@ class JobQueue(): # restore the original db and import again # if we wanted to, we could re-import the SQL generated in the first pass to be # blazing fast. But for the present just re-import the lot. - settings.DATABASES['default'] = dbdefault - settings.DATABASES['default']['ENGINE'] = dbengine - settings.DATABASES['default']['NAME'] = dbname - print("-- ", settings.DATABASES['default']['NAME'], settings.DATABASES['default']['ENGINE']) + self.restore_dbsettings() + print("-- phase 2 ", settings.DATABASES['default']['ENGINE'], settings.DATABASES['default']['NAME']) django.db.close_old_connections() # maybe not needed here for j in self.results_order: self.results[j].pop() # throw away results from :memory: run - self.results[j].append(None) # append a placeholder + self.append_placeholders() django.db.close_old_connections() # magic rune. works. found by looking in django.db__init__.py #django.setup() # should this be needed? - self.runqonce() # crashes because it thinks it has no migrations to apply, when it does. + self.runqonce() self.saveprofiles() return True @@ -405,9 +426,6 @@ def usage(): """) if __name__ == "__main__": - import troggle.core.models - import sys - import django django.setup() if os.geteuid() == 0: @@ -419,16 +437,16 @@ if __name__ == "__main__": else: runlabel=None - call_django_tests(1) jq = JobQueue(runlabel) + jq.run_now_django_tests(1) if len(sys.argv)==1: usage() exit() elif "test" in sys.argv: - call_django_tests(2) jq.enq("caves",import_caves) jq.enq("people",import_people) + #jq.run_now_django_tests(2) elif "caves" in sys.argv: jq.enq("caves",import_caves) elif "logbooks" in sys.argv: @@ -466,6 +484,9 @@ if __name__ == "__main__": # writeCaves() elif "profile" in sys.argv: jq.loadprofiles() + # need to increment everything runq does + print("!! 
- days before appears as 0.00 - to be fixed") + jq.append_placeholders() jq.showprofile() exit() elif "help" in sys.argv: diff --git a/parsers/QMs.py b/parsers/QMs.py index e4ff29d..92e6c71 100644 --- a/parsers/QMs.py +++ b/parsers/QMs.py @@ -1,5 +1,3 @@ -# -*- coding: UTF-8 -*- - import csv import os import re @@ -113,8 +111,8 @@ def parse_KH_QMs(kh, inputFile): save_carefully(QM,lookupArgs,nonLookupArgs) - -parseCaveQMs(cave='stein',inputFile=r"1623/204/qm.csv") -parseCaveQMs(cave='hauch',inputFile=r"1623/234/qm.csv") -parseCaveQMs(cave='kh', inputFile="1623/161/qmtodo.htm") -#parseCaveQMs(cave='balkonhoehle',inputFile=r"1623/264/qm.csv") +def Load_QMs(): + parseCaveQMs(cave='stein',inputFile=r"1623/204/qm.csv") + parseCaveQMs(cave='hauch',inputFile=r"1623/234/qm.csv") + parseCaveQMs(cave='kh', inputFile="1623/161/qmtodo.htm") + #parseCaveQMs(cave='balkonhoehle',inputFile=r"1623/264/qm.csv") diff --git a/parsers/caves.py b/parsers/caves.py index 514aa78..0d2ddfb 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -8,8 +8,8 @@ import troggle.core.models as models import troggle.core.models_caves as models_caves def readcaves(): - # Clear the cave data issues as we are reloading + # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. models.DataIssue.objects.filter(parser='caves').delete() area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None) @@ -26,6 +26,7 @@ def readcaves(): def readentrance(filename): + # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. with open(os.path.join(settings.ENTRANCEDESCRIPTIONS, filename)) as f: contents = f.read() context = "in file %s" % filename @@ -89,6 +90,7 @@ def readentrance(filename): primary = False def readcave(filename): + # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. 
with open(os.path.join(settings.CAVEDESCRIPTIONS, filename)) as f: contents = f.read() context = " in file %s" % filename diff --git a/parsers/logbooks.py b/parsers/logbooks.py index ce78e6d..5061d62 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -292,7 +292,7 @@ def SetDatesFromLogbookEntries(expedition): persontrip.save() -def LoadLogbookForExpedition(expedition): +def LoadLogbookForExpedition(expedition,numentries): """ Parses all logbook entries for one expedition """ global logentries @@ -358,8 +358,6 @@ def LoadLogbookForExpedition(expedition): else: print(" ! NO TRIP entries found in logbook, check the syntax.") - logentries=[] # flush for next year - if logbook_cached: i=0 for entrytuple in range(len(logentries)): @@ -368,19 +366,33 @@ def LoadLogbookForExpedition(expedition): EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground,\ entry_type) i +=1 - + return len(logentries) def LoadLogbooks(): """ This is the master function for parsing all logbooks into the Troggle database. """ DataIssue.objects.filter(parser='logbooks').delete() expos = Expedition.objects.all() + if len(expos) <= 1: + print(" ! No expeditions found. 
Load 'people' first.") nologbook = ["1976", "1977","1978","1979","1980","1980","1981","1983","1984", "1985","1986","1987","1988","1989","1990",] - for expo in expos: - if expo.year not in nologbook: - print((" - Logbook for: " + expo.year)) - LoadLogbookForExpedition(expo) + entries = {"2020": 0, "2019": 40, "2018": 148, "2017": 120, "2016": 162, "2015": 158, + "2014": 130, "2013": 102, "2012": 150, "2011": 136, "2010": 44, "2009": 104, + "2008": 98, "2007": 222, "2006": 48, "2005": 110, "2004": 152, "2003": 80, "2002": 62, + "2001": 96, "2000": 108, "1999": 158, "1998": 86, "1997": 106, "1996": 188, "1995": 82, + "1994": 64, "1993": 82, "1992": 122, "1991": 76, "1982": 76} + try: + os.remove("loadlogbk.log") + except OSError: + pass + with open("loadlogbk.log", "a") as log: + for expo in expos: + if expo.year not in nologbook: + print((" - Logbook for: " + expo.year)) + numentries = LoadLogbookForExpedition(expo, entries[expo.year]) + log.write("{} {} should be {}\n".format(expo.year, numentries, entries[expo.year])) + dateRegex = re.compile(r'<span\s+class="date">(\d\d\d\d)-(\d\d)-(\d\d)</span>', re.S) diff --git a/parsers/survex.py b/parsers/survex.py index ccdd527..b2d1e50 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -1,5 +1,3 @@ -from __future__ import (absolute_import, division, - print_function) import sys import os import re diff --git a/settings.py b/settings.py index 7002715..391e635 100644 --- a/settings.py +++ b/settings.py @@ -19,6 +19,8 @@ import urllib.parse import django +print("** importing troggle/settings.py") + # Note that this builds upon the django system installed # global settings in # django/conf/global_settings.py which is automatically loaded first. @@ -158,3 +160,4 @@ TEST_RUNNER = 'django.test.runner.DiscoverRunner' from localsettings import * #localsettings needs to take precedence. Call it to override any existing vars. 
+print("** finished importing troggle/settings.py") @@ -42,6 +42,7 @@ actualurlpatterns = patterns('', url(r'^newfile', views_other.newFile, name="newFile"), # oddly broken, needs investigating more url(r'^getEntrances/(?P<caveslug>.*)', views_caves.get_entrances, name = "get_entrances"), #works + # e.g. /getEntrances/1623-161 url(r'^getQMs/(?P<caveslug>.*)', views_caves.get_qms, name = "get_qms"), # no template "get_qms"? url(r'^getPeople/(?P<expeditionslug>.*)', views_logbooks.get_people, name = "get_people"), # fails url(r'^getLogBookEntries/(?P<expeditionslug>.*)', views_logbooks.get_logbook_entries, name = "get_logbook_entries"), #works @@ -121,7 +122,8 @@ actualurlpatterns = patterns('', #(r'^survey_scans/(?P<path>.*)$', 'django.views.static.serve', {'document_root': settings.SURVEY_SCANS, 'show_indexes':True}), url(r'^survey_scans/$', view_surveys.surveyscansfolders, name="surveyscansfolders"), - url(r'^survey_scans/(?P<path>[^/]+)/$', view_surveys.surveyscansfolder, name="surveyscansfolder"), + url(r'^survey_scans/(?P<path>[^/]+)/$', view_surveys.surveyscansfolder, name="surveyscansfolder"), + # This next line is beyond daft. If anyone uploads a file *anywhere* in SURVEY_SCANS which doesn't match, troggle crashes horribly. Has been failing for pdf and JPG files for years: url(r'^survey_scans/(?P<path>[^/]+)/(?P<file>[^/]+(?:png|jpg|pdf|jpeg|PNG|JPG|PDF|JPEG))$', view_surveys.surveyscansingle, name="surveyscansingle"), |