summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- databaseReset.py 186
-rw-r--r-- parsers/caves.py 8
-rw-r--r-- parsers/people.py 4
-rw-r--r-- parsers/surveys.py 14
4 files changed, 128 insertions, 84 deletions
diff --git a/databaseReset.py b/databaseReset.py
index a4687cd..dadb2dc 100644
--- a/databaseReset.py
+++ b/databaseReset.py
@@ -18,7 +18,10 @@ expouser=settings.EXPOUSER
expouserpass=settings.EXPOUSERPASS
expouseremail=settings.EXPOUSER_EMAIL
-def reload_db():
+def reinit_db():
+ """Rebuild database from scratch. Deletes the file first if sqlite is used,
+ otherwise it drops the database and creates it.
+ """
if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.sqlite3':
try:
os.remove(databasename)
@@ -30,26 +33,27 @@ def reload_db():
cursor.execute("CREATE DATABASE %s" % databasename)
cursor.execute("ALTER DATABASE %s CHARACTER SET=utf8" % databasename)
cursor.execute("USE %s" % databasename)
- management.call_command('syncdb', interactive=False)
- user = User.objects.create_user(expouser, expouseremail, expouserpass)
- user.is_staff = True
- user.is_superuser = True
- user.save()
+ syncuser()
def syncuser():
- """Sync user - needed after reload"""
+ """Sync user - needed after reload
+ """
+ print("Synchronizing user")
management.call_command('syncdb', interactive=False)
user = User.objects.create_user(expouser, expouseremail, expouserpass)
user.is_staff = True
user.is_superuser = True
user.save()
-
-def make_dirs():
- """Make directories that troggle requires"""
+def dirsredirect():
+ """Make directories that troggle requires and sets up page redirects
+ """
#should also deal with permissions here.
if not os.path.isdir(settings.PHOTOS_ROOT):
os.mkdir(settings.PHOTOS_ROOT)
+ for oldURL, newURL in [("indxal.htm", reverse("caveindex"))]:
+ f = troggle.flatpages.models.Redirect(originalURL = oldURL, newURL = newURL)
+ f.save()
def import_caves():
import parsers.caves
@@ -58,38 +62,49 @@ def import_caves():
def import_people():
import parsers.people
+ print("Importing People (folk.csv)")
parsers.people.LoadPersonsExpos()
def import_logbooks():
import parsers.logbooks
+ print("Importing Logbooks")
parsers.logbooks.LoadLogbooks()
+def import_QMs():
+ print("Importing QMs (old caves)")
+ import parsers.QMs
+ # import process itself runs on qm.csv in only 3 caves, not 264!
+
def import_survex():
import parsers.survex
+ print("Importing Survex Blocks")
parsers.survex.LoadAllSurvexBlocks()
+ print("Importing Survex Positions")
+ parsers.survex.LoadPos()
+
+def import_survexpos():
+ import parsers.survex
+ print("Importing Survex Positions")
parsers.survex.LoadPos()
-def import_QMs():
- import parsers.QMs
- # import process itself runs on qm.csv in only 3 caves, not 264!
-
def import_surveys():
+ """This appears to store data in unused objects. The code is kept
+ for future re-working to manage progress against notes, plans and elevs.
+ """
import parsers.surveys
+ print("Importing surveys")
parsers.surveys.parseSurveys(logfile=settings.LOGFILE)
def import_surveyscans():
import parsers.surveys
+ print("Importing Survey Scans")
parsers.surveys.LoadListScans()
def import_tunnelfiles():
import parsers.surveys
+ print("Importing Tunnel files")
parsers.surveys.LoadTunnelFiles()
-def pageredirects():
- for oldURL, newURL in [("indxal.htm", reverse("caveindex"))]:
- f = troggle.flatpages.models.Redirect(originalURL = oldURL, newURL = newURL)
- f.save()
-
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def import_auto_logbooks():
import parsers.logbooks
@@ -154,9 +169,9 @@ class JobQueue():
self.queue = [] # tuples of (jobname, jobfunction)
self.results = {}
self.results_order=[
- "date","runlabel","reload", "caves", "people",
+ "date","runlabel","reinit", "caves", "people",
"logbooks", "scans", "QMs", "survex",
- "tunnel", "surveys", "test", "makedirs", "redirect", "syncuser" ]
+ "tunnel", "surveys", "test", "dirsredirect", "syncuser", "survexpos" ]
for k in self.results_order:
self.results[k]=[]
self.tfile = "import_profile.json"
@@ -191,12 +206,12 @@ class JobQueue():
self.results["runlabel"].append(self.runlabel)
for i in self.queue:
- start = time.time()
- i[1]() # looks ugly but invokes function passed in the second item in the tuple
- duration = time.time()-start
- print "\n*- Ended \"", i[0], "\" %.1f seconds" % duration
- self.results[i[0]].append(duration)
-
+ start = time.time()
+ i[1]() # looks ugly but invokes function passed in the second item in the tuple
+ duration = time.time()-start
+ print "\n*- Ended \"", i[0], "\" %.1f seconds" % duration
+ self.results[i[0]].append(duration)
+
with open(self.tfile, 'w') as f:
json.dump(self.results, f)
@@ -207,7 +222,7 @@ class JobQueue():
# currently uses django db whatever it was. CHANGE this to explicitly use
# a new sqlite3 db and then import the sql dump of that into the troggle db
# instead of loading directly into the troggle sqlite db.
- # in-menmor ":memory:" sqlite is ~ 7x faster and all of troggle can be
+ # in-memory ":memory:" sqlite is ~ 7x faster and all of troggle can be
# loaded in 6 minutes that way
djconn = django.db.connection
from dump import _iterdump
@@ -221,52 +236,76 @@ class JobQueue():
def showprofile(self):
"""Prints out the time it took to run the jobqueue"""
for k in self.results_order:
- percen=0
- lst = self.results[k]
- if k == "runlabel":
- r = lst[len(lst)-1]
- print '%15s %s' % (k,r)
+ if k =="dirsredirect":
+ break
+ elif k =="syncuser":
+ break
+ elif k =="test":
+ break
elif k =="date":
- # Calculate dates as days before present to one decimal place
- r = lst[len(lst)-1]
- if len(lst)>2:
- days = (lst[len(lst)-2]-r)/(24*60*60)
- print '%15s %8.1f days ago' % (k,days)
- elif len(lst)>2:
- e = len(lst)-1
- percen = 100* (lst[e] - lst[e-1])/lst[e-1]
- if abs(percen) >0.1:
- print '%15s %8.1f%%' % (k, percen)
- else:
- print '%15s ' % (k)
+ print " days ago ",
+ else:
+ print '%9s (s)' % k,
+ percen=0
+ r = self.results[k]
+ #print "min=",min
+
+ for i in range(len(r)):
+ if k == "runlabel":
+ if r[i]:
+ rp = r[i]
+ else:
+ rp = " - "
+ print '%8s' % rp,
+ elif k =="date":
+ # Calculate dates as days before present
+ if r[i]:
+ if i == len(r)-1:
+ print " this",
+ else:
+ # prints one place to the left of where you expect
+ days = (r[i]-r[len(r)-1])/(24*60*60)
+ print '%8.2f' % days,
+ elif r[i]:
+ print '%8.1f' % r[i],
+ if i == len(r)-1 and r[i-1]:
+ percen = 100* (r[i] - r[i-1])/r[i-1]
+ if abs(percen) >0.1:
+ print '%8.1f%%' % percen,
+ else:
+ print " - ",
+ print ""
return True
def usage():
print("""Usage is 'python databaseReset.py <command> [runlabel]'
where command is:
- reset - this is normal usage, clear database and reread everything from files - time-consuming
+ reset - normal usage: clear database and reread everything from files - time-consuming
caves - read in the caves
- logbooks - read in just the logbooks
+ logbooks - read in the logbooks
people - read in the people from folk.csv
- QMs - read in the QM csv files
- reload_db - clear database (delete everything) and make empty tables
- scans - NOT the scanned surveynotes ?!
+ QMs - read in the QM csv files (older caves only)
+ reinit - clear database (delete everything) and make empty tables. Import nothing.
+ scans - the survey scans in all the wallets
survex - read in the survex files - all the survex blocks
- surveys - read in the scanned surveynotes
- tunnel - read in the Tunnel files - which scans the surveyscans too
-
survexpos - just the Pos out of the survex files (not part of reset)
+ tunnel - read in the Tunnel files - which scans the survey scans too
+
resetend - (archaic?)
writecaves - *disabled* (archaic?)
autologbooks - read in autologbooks (what are these?)
dumplogbooks - write out autologbooks (not working?)
- syncuser - needed after reloading database rom SQL backup
+ syncuser - needed after reloading database from SQL backup
+ surveys - read in scans by expo, must run after "people". Not used.
test - testing...
and [runlabel] is an optional string identifying this run of the script
in the stored profiling data 'import-profile.json'
+
+ caves and logbooks must be run on an empty db before the others as they
+ set up db tables used by the others.
""")
if __name__ == "__main__":
@@ -275,31 +314,35 @@ if __name__ == "__main__":
import django
django.setup()
- runlabel = sys.argv[len(sys.argv)-1]
+ if len(sys.argv)>2:
+ runlabel = sys.argv[len(sys.argv)-1]
+ else:
+ runlabel=None
+
jq = JobQueue(runlabel)
-
- if "test" in sys.argv:
- jq.enq("reload",reload_db)
- jq.enq("makedirs",make_dirs)
+
+ if len(sys.argv)==1:
+ usage()
+ exit()
+ elif "test" in sys.argv:
+ jq.enq("reinit",reinit_db)
+ jq.enq("dirsredirect",dirsredirect)
jq.enq("caves",import_caves)
- jq.enq("survex",import_survex)
- jq.enq("surveys",import_surveys)
-
+ #jq.enq("people",import_people)
+ #jq.enq("logbooks",import_logbooks)
elif "caves" in sys.argv:
jq.enq("caves",import_caves)
elif "logbooks" in sys.argv:
- # management.call_command('syncdb', interactive=False) # this sets the path so that import settings works in import_survex
jq.enq("logbooks",import_logbooks)
elif "people" in sys.argv:
- jq.enq("logbooks",import_people)
+ jq.enq("people",import_people)
elif "QMs" in sys.argv:
jq.enq("QMs",import_QMs)
elif "reload_db" in sys.argv:
jq.enq("reload",reload_db)
elif "reset" in sys.argv:
- jq.enq("reload",reload_db)
- jq.enq("makedirs",make_dirs)
- jq.enq("redirect",pageredirects)
+ jq.enq("reinit",reinit_db)
+ jq.enq("dirsredirect",dirsredirect)
jq.enq("caves",import_caves)
jq.enq("people",import_people)
jq.enq("scans",import_surveyscans)
@@ -307,16 +350,12 @@ if __name__ == "__main__":
jq.enq("QMs",import_QMs)
jq.enq("survex",import_survex)
jq.enq("tunnel",import_tunnelfiles)
- jq.enq("surveys",import_surveys)
elif "scans" in sys.argv:
jq.enq("scans",import_surveyscans)
elif "survex" in sys.argv:
- # management.call_command('syncdb', interactive=False) # this sets the path so that import settings works in import_survex
jq.enq("survex",import_survex)
elif "survexpos" in sys.argv:
- # management.call_command('syncdb', interactive=False) # this sets the path so that import settings works in import_survex
- import parsers.survex
- jq.enq("survexpos",parsers.survex.LoadPos)
+ jq.enq("survexpos",import_survexpos)
elif "surveys" in sys.argv:
jq.enq("surveys",import_surveys)
elif "tunnel" in sys.argv:
@@ -336,8 +375,9 @@ if __name__ == "__main__":
elif "dumplogbooks" in sys.argv:
dumplogbooks()
else:
- print("%s not recognised" % sys.argv)
usage()
+ print("%s not recognised as a command." % sys.argv[1])
+ exit()
jq.run()
jq.showprofile()
diff --git a/parsers/caves.py b/parsers/caves.py
index 606007f..4f65675 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -152,7 +152,7 @@ def readcave(filename):
slug = slug,
primary = primary)
except:
- message = "Can't find text (slug): %s, skipping %s" % (slug, context)
+ message = " ! Can't find text (slug): %s, skipping %s" % (slug, context)
models.DataIssue.objects.create(parser='caves', message=message)
print(message)
@@ -164,7 +164,7 @@ def readcave(filename):
entrance = models.Entrance.objects.get(entranceslug__slug = slug)
ce = models.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance)
except:
- message = "Entrance text (slug) %s missing %s" % (slug, context)
+ message = " ! Entrance text (slug) %s missing %s" % (slug, context)
models.DataIssue.objects.create(parser='caves', message=message)
print(message)
@@ -172,14 +172,14 @@ def readcave(filename):
def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""):
items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
if len(items) < minItems and printwarnings:
- message = "%(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),
+ message = " ! %(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),
"itemname": itemname,
"min": minItems} + context
models.DataIssue.objects.create(parser='caves', message=message)
print(message)
if maxItems is not None and len(items) > maxItems and printwarnings:
- message = "%(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items),
+ message = " ! %(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items),
"itemname": itemname,
"max": maxItems} + context
models.DataIssue.objects.create(parser='caves', message=message)
diff --git a/parsers/people.py b/parsers/people.py
index f7e2f50..32ab2c5 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -58,7 +58,7 @@ def LoadPersonsExpos():
header = dict(zip(headers, range(len(headers))))
# make expeditions
- print("Loading expeditions")
+ print(" - Loading expeditions")
years = headers[5:]
for year in years:
@@ -68,7 +68,7 @@ def LoadPersonsExpos():
save_carefully(models.Expedition, lookupAttribs, nonLookupAttribs)
# make persons
- print("Loading personexpeditions")
+ print(" - Loading personexpeditions")
for personline in personreader:
name = personline[header["Name"]]
diff --git a/parsers/surveys.py b/parsers/surveys.py
index 9bd063d..450725c 100644
--- a/parsers/surveys.py
+++ b/parsers/surveys.py
@@ -150,17 +150,19 @@ def parseSurveyScans(expedition, logfile=None):
scanObj.save()
except (IOError, OSError):
yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
- print("No folder found for " + expedition.year + " at:- " + yearPath)
+ print(" ! No folder found for " + expedition.year + " at:- " + yearPath)
# dead
def parseSurveys(logfile=None):
try:
readSurveysFromCSV()
except (IOError, OSError):
- print("Survey CSV not found..")
+ print(" ! Survey CSV not found..")
pass
+ print " - Loading scans by expedition year"
for expedition in Expedition.objects.filter(year__gte=2000): #expos since 2000, because paths and filenames were nonstandard before then
+ print "%s" % expedition,
parseSurveyScans(expedition)
# dead
@@ -190,7 +192,7 @@ def GetListDir(sdir):
def LoadListScansFile(survexscansfolder):
gld = [ ]
- # flatten out any directories in these book files
+ # flatten out any directories in these wallet folders - should not be any
for (fyf, ffyf, fisdiryf) in GetListDir(survexscansfolder.fpath):
if fisdiryf:
gld.extend(GetListDir(ffyf))
@@ -199,7 +201,7 @@ def LoadListScansFile(survexscansfolder):
for (fyf, ffyf, fisdiryf) in gld:
#assert not fisdiryf, ffyf
- if re.search(r"\.(?:png|jpg|jpeg)(?i)$", fyf):
+ if re.search(r"\.(?:png|jpg|jpeg|pdf|jpeg|svg)(?i)$", fyf):
survexscansingle = SurvexScanSingle(ffile=ffyf, name=fyf, survexscansfolder=survexscansfolder)
survexscansingle.save()
@@ -208,7 +210,7 @@ def LoadListScansFile(survexscansfolder):
# and builds up the models we can access later
def LoadListScans():
- print('Loading Survey Scans...')
+ print(' - Loading Survey Scans... (deleting all objects first)')
SurvexScanSingle.objects.all().delete()
SurvexScansFolder.objects.all().delete()
@@ -221,12 +223,14 @@ def LoadListScans():
# iterate into the surveyscans directory
+ print ' - ',
for f, ff, fisdir in GetListDir(os.path.join(settings.SURVEY_SCANS, "surveyscans")):
if not fisdir:
continue
# do the year folders
if re.match(r"\d\d\d\d$", f):
+ print "%s" % f,
for fy, ffy, fisdiry in GetListDir(ff):
if fisdiry:
assert fisdiry, ffy