summary | refs | log | tree | commit | diff | stats
path: root/parsers
diff options
context:
space:
mode:
author: Philip Sargent <philip@Muscogee.localdomain> 2020-04-27 23:51:41 +0100
committer: Philip Sargent <philip@Muscogee.localdomain> 2020-04-27 23:51:41 +0100
commit: a8460065a41a76ea2ea59ce09daff8e5bff51aea (patch)
tree: 2a5715c59511e1487a94f14d185531bc4cab431a /parsers
parent: 2b39dec560b8029e3d0ef6f1fae2b1ecfc759f97 (diff)
download: troggle-a8460065a41a76ea2ea59ce09daff8e5bff51aea.tar.gz
troggle-a8460065a41a76ea2ea59ce09daff8e5bff51aea.tar.bz2
troggle-a8460065a41a76ea2ea59ce09daff8e5bff51aea.zip
Thorough spring clean and profiling
Diffstat (limited to 'parsers')
-rw-r--r-- parsers/caves.py 8
-rw-r--r-- parsers/people.py 4
-rw-r--r-- parsers/surveys.py 14
3 files changed, 15 insertions, 11 deletions
diff --git a/parsers/caves.py b/parsers/caves.py
index 606007f..4f65675 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -152,7 +152,7 @@ def readcave(filename):
slug = slug,
primary = primary)
except:
- message = "Can't find text (slug): %s, skipping %s" % (slug, context)
+ message = " ! Can't find text (slug): %s, skipping %s" % (slug, context)
models.DataIssue.objects.create(parser='caves', message=message)
print(message)
@@ -164,7 +164,7 @@ def readcave(filename):
entrance = models.Entrance.objects.get(entranceslug__slug = slug)
ce = models.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance)
except:
- message = "Entrance text (slug) %s missing %s" % (slug, context)
+ message = " ! Entrance text (slug) %s missing %s" % (slug, context)
models.DataIssue.objects.create(parser='caves', message=message)
print(message)
@@ -172,14 +172,14 @@ def readcave(filename):
def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""):
items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
if len(items) < minItems and printwarnings:
- message = "%(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),
+ message = " ! %(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),
"itemname": itemname,
"min": minItems} + context
models.DataIssue.objects.create(parser='caves', message=message)
print(message)
if maxItems is not None and len(items) > maxItems and printwarnings:
- message = "%(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items),
+ message = " ! %(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items),
"itemname": itemname,
"max": maxItems} + context
models.DataIssue.objects.create(parser='caves', message=message)
diff --git a/parsers/people.py b/parsers/people.py
index f7e2f50..32ab2c5 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -58,7 +58,7 @@ def LoadPersonsExpos():
header = dict(zip(headers, range(len(headers))))
# make expeditions
- print("Loading expeditions")
+ print(" - Loading expeditions")
years = headers[5:]
for year in years:
@@ -68,7 +68,7 @@ def LoadPersonsExpos():
save_carefully(models.Expedition, lookupAttribs, nonLookupAttribs)
# make persons
- print("Loading personexpeditions")
+ print(" - Loading personexpeditions")
for personline in personreader:
name = personline[header["Name"]]
diff --git a/parsers/surveys.py b/parsers/surveys.py
index 9bd063d..450725c 100644
--- a/parsers/surveys.py
+++ b/parsers/surveys.py
@@ -150,17 +150,19 @@ def parseSurveyScans(expedition, logfile=None):
scanObj.save()
except (IOError, OSError):
yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
- print("No folder found for " + expedition.year + " at:- " + yearPath)
+ print(" ! No folder found for " + expedition.year + " at:- " + yearPath)
# dead
def parseSurveys(logfile=None):
try:
readSurveysFromCSV()
except (IOError, OSError):
- print("Survey CSV not found..")
+ print(" ! Survey CSV not found..")
pass
+ print " - Loading scans by expedition year"
for expedition in Expedition.objects.filter(year__gte=2000): #expos since 2000, because paths and filenames were nonstandard before then
+ print "%s" % expedition,
parseSurveyScans(expedition)
# dead
@@ -190,7 +192,7 @@ def GetListDir(sdir):
def LoadListScansFile(survexscansfolder):
gld = [ ]
- # flatten out any directories in these book files
+ # flatten out any directories in these wallet folders - should not be any
for (fyf, ffyf, fisdiryf) in GetListDir(survexscansfolder.fpath):
if fisdiryf:
gld.extend(GetListDir(ffyf))
@@ -199,7 +201,7 @@ def LoadListScansFile(survexscansfolder):
for (fyf, ffyf, fisdiryf) in gld:
#assert not fisdiryf, ffyf
- if re.search(r"\.(?:png|jpg|jpeg)(?i)$", fyf):
+ if re.search(r"\.(?:png|jpg|jpeg|pdf|jpeg|svg)(?i)$", fyf):
survexscansingle = SurvexScanSingle(ffile=ffyf, name=fyf, survexscansfolder=survexscansfolder)
survexscansingle.save()
@@ -208,7 +210,7 @@ def LoadListScansFile(survexscansfolder):
# and builds up the models we can access later
def LoadListScans():
- print('Loading Survey Scans...')
+ print(' - Loading Survey Scans... (deleting all objects first)')
SurvexScanSingle.objects.all().delete()
SurvexScansFolder.objects.all().delete()
@@ -221,12 +223,14 @@ def LoadListScans():
# iterate into the surveyscans directory
+ print ' - ',
for f, ff, fisdir in GetListDir(os.path.join(settings.SURVEY_SCANS, "surveyscans")):
if not fisdir:
continue
# do the year folders
if re.match(r"\d\d\d\d$", f):
+ print "%s" % f,
for fy, ffy, fisdiry in GetListDir(ff):
if fisdiry:
assert fisdiry, ffy