diff options
author | Philip Sargent <philip@Muscogee.localdomain> | 2020-04-27 23:51:41 +0100 |
---|---|---|
committer | Philip Sargent <philip@Muscogee.localdomain> | 2020-04-27 23:51:41 +0100 |
commit | a8460065a41a76ea2ea59ce09daff8e5bff51aea (patch) | |
tree | 2a5715c59511e1487a94f14d185531bc4cab431a /parsers | |
parent | 2b39dec560b8029e3d0ef6f1fae2b1ecfc759f97 (diff) | |
download | troggle-a8460065a41a76ea2ea59ce09daff8e5bff51aea.tar.gz troggle-a8460065a41a76ea2ea59ce09daff8e5bff51aea.tar.bz2 troggle-a8460065a41a76ea2ea59ce09daff8e5bff51aea.zip |
Thorough spring clean and profiling
Diffstat (limited to 'parsers')
-rw-r--r-- | parsers/caves.py | 8 | ||||
-rw-r--r-- | parsers/people.py | 4 | ||||
-rw-r--r-- | parsers/surveys.py | 14 |
3 files changed, 15 insertions, 11 deletions
diff --git a/parsers/caves.py b/parsers/caves.py
index 606007f..4f65675 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -152,7 +152,7 @@ def readcave(filename):
                                                            slug = slug,
                                                            primary = primary)
                 except:
-                    message = "Can't find text (slug): %s, skipping %s" % (slug, context)
+                    message = " ! Can't find text (slug): %s, skipping %s" % (slug, context)
                     models.DataIssue.objects.create(parser='caves', message=message)
                     print(message)
 
@@ -164,7 +164,7 @@ def readcave(filename):
                     entrance = models.Entrance.objects.get(entranceslug__slug = slug)
                     ce = models.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance)
                 except:
-                    message = "Entrance text (slug) %s missing %s" % (slug, context)
+                    message = " ! Entrance text (slug) %s missing %s" % (slug, context)
                     models.DataIssue.objects.create(parser='caves', message=message)
                     print(message)
 
@@ -172,14 +172,14 @@ def readcave(filename):
 def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""):
     items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
     if len(items) < minItems and printwarnings:
-        message = "%(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),
+        message = " ! %(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),
                                                                                "itemname": itemname,
                                                                                "min": minItems} + context
         models.DataIssue.objects.create(parser='caves', message=message)
         print(message)
 
     if maxItems is not None and len(items) > maxItems and printwarnings:
-        message = "%(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items),
+        message = " ! %(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items),
                                                                                    "itemname": itemname,
                                                                                    "max": maxItems} + context
         models.DataIssue.objects.create(parser='caves', message=message)
diff --git a/parsers/people.py b/parsers/people.py
index f7e2f50..32ab2c5 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -58,7 +58,7 @@ def LoadPersonsExpos():
     header = dict(zip(headers, range(len(headers))))
 
     # make expeditions
-    print("Loading expeditions")
+    print(" - Loading expeditions")
     years = headers[5:]
 
     for year in years:
@@ -68,7 +68,7 @@ def LoadPersonsExpos():
         save_carefully(models.Expedition, lookupAttribs, nonLookupAttribs)
 
     # make persons
-    print("Loading personexpeditions")
+    print(" - Loading personexpeditions")
 
     for personline in personreader:
         name = personline[header["Name"]]
diff --git a/parsers/surveys.py b/parsers/surveys.py
index 9bd063d..450725c 100644
--- a/parsers/surveys.py
+++ b/parsers/surveys.py
@@ -150,17 +150,19 @@ def parseSurveyScans(expedition, logfile=None):
                 scanObj.save()
     except (IOError, OSError):
         yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
-        print("No folder found for " + expedition.year + " at:- " + yearPath)
+        print(" ! No folder found for " + expedition.year + " at:- " + yearPath)
 
 # dead
 def parseSurveys(logfile=None):
     try:
         readSurveysFromCSV()
     except (IOError, OSError):
-        print("Survey CSV not found..")
+        print(" ! Survey CSV not found..")
         pass
 
+    print " - Loading scans by expedition year"
     for expedition in Expedition.objects.filter(year__gte=2000): #expos since 2000, because paths and filenames were nonstandard before then
+        print "%s" % expedition,
         parseSurveyScans(expedition)
 
 # dead
@@ -190,7 +192,7 @@ def GetListDir(sdir):
 def LoadListScansFile(survexscansfolder):
     gld = [ ]
 
-    # flatten out any directories in these book files
+    # flatten out any directories in these wallet folders - should not be any
     for (fyf, ffyf, fisdiryf) in GetListDir(survexscansfolder.fpath):
         if fisdiryf:
             gld.extend(GetListDir(ffyf))
@@ -199,7 +201,7 @@ def LoadListScansFile(survexscansfolder):
 
     for (fyf, ffyf, fisdiryf) in gld:
         #assert not fisdiryf, ffyf
-        if re.search(r"\.(?:png|jpg|jpeg)(?i)$", fyf):
+        if re.search(r"\.(?:png|jpg|jpeg|pdf|jpeg|svg)(?i)$", fyf):
             survexscansingle = SurvexScanSingle(ffile=ffyf, name=fyf, survexscansfolder=survexscansfolder)
             survexscansingle.save()
 
@@ -208,7 +210,7 @@ def LoadListScansFile(survexscansfolder):
 # and builds up the models we can access later
 def LoadListScans():
 
-    print('Loading Survey Scans...')
+    print(' - Loading Survey Scans... (deleting all objects first)')
 
     SurvexScanSingle.objects.all().delete()
     SurvexScansFolder.objects.all().delete()
@@ -221,12 +223,14 @@ def LoadListScans():
 
 
     # iterate into the surveyscans directory
+    print ' - ',
     for f, ff, fisdir in GetListDir(os.path.join(settings.SURVEY_SCANS, "surveyscans")):
         if not fisdir:
             continue
 
         # do the year folders
         if re.match(r"\d\d\d\d$", f):
+            print "%s" % f,
             for fy, ffy, fisdiry in GetListDir(ff):
                 if fisdiry:
                     assert fisdiry, ffy