diff options
author | Sam Wenham <sam@wenhams.co.uk> | 2020-02-24 15:04:07 +0000 |
---|---|---|
committer | Sam Wenham <sam@wenhams.co.uk> | 2020-02-24 15:04:07 +0000 |
commit | 43dfe946b6385037141b94b775112037fe5f032f (patch) | |
tree | 72c352e67166f0702ab93d7063c902ff1bd2c621 /parsers | |
parent | 656ddcfe93061f2f82160ff8cc45b98d27e28bd3 (diff) | |
download | troggle-django-1.10.tar.gz troggle-django-1.10.tar.bz2 troggle-django-1.10.zip |
Just removing dud whitespace (django-1.10)
Diffstat (limited to 'parsers')
-rw-r--r-- | parsers/QMs.py | 16 | ||||
-rw-r--r-- | parsers/caves.py | 6 | ||||
-rw-r--r-- | parsers/logbooks.py | 43 | ||||
-rw-r--r-- | parsers/people.py | 30 | ||||
-rw-r--r-- | parsers/subcaves.py | 8 | ||||
-rw-r--r-- | parsers/survex.py | 26 | ||||
-rw-r--r-- | parsers/surveys.py | 44 |
7 files changed, 87 insertions, 86 deletions
diff --git a/parsers/QMs.py b/parsers/QMs.py index 602b7af..9de3b4d 100644 --- a/parsers/QMs.py +++ b/parsers/QMs.py @@ -30,7 +30,7 @@ def parseCaveQMs(cave,inputFile): kh=Cave.objects.get(official_name="Kaninchenhöhle") except Cave.DoesNotExist: print("KH is not in the database. Please run parsers.cavetab first.") - parse_KH_QMs(kh, inputFile=inputFile) + parse_KH_QMs(kh, inputFile=inputFile) return qmPath = settings.EXPOWEB+inputFile @@ -46,7 +46,7 @@ def parseCaveQMs(cave,inputFile): if cave=='stein': placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, title="placeholder for QMs in 204", text="QMs temporarily attached to this should be re-attached to their actual trips", defaults={"date": date(year, 1, 1),"cave":steinBr}) elif cave=='hauch': - placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, title="placeholder for QMs in 234", text="QMs temporarily attached to this should be re-attached to their actual trips", defaults={"date": date(year, 1, 1),"cave":hauchHl}) + placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, title="placeholder for QMs in 234", text="QMs temporarily attached to this should be re-attached to their actual trips", defaults={"date": date(year, 1, 1),"cave":hauchHl}) if hadToCreate: print(cave + " placeholder logbook entry for " + str(year) + " added to database") QMnum=re.match(r".*?-\d*?-X?(?P<numb>\d*)",line[0]).group("numb") @@ -59,7 +59,7 @@ def parseCaveQMs(cave,inputFile): newQM.grade=line[1] newQM.area=line[2] newQM.location_description=line[3] - + newQM.completion_description=line[4] newQM.nearest_station_description=line[5] if newQM.completion_description: # Troggle checks if QMs are completed by checking if they have a ticked_off_by trip. In the table, completion is indicated by the presence of a completion discription. 
@@ -74,11 +74,11 @@ def parseCaveQMs(cave,inputFile): print("overwriting " + str(preexistingQM) +"\r") else: # otherwise, print that it was ignored print("preserving " + str(preexistingQM) + ", which was edited in admin \r") - + except QM.DoesNotExist: #if there is no pre-existing QM, save the new one - newQM.save() + newQM.save() print("QM "+str(newQM) + ' added to database\r') - + except KeyError: #check on this one continue except IndexError: @@ -106,9 +106,9 @@ def parse_KH_QMs(kh, inputFile): 'nearest_station_name':res['nearest_station'], 'location_description':res['description'] } - + save_carefully(QM,lookupArgs,nonLookupArgs) - + parseCaveQMs(cave='stein',inputFile=r"1623/204/qm.csv") parseCaveQMs(cave='hauch',inputFile=r"1623/234/qm.csv") diff --git a/parsers/caves.py b/parsers/caves.py index 606007f..bd79634 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -155,7 +155,7 @@ def readcave(filename): message = "Can't find text (slug): %s, skipping %s" % (slug, context) models.DataIssue.objects.create(parser='caves', message=message) print(message) - + primary = False for entrance in entrances: slug = getXML(entrance, "entranceslug", maxItems = 1, context = context)[0] @@ -167,7 +167,7 @@ def readcave(filename): message = "Entrance text (slug) %s missing %s" % (slug, context) models.DataIssue.objects.create(parser='caves', message=message) print(message) - + def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""): items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S) @@ -177,7 +177,7 @@ def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, "min": minItems} + context models.DataIssue.objects.create(parser='caves', message=message) print(message) - + if maxItems is not None and len(items) > maxItems and printwarnings: message = "%(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items), "itemname": itemname, diff --git 
a/parsers/logbooks.py b/parsers/logbooks.py index 5bbbd98..0bd7370 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -18,7 +18,7 @@ from fuzzywuzzy import fuzz from utils import save_carefully -# +# # When we edit logbook entries, allow a "?" after any piece of data to say we've frigged it and # it can be checked up later from the hard-copy if necessary; or it's not possible to determin (name, trip place, etc) # @@ -111,7 +111,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_ lookupAttribs={'date':date, 'title':title} nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave':cave, 'slug':slugify(title)[:50], 'entry_type':entry_type} lbo, created=save_carefully(models.LogbookEntry, lookupAttribs, nonLookupAttribs) - + for tripperson, time_underground in trippersons: lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo} nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)} @@ -216,7 +216,7 @@ def Parseloghtml01(year, expedition, txt): tripdate, triptitle, trippeople = tripheader.split("|") ldate = ParseDate(tripdate.strip(), year) - + mtu = re.search(r'<p[^>]*>(T/?U.*)', triptext) if mtu: tu = mtu.group(1) @@ -228,7 +228,7 @@ def Parseloghtml01(year, expedition, txt): tripcave = triptitles[0].strip() ltriptext = triptext - + mtail = re.search(r'(?:<a href="[^"]*">[^<]*</a>|\s|/|-|&|</?p>|\((?:same day|\d+)\))*$', ltriptext) if mtail: #print mtail.group(0) @@ -240,7 +240,6 @@ def Parseloghtml01(year, expedition, txt): ltriptext = re.sub(r"</?u>", "_", ltriptext) ltriptext = re.sub(r"</?i>", "''", ltriptext) ltriptext = re.sub(r"</?b>", "'''", ltriptext) - #print ldate, trippeople.strip() # could includ the tripid (url link for cross referencing) @@ -301,7 +300,7 @@ def SetDatesFromLogbookEntries(expedition): def LoadLogbookForExpedition(expedition): """ Parses all logbook entries for one expedition """ - + expowebbase = 
os.path.join(settings.EXPOWEB, "years") yearlinks = settings.LOGBOOK_PARSER_SETTINGS @@ -344,7 +343,7 @@ def LoadLogbooks(): expos = models.Expedition.objects.all() for expo in expos: print("\nLoading Logbook for: " + expo.year) - + # Load logbook for expo LoadLogbookForExpedition(expo) @@ -378,17 +377,17 @@ def parseAutoLogBookEntry(filename): expedition = models.Expedition.objects.get(year = expeditionYearMatch.groups()[0]) personExpeditionNameLookup = GetPersonExpeditionNameLookup(expedition) except models.Expedition.DoesNotExist: - errors.append("Expedition not in database") + errors.append("Expedition not in database") else: - errors.append("Expediton Year could not be parsed") + errors.append("Expediton Year could not be parsed") titleMatch = titleRegex.search(contents) if titleMatch: title, = titleMatch.groups() if len(title) > settings.MAX_LOGBOOK_ENTRY_TITLE_LENGTH: - errors.append("Title too long") + errors.append("Title too long") else: - errors.append("Title could not be found") + errors.append("Title could not be found") caveMatch = caveRegex.search(contents) if caveMatch: @@ -397,24 +396,24 @@ def parseAutoLogBookEntry(filename): cave = models.getCaveByReference(caveRef) except AssertionError: cave = None - errors.append("Cave not found in database") + errors.append("Cave not found in database") else: cave = None locationMatch = locationRegex.search(contents) if locationMatch: - location, = locationMatch.groups() + location, = locationMatch.groups() else: location = None - + if cave is None and location is None: - errors.append("Location nor cave could not be found") + errors.append("Location nor cave could not be found") reportMatch = reportRegex.search(contents) if reportMatch: report, = reportMatch.groups() else: - errors.append("Contents could not be found") + errors.append("Contents could not be found") if errors: return errors # Easiest to bail out at this point as we need to make sure that we know which expedition to look for people from. 
people = [] @@ -429,7 +428,7 @@ def parseAutoLogBookEntry(filename): author = bool(author) else: errors.append("Persons name could not be found") - + TUMatch = TURegex.search(contents) if TUMatch: TU, = TUMatch.groups() @@ -439,15 +438,15 @@ def parseAutoLogBookEntry(filename): people.append((name, author, TU)) if errors: return errors # Bail out before commiting to the database - logbookEntry = models.LogbookEntry(date = date, + logbookEntry = models.LogbookEntry(date = date, expedition = expedition, - title = title, cave = cave, place = location, + title = title, cave = cave, place = location, text = report, slug = slugify(title)[:50], filename = filename) logbookEntry.save() for name, author, TU in people: - models.PersonTrip(personexpedition = personExpo, - time_underground = TU, - logbook_entry = logbookEntry, + models.PersonTrip(personexpedition = personExpo, + time_underground = TU, + logbook_entry = logbookEntry, is_logbook_entry_author = author).save() print(logbookEntry) diff --git a/parsers/people.py b/parsers/people.py index 28a036a..0d253c9 100644 --- a/parsers/people.py +++ b/parsers/people.py @@ -12,22 +12,22 @@ def saveMugShot(mugShotPath, mugShotFilename, person): mugShotFilename=mugShotFilename[2:] else: mugShotFilename=mugShotFilename # just in case one doesn't - + dummyObj=models.DPhoto(file=mugShotFilename) - + #Put a copy of the file in the right place. 
mugShotObj.file.path is determined by the django filesystemstorage specified in models.py if not os.path.exists(dummyObj.file.path): shutil.copy(mugShotPath, dummyObj.file.path) - + mugShotObj, created = save_carefully( models.DPhoto, lookupAttribs={'is_mugshot':True, 'file':mugShotFilename}, nonLookupAttribs={'caption':"Mugshot for "+person.first_name+" "+person.last_name} ) - + if created: mugShotObj.contains_person.add(person) - mugShotObj.save() + mugShotObj.save() def parseMugShotAndBlurb(personline, header, person): """create mugshot Photo instance""" @@ -45,20 +45,20 @@ def parseMugShotAndBlurb(personline, header, person): person.save() def LoadPersonsExpos(): - + persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) personreader = csv.reader(persontab) headers = personreader.next() header = dict(zip(headers, range(len(headers)))) - + # make expeditions print("Loading expeditions") years = headers[5:] - + for year in years: lookupAttribs = {'year':year} nonLookupAttribs = {'name':"CUCC expo %s" % year} - + save_carefully(models.Expedition, lookupAttribs, nonLookupAttribs) # make persons @@ -91,7 +91,7 @@ def LoadPersonsExpos(): person, created = save_carefully(models.Person, lookupAttribs, nonLookupAttribs) parseMugShotAndBlurb(personline=personline, header=header, person=person) - + # make person expedition from table for year, attended in zip(headers, personline)[5:]: expedition = models.Expedition.objects.get(year=year) @@ -108,10 +108,10 @@ def GetPersonExpeditionNameLookup(expedition): res = Gpersonexpeditionnamelookup.get(expedition.name) if res: return res - + res = { } duplicates = set() - + print("Calculating GetPersonExpeditionNameLookup for " + expedition.year) personexpeditions = models.PersonExpedition.objects.filter(expedition=expedition) htmlparser = HTMLParser() @@ -139,16 +139,16 @@ def GetPersonExpeditionNameLookup(expedition): possnames.append(personexpedition.nickname.lower() + " " + l[0]) if 
str(personexpedition.nickname.lower() + l[0]) not in possnames: possnames.append(personexpedition.nickname.lower() + l[0]) - + for possname in possnames: if possname in res: duplicates.add(possname) else: res[possname] = personexpedition - + for possname in duplicates: del res[possname] - + Gpersonexpeditionnamelookup[expedition.name] = res return res diff --git a/parsers/subcaves.py b/parsers/subcaves.py index 739af44..364da0d 100644 --- a/parsers/subcaves.py +++ b/parsers/subcaves.py @@ -1,5 +1,7 @@ ''' -This module is the part of troggle that parses descriptions of cave parts (subcaves) from the legacy html files and saves them in the troggle database as instances of the model Subcave. Unfortunately, this parser can not be very flexible because the legacy format is poorly structured. +This module is the part of troggle that parses descriptions of cave parts (subcaves) from the legacy html +files and saves them in the troggle database as instances of the model Subcave. +Unfortunately, this parser can not be very flexible because the legacy format is poorly structured. 
''' import sys, os @@ -29,12 +31,12 @@ def importSubcaves(cave): link[0]) subcaveFile=open(subcaveFilePath,'r') description=subcaveFile.read().decode('iso-8859-1').encode('utf-8') - + lookupAttribs={'title':link[1], 'cave':cave} nonLookupAttribs={'description':description} newSubcave=save_carefully(Subcave,lookupAttribs=lookupAttribs,nonLookupAttribs=nonLookupAttribs) - logging.info("Added " + unicode(newSubcave) + " to " + unicode(cave)) + logging.info("Added " + unicode(newSubcave) + " to " + unicode(cave)) except IOError: logging.info("Subcave import couldn't open "+subcaveFilePath) diff --git a/parsers/survex.py b/parsers/survex.py index 01f6d21..c70a80b 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -19,12 +19,12 @@ def LoadSurvexLineLeg(survexblock, stardata, sline, comment, cave): ls = sline.lower().split() ssfrom = survexblock.MakeSurvexStation(ls[stardata["from"]]) ssto = survexblock.MakeSurvexStation(ls[stardata["to"]]) - + survexleg = models.SurvexLeg(block=survexblock, stationfrom=ssfrom, stationto=ssto) if stardata["type"] == "normal": try: survexleg.tape = float(ls[stardata["tape"]]) - except ValueError: + except ValueError: print("Tape misread in", survexblock.survexfile.path) print("Stardata:", stardata) print("Line:", ls) @@ -69,7 +69,7 @@ def LoadSurvexLineLeg(survexblock, stardata, sline, comment, cave): # only save proper legs survexleg.save() - + itape = stardata.get("tape") if itape: try: @@ -106,7 +106,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): stardata = stardatadefault teammembers = [ ] - # uncomment to print out all files during parsing + # uncomment to print out all files during parsing print(" - Reading file: " + survexblock.survexfile.path) stamp = datetime.now() lineno = 0 @@ -198,7 +198,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): #print('Cave -sline ' + str(cave)) if not sline: continue - + # detect the star command mstar = regex_star.match(sline) if not mstar: @@ -214,7 +214,7 @@ 
def RecursiveLoad(survexblock, survexfile, fin, textlines): # print(' - Passage: ') #Missing "station" in stardata. continue - + # detect the star command cmd, line = mstar.groups() cmd = cmd.lower() @@ -238,7 +238,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): survexblock.save() fininclude = includesurvexfile.OpenFile() RecursiveLoad(survexblock, includesurvexfile, fininclude, textlines) - + elif re.match("begin$(?i)", cmd): if line: newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line)) @@ -265,7 +265,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): RecursiveLoad(survexblockdown, survexfile, fin, textlinesdown) else: iblankbegins += 1 - + elif re.match("end$(?i)", cmd): if iblankbegins: iblankbegins -= 1 @@ -277,7 +277,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): timetaken = endstamp - stamp # print(' - Time to process: ' + str(timetaken)) return - + elif re.match("date$(?i)", cmd): if len(line) == 10: #print(' - Date found: ' + line) @@ -288,7 +288,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): survexblock.expedition = expeditions[0] survexblock.expeditionday = survexblock.expedition.get_expedition_day(survexblock.date) survexblock.save() - + elif re.match("team$(?i)", cmd): pass # print(' - Team found: ') @@ -304,13 +304,13 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): if personexpedition: personrole.person=personexpedition.person personrole.save() - + elif cmd == "title": #print(' - Title found: ') survextitle = models.SurvexTitle(survexblock=survexblock, title=line.strip('"'), cave=survexfile.cave) survextitle.save() pass - + elif cmd == "require": # should we check survex version available for processing? 
pass @@ -329,7 +329,7 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): stardata = stardatadefault else: assert ls[0] == "passage", line - + elif cmd == "equate": #print(' - Equate found: ') LoadSurvexEquate(survexblock, line) diff --git a/parsers/surveys.py b/parsers/surveys.py index 0eed6f0..d5dc128 100644 --- a/parsers/surveys.py +++ b/parsers/surveys.py @@ -24,7 +24,7 @@ def readSurveysFromCSV(): try: # could probably combine these two surveytab = open(os.path.join(settings.SURVEY_SCANS, "Surveys.csv")) except IOError: - import cStringIO, urllib + import cStringIO, urllib surveytab = cStringIO.StringIO(urllib.urlopen(settings.SURVEY_SCANS + "/Surveys.csv").read()) dialect=csv.Sniffer().sniff(surveytab.read()) surveytab.seek(0,0) @@ -37,7 +37,7 @@ def readSurveysFromCSV(): print("There are no expeditions in the database. Please run the logbook parser.") sys.exit() - + logging.info("Deleting all scanned images") ScannedImage.objects.all().delete() @@ -48,7 +48,7 @@ def readSurveysFromCSV(): for survey in surveyreader: # I hate this, but some surveys have a letter eg 2000#34a. The next line deals with that. 
- walletNumberLetter = re.match(r'(?P<number>\d*)(?P<letter>[a-zA-Z]*)',survey[header['Survey Number']]) + walletNumberLetter = re.match(r'(?P<number>\d*)(?P<letter>[a-zA-Z]*)',survey[header['Survey Number']]) # print(walletNumberLetter.groups()) year=survey[header['Year']] @@ -139,7 +139,7 @@ def parseSurveys(logfile=None): except (IOError, OSError): print("Survey CSV not found..") pass - + for expedition in Expedition.objects.filter(year__gte=2000): #expos since 2000, because paths and filenames were nonstandard before then parseSurveyScans(expedition) @@ -169,21 +169,21 @@ def GetListDir(sdir): def LoadListScansFile(survexscansfolder): gld = [ ] - + # flatten out any directories in these book files for (fyf, ffyf, fisdiryf) in GetListDir(survexscansfolder.fpath): if fisdiryf: gld.extend(GetListDir(ffyf)) else: gld.append((fyf, ffyf, fisdiryf)) - + for (fyf, ffyf, fisdiryf) in gld: #assert not fisdiryf, ffyf if re.search(r"\.(?:png|jpg|jpeg)(?i)$", fyf): survexscansingle = SurvexScanSingle(ffile=ffyf, name=fyf, survexscansfolder=survexscansfolder) survexscansingle.save() - + # this iterates through the scans directories (either here or on the remote server) # and builds up the models we can access later def LoadListScans(): @@ -194,17 +194,17 @@ def LoadListScans(): SurvexScansFolder.objects.all().delete() # first do the smkhs (large kh survey scans) directory - survexscansfoldersmkhs = SurvexScansFolder(fpath=os.path.join(settings.SURVEY_SCANS, "smkhs"), walletname="smkhs") + survexscansfoldersmkhs = SurvexScansFolder(fpath=os.path.join(settings.SURVEY_SCANS, "smkhs"), walletname="smkhs") if os.path.isdir(survexscansfoldersmkhs.fpath): survexscansfoldersmkhs.save() LoadListScansFile(survexscansfoldersmkhs) - - + + # iterate into the surveyscans directory for f, ff, fisdir in GetListDir(os.path.join(settings.SURVEY_SCANS, "surveyscans")): if not fisdir: continue - + # do the year folders if re.match(r"\d\d\d\d$", f): for fy, ffy, fisdiry in GetListDir(ff): @@ 
-213,13 +213,13 @@ def LoadListScans(): survexscansfolder = SurvexScansFolder(fpath=ffy, walletname=fy) survexscansfolder.save() LoadListScansFile(survexscansfolder) - - # do the + + # do the elif f != "thumbs": survexscansfolder = SurvexScansFolder(fpath=ff, walletname=f) survexscansfolder.save() LoadListScansFile(survexscansfolder) - + def FindTunnelScan(tunnelfile, path): scansfolder, scansfile = None, None @@ -235,12 +235,12 @@ def FindTunnelScan(tunnelfile, path): print(scansfilel, len(scansfilel)) assert len(scansfilel) == 1 scansfile = scansfilel[0] - + if scansfolder: tunnelfile.survexscansfolders.add(scansfolder) if scansfile: tunnelfile.survexscans.add(scansfile) - + elif path and not re.search(r"\.(?:png|jpg|jpeg)$(?i)", path): name = os.path.split(path)[1] print("ttt", tunnelfile.tunnelpath, path, name) @@ -260,22 +260,22 @@ def SetTunnelfileInfo(tunnelfile): fin = open(ff) ttext = fin.read() fin.close() - + mtype = re.search("<(fontcolours|sketch)", ttext) #assert mtype, ff if mtype: tunnelfile.bfontcolours = (mtype.group(1)=="fontcolours") tunnelfile.npaths = len(re.findall("<skpath", ttext)) tunnelfile.save() - + # <tunnelxml tunnelversion="version2009-06-21 Matienzo" tunnelproject="ireby" tunneluser="goatchurch" tunneldate="2009-06-29 23:22:17"> # <pcarea area_signal="frame" sfscaledown="12.282584" sfrotatedeg="-90.76982" sfxtrans="11.676667377221136" sfytrans="-15.677173422877454" sfsketch="204description/scans/plan(38).png" sfstyle="" nodeconnzsetrelative="0.0"> for path, style in re.findall('<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"', ttext): FindTunnelScan(tunnelfile, path) - + # should also scan and look for survex blocks that might have been included - # and also survex titles as well. - + # and also survex titles as well. 
+ tunnelfile.save() @@ -295,6 +295,6 @@ def LoadTunnelFiles(): elif f[-4:] == ".xml": tunnelfile = TunnelFile(tunnelpath=lf, tunnelname=os.path.split(f[:-4])[1]) tunnelfile.save() - + for tunnelfile in TunnelFile.objects.all(): SetTunnelfileInfo(tunnelfile) |