# Reconstructed from the lines added to parsers/logbooks.py by commit
# a26310767ba885bcb403e08f8060f045e4716e08 (Martin Green, Sun, 1 May 2011
# 19:32:41 +0100): "edit logbooks, new logbook format, increased database
# normalisation".
#
# The same commit also edited EnterLogIntoDbase: 'author' and 'expeditionday'
# were dropped from the LogbookEntry nonLookupAttribs, and 'date' and
# 'expeditionday' were dropped from the PersonTrip nonLookupAttribs.  Only a
# fragment of that function is visible in the patch, so it is not repeated
# here.

# Patterns used by parseAutoLogBookEntry to pull the individual fields out of
# the text of a logbook entry file.
#
# NOTE(review): these patterns are reproduced exactly as they appear in the
# rendered patch, but they look incomplete -- the title pattern was split
# across blank lines, and nameAuthorRegex shows a single group although
# parseAutoLogBookEntry unpacks two groups from it.  The delimiters around
# each group appear to have been lost when the patch was converted to text;
# restore them from the repository copy of parsers/logbooks.py before use.
dateRegex = re.compile(r'(\d\d\d\d)-(\d\d)-(\d\d)', re.S)
expeditionYearRegex = re.compile('(.*?)', re.S)
titleRegex = re.compile('(.*?)', re.S)
reportRegex = re.compile(r'(.*)\s*', re.S)
personRegex = re.compile('(.*?)', re.S)
nameAuthorRegex = re.compile('(.*?)', re.S)
TURegex = re.compile(r'([0-9]*\.?[0-9]+)', re.S)
locationRegex = re.compile('(.*?)', re.S)
caveRegex = re.compile('(.*?)', re.S)


def parseAutoLogBookEntry(filename):
    """Parse one logbook entry file and save it to the database.

    The file is read in full and searched with the module-level regexes for
    a date, an expedition year, a title, an optional cave reference, an
    optional location, the report text and one block per person (each
    holding a name/author marker and a time-underground figure).

    filename -- path of the logbook entry file to read.

    Returns a list of error message strings if anything could not be parsed
    or looked up; in that case nothing is saved.  On success a LogbookEntry
    and one PersonTrip per person are saved, the entry is printed, and None
    is returned.
    """
    errors = []

    # try/finally rather than "with" so the handle is always closed without
    # relying on syntax newer than the rest of this file uses.
    f = open(filename, "r")
    try:
        contents = f.read()
    finally:
        f.close()

    dateMatch = dateRegex.search(contents)
    if dateMatch:
        year, month, day = [int(x) for x in dateMatch.groups()]
        date = datetime.date(year, month, day)
    else:
        errors.append("Date could not be found")

    expeditionYearMatch = expeditionYearRegex.search(contents)
    if expeditionYearMatch:
        try:
            expedition = models.Expedition.objects.get(year = expeditionYearMatch.groups()[0])
            personExpeditionNameLookup = GetPersonExpeditionNameLookup(expedition)
        except models.Expedition.DoesNotExist:
            errors.append("Expedition not in database")
    else:
        errors.append("Expediton Year could not be parsed")

    titleMatch = titleRegex.search(contents)
    if titleMatch:
        title, = titleMatch.groups()
        if len(title) > settings.MAX_LOGBOOK_ENTRY_TITLE_LENGTH:
            errors.append("Title too long")
    else:
        errors.append("Title could not be found")

    caveMatch = caveRegex.search(contents)
    if caveMatch:
        caveRef, = caveMatch.groups()
        try:
            cave = models.getCaveByReference(caveRef)
        except AssertionError:
            cave = None
            errors.append("Cave not found in database")
    else:
        cave = None

    locationMatch = locationRegex.search(contents)
    if locationMatch:
        location, = locationMatch.groups()
    else:
        location = None

    # An entry must be tied to at least one of a cave or a free-text place.
    if cave is None and location is None:
        errors.append("Location nor cave could not be found")

    reportMatch = reportRegex.search(contents)
    if reportMatch:
        report, = reportMatch.groups()
    else:
        errors.append("Contents could not be found")

    if errors:
        # Easiest to bail out at this point as we need to make sure that we
        # know which expedition to look for people from.
        return errors

    people = []
    for personMatch in personRegex.findall(contents):
        # Search inside this person's own block.  Searching the whole file
        # (as the code originally did) returned the first name and TU in the
        # file for every person.
        personExpo = None
        nameAuthorMatch = nameAuthorRegex.search(personMatch)
        if nameAuthorMatch:
            author, name = nameAuthorMatch.groups()
            if name.lower() in personExpeditionNameLookup:
                personExpo = personExpeditionNameLookup[name.lower()]
            else:
                errors.append("Person could not be found in database")
            author = bool(author)
        else:
            errors.append("Persons name could not be found")

        TUMatch = TURegex.search(personMatch)
        if TUMatch:
            TU, = TUMatch.groups()
        else:
            errors.append("TU could not be found")

        if not errors:
            # Keep the resolved person with their own author flag and TU so
            # each PersonTrip below is saved against the right person.
            people.append((personExpo, author, TU))

    if errors:
        return errors # Bail out before commiting to the database

    logbookEntry = models.LogbookEntry(date = date,
                                       expedition = expedition,
                                       title = title, cave = cave, place = location,
                                       text = report, slug = slugify(title)[:50],
                                       filename = filename)
    logbookEntry.save()
    for personExpo, author, TU in people:
        models.PersonTrip(personexpedition = personExpo,
                          time_underground = TU,
                          logbook_entry = logbookEntry,
                          is_logbook_entry_author = author).save()
    # Parenthesised single argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print(logbookEntry)