summaryrefslogtreecommitdiffstats
path: root/parsers
diff options
context:
space:
mode:
author Martin Green <martin.speleo@gmail.com> 2011-05-01 19:32:41 +0100
committer Martin Green <martin.speleo@gmail.com> 2011-05-01 19:32:41 +0100
commit a26310767ba885bcb403e08f8060f045e4716e08 (patch)
tree 935cdf586e354e94757c1f8dc88584e5f3a491a6 /parsers
parent d38a767d7ca1b205be82b8cb674746f638f5fb52 (diff)
download troggle-a26310767ba885bcb403e08f8060f045e4716e08.tar.gz
troggle-a26310767ba885bcb403e08f8060f045e4716e08.tar.bz2
troggle-a26310767ba885bcb403e08f8060f045e4716e08.zip
edit logbooks, new logbook format, increased database normalisation
Diffstat (limited to 'parsers')
-rw-r--r--parsers/logbooks.py105
1 files changed, 103 insertions, 2 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 9404414..e6b553b 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -90,12 +90,12 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
#Check for an existing copy of the current entry, and save
expeditionday = expedition.get_expedition_day(date)
lookupAttribs={'date':date, 'title':title}
- nonLookupAttribs={'place':place, 'text':text, 'author':author, 'expedition':expedition, 'expeditionday':expeditionday, 'cave':cave, 'slug':slugify(title)[:50]}
+ nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave':cave, 'slug':slugify(title)[:50]}
lbo, created=save_carefully(models.LogbookEntry, lookupAttribs, nonLookupAttribs)
for tripperson, time_underground in trippersons:
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
- nonLookupAttribs={'time_underground':time_underground, 'date':date, 'expeditionday':expeditionday, 'is_logbook_entry_author':(tripperson == author)}
+ nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
#print nonLookupAttribs
save_carefully(models.PersonTrip, lookupAttribs, nonLookupAttribs)
@@ -328,4 +328,105 @@ def LoadLogbooks():
parsefunc(year, expedition, txt)
SetDatesFromLogbookEntries(expedition)
# Patterns used to pull the individual fields out of an auto-generated HTML
# logbook entry. They are written as raw strings so that regex escapes such as
# \s, \d and \. reach the regex engine untouched instead of relying on Python
# passing unknown string escapes through. re.S lets '.' span newlines, so a
# field may wrap over several lines of the file.
dateRegex = re.compile(r'<span\s+class="date">(\d\d\d\d)-(\d\d)-(\d\d)</span>', re.S)
expeditionYearRegex = re.compile(r'<span\s+class="expeditionyear">(.*?)</span>', re.S)
titleRegex = re.compile(r'<H1>(.*?)</H1>', re.S)
# Greedy on purpose: the report runs up to the last </div> before </body>.
reportRegex = re.compile(r'<div\s+class="report">(.*)</div>\s*</body>', re.S)
personRegex = re.compile(r'<div\s+class="person">(.*?)</div>', re.S)
# Group 1 is ',author' when the person is marked as author, otherwise ''.
nameAuthorRegex = re.compile(r'<span\s+class="name(,author|)">(.*?)</span>', re.S)
TURegex = re.compile(r'<span\s+class="TU">([0-9]*\.?[0-9]+)</span>', re.S)
locationRegex = re.compile(r'<span\s+class="location">(.*?)</span>', re.S)
caveRegex = re.compile(r'<span\s+class="cave">(.*?)</span>', re.S)
+
def parseAutoLogBookEntry(filename):
    """Parse one auto-generated HTML logbook entry and save it to the database.

    The file is read in full and the date, expedition year, title, cave,
    location, report text and the list of people are extracted with the
    module-level regexes. Nothing is written to the database unless every
    field parsed and every lookup succeeded.

    Args:
        filename: Path of the HTML logbook entry to read; it is also stored
            on the created LogbookEntry.

    Returns:
        A list of error message strings when the entry could not be parsed
        or looked up (in which case nothing has been saved), otherwise None
        after the LogbookEntry and its PersonTrip rows have been saved.
    """
    errors = []
    # The context manager closes the file even if read() raises.
    with open(filename, "r") as f:
        contents = f.read()

    dateMatch = dateRegex.search(contents)
    if dateMatch:
        year, month, day = [int(x) for x in dateMatch.groups()]
        try:
            date = datetime.date(year, month, day)
        except ValueError:
            # Digits matched the pattern but do not form a real calendar date.
            errors.append("Date is not valid")
    else:
        errors.append("Date could not be found")

    expeditionYearMatch = expeditionYearRegex.search(contents)
    if expeditionYearMatch:
        try:
            expedition = models.Expedition.objects.get(year=expeditionYearMatch.groups()[0])
            personExpeditionNameLookup = GetPersonExpeditionNameLookup(expedition)
        except models.Expedition.DoesNotExist:
            errors.append("Expedition not in database")
    else:
        errors.append("Expediton Year could not be parsed")

    titleMatch = titleRegex.search(contents)
    if titleMatch:
        title, = titleMatch.groups()
        if len(title) > settings.MAX_LOGBOOK_ENTRY_TITLE_LENGTH:
            errors.append("Title too long")
    else:
        errors.append("Title could not be found")

    # The cave is optional as long as a location is given (checked below).
    cave = None
    caveMatch = caveRegex.search(contents)
    if caveMatch:
        caveRef, = caveMatch.groups()
        try:
            cave = models.getCaveByReference(caveRef)
        except AssertionError:
            cave = None
            errors.append("Cave not found in database")

    location = None
    locationMatch = locationRegex.search(contents)
    if locationMatch:
        location, = locationMatch.groups()

    if cave is None and location is None:
        errors.append("Location nor cave could not be found")

    reportMatch = reportRegex.search(contents)
    if reportMatch:
        report, = reportMatch.groups()
    else:
        errors.append("Contents could not be found")

    if errors:
        # Easiest to bail out at this point: the people lookup below needs a
        # valid expedition to know whom to search among.
        return errors

    # Each item is (personExpo, is_author, TU) for one person block.
    people = []
    for personMatch in personRegex.findall(contents):
        # Search only inside this person's block; searching the whole file
        # would return the first person's name and TU for every block.
        nameAuthorMatch = nameAuthorRegex.search(personMatch)
        if nameAuthorMatch:
            author, name = nameAuthorMatch.groups()
            if name.lower() in personExpeditionNameLookup:
                personExpo = personExpeditionNameLookup[name.lower()]
            else:
                errors.append("Person could not be found in database")
            author = bool(author)
        else:
            errors.append("Persons name could not be found")

        TUMatch = TURegex.search(personMatch)
        if TUMatch:
            TU, = TUMatch.groups()
        else:
            errors.append("TU could not be found")

        if not errors:
            # Keep the looked-up person with the tuple so each trip is saved
            # against its own person, not whichever was looked up last.
            people.append((personExpo, author, TU))

    if errors:
        return errors  # Bail out before committing to the database

    logbookEntry = models.LogbookEntry(date=date,
                                       expedition=expedition,
                                       title=title, cave=cave, place=location,
                                       text=report, slug=slugify(title)[:50],
                                       filename=filename)
    logbookEntry.save()
    for personExpo, author, TU in people:
        models.PersonTrip(personexpedition=personExpo,
                          time_underground=TU,
                          logbook_entry=logbookEntry,
                          is_logbook_entry_author=author).save()
    # Parenthesised form prints the same on Python 2 and parses on Python 3.
    print(logbookEntry)