diff options
Diffstat (limited to 'parsers/logbooks.py')
-rw-r--r-- | parsers/logbooks.py | 43 |
1 files changed, 21 insertions, 22 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py index 5bbbd98..0bd7370 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -18,7 +18,7 @@ from fuzzywuzzy import fuzz from utils import save_carefully -# +# # When we edit logbook entries, allow a "?" after any piece of data to say we've frigged it and # it can be checked up later from the hard-copy if necessary; or it's not possible to determin (name, trip place, etc) # @@ -111,7 +111,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_ lookupAttribs={'date':date, 'title':title} nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave':cave, 'slug':slugify(title)[:50], 'entry_type':entry_type} lbo, created=save_carefully(models.LogbookEntry, lookupAttribs, nonLookupAttribs) - + for tripperson, time_underground in trippersons: lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo} nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)} @@ -216,7 +216,7 @@ def Parseloghtml01(year, expedition, txt): tripdate, triptitle, trippeople = tripheader.split("|") ldate = ParseDate(tripdate.strip(), year) - + mtu = re.search(r'<p[^>]*>(T/?U.*)', triptext) if mtu: tu = mtu.group(1) @@ -228,7 +228,7 @@ def Parseloghtml01(year, expedition, txt): tripcave = triptitles[0].strip() ltriptext = triptext - + mtail = re.search(r'(?:<a href="[^"]*">[^<]*</a>|\s|/|-|&|</?p>|\((?:same day|\d+)\))*$', ltriptext) if mtail: #print mtail.group(0) @@ -240,7 +240,6 @@ def Parseloghtml01(year, expedition, txt): ltriptext = re.sub(r"</?u>", "_", ltriptext) ltriptext = re.sub(r"</?i>", "''", ltriptext) ltriptext = re.sub(r"</?b>", "'''", ltriptext) - #print ldate, trippeople.strip() # could includ the tripid (url link for cross referencing) @@ -301,7 +300,7 @@ def SetDatesFromLogbookEntries(expedition): def LoadLogbookForExpedition(expedition): """ Parses all logbook entries for one expedition """ - + expowebbase = os.path.join(settings.EXPOWEB, "years") yearlinks = settings.LOGBOOK_PARSER_SETTINGS @@ -344,7 +343,7 @@ def LoadLogbooks(): expos = models.Expedition.objects.all() for expo in expos: print("\nLoading Logbook for: " + expo.year) - + # Load logbook for expo LoadLogbookForExpedition(expo) @@ -378,17 +377,17 @@ def parseAutoLogBookEntry(filename): expedition = models.Expedition.objects.get(year = expeditionYearMatch.groups()[0]) personExpeditionNameLookup = GetPersonExpeditionNameLookup(expedition) except models.Expedition.DoesNotExist: - errors.append("Expedition not in database") + errors.append("Expedition not in database") else: - errors.append("Expediton Year could not be parsed") + errors.append("Expediton Year could not be parsed") titleMatch = titleRegex.search(contents) if titleMatch: title, = titleMatch.groups() if len(title) > settings.MAX_LOGBOOK_ENTRY_TITLE_LENGTH: - errors.append("Title too long") + errors.append("Title too long") else: - errors.append("Title could not be found") + errors.append("Title could not be found") caveMatch = caveRegex.search(contents) if caveMatch: @@ -397,24 +396,24 @@ def parseAutoLogBookEntry(filename): cave = models.getCaveByReference(caveRef) except AssertionError: cave = None - errors.append("Cave not found in database") + errors.append("Cave not found in database") else: cave = None locationMatch = locationRegex.search(contents) if locationMatch: - location, = locationMatch.groups() + location, = locationMatch.groups() else: location = None - + if cave is None and location is None: - errors.append("Location nor cave could not be found") + errors.append("Location nor cave could not be found") reportMatch = reportRegex.search(contents) if reportMatch: report, = reportMatch.groups() else: - errors.append("Contents could not be found") + errors.append("Contents could not be found") if errors: return errors # Easiest to bail out at this point as we need to make sure that we know which expedition to look for people from. people = [] @@ -429,7 +428,7 @@ def parseAutoLogBookEntry(filename): author = bool(author) else: errors.append("Persons name could not be found") - + TUMatch = TURegex.search(contents) if TUMatch: TU, = TUMatch.groups() @@ -439,15 +438,15 @@ def parseAutoLogBookEntry(filename): people.append((name, author, TU)) if errors: return errors # Bail out before commiting to the database - logbookEntry = models.LogbookEntry(date = date, + logbookEntry = models.LogbookEntry(date = date, expedition = expedition, - title = title, cave = cave, place = location, + title = title, cave = cave, place = location, text = report, slug = slugify(title)[:50], filename = filename) logbookEntry.save() for name, author, TU in people: - models.PersonTrip(personexpedition = personExpo, - time_underground = TU, - logbook_entry = logbookEntry, + models.PersonTrip(personexpedition = personExpo, + time_underground = TU, + logbook_entry = logbookEntry, is_logbook_entry_author = author).save() print(logbookEntry) |