Diffstat (limited to 'parsers/logbooks.py')
-rw-r--r-- | parsers/logbooks.py | 39 |
1 file changed, 20 insertions, 19 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 01800a1..ded90e4 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -40,7 +40,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground):
             tripperson = re.sub(round_bracket_regex, "", tripperson).strip()
             personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
             if not personyear:
-                print(" - No name match for: '%s'" % tripperson)
+                print((" - No name match for: '%s'" % tripperson))
                 message = "No name match for: '%s' in year '%s'" % (tripperson, expedition.year)
                 models.DataIssue.objects.create(parser='logbooks', message=message)
             res.append((personyear, logtime_underground))
@@ -72,11 +72,11 @@ def GetTripCave(place): #need to be fuzzier about matching here. Already a very
         return tripCaveRes
 
     elif len(tripCaveRes)>1:
-        print("Ambiguous place " + str(place) + " entered. Choose from " + str(tripCaveRes))
-        correctIndex=input("type list index of correct cave")
+        print(("Ambiguous place " + str(place) + " entered. Choose from " + str(tripCaveRes)))
+        correctIndex=eval(input("type list index of correct cave"))
         return tripCaveRes[correctIndex]
     else:
-        print("No cave found for place " , place)
+        print(("No cave found for place " , place))
         return
 
 logentries = [] # the entire logbook is a single object: a list of entries
@@ -92,7 +92,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
 
     trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground)
     if not author:
-        print(" - Skipping logentry: " + title + " - no author for entry")
+        print((" - Skipping logentry: " + title + " - no author for entry"))
         message = "Skipping logentry: %s - no author for entry in year '%s'" % (title, expedition.year)
         models.DataIssue.objects.create(parser='logbooks', message=message)
         return
@@ -175,7 +175,7 @@ def Parseloghtmltxt(year, expedition, txt):
                   ''', trippara)
         if not s:
             if not re.search(r"Rigging Guide", trippara):
-                print("can't parse: ", trippara) # this is 2007 which needs editing
+                print(("can't parse: ", trippara)) # this is 2007 which needs editing
             #assert s, trippara
             continue
         tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
@@ -203,7 +203,7 @@ def Parseloghtmltxt(year, expedition, txt):
 def Parseloghtml01(year, expedition, txt):
     tripparas = re.findall(r"<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
     for trippara in tripparas:
-        s = re.match(u"(?s)\s*(?:<p>)?(.*?)</?p>(.*)$(?i)", trippara)
+        s = re.match("(?s)\s*(?:<p>)?(.*?)</?p>(.*)$(?i)", trippara)
         assert s, trippara[:300]
         tripheader, triptext = s.group(1), s.group(2)
         mtripid = re.search(r'<a id="(.*?)"', tripheader)
@@ -251,7 +251,7 @@ def Parseloghtml01(year, expedition, txt):
 def Parseloghtml03(year, expedition, txt):
     tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
     for trippara in tripparas:
-        s = re.match(u"(?s)\s*<p>(.*?)</p>(.*)$", trippara)
+        s = re.match("(?s)\s*<p>(.*?)</p>(.*)$", trippara)
         assert s, trippara
         tripheader, triptext = s.group(1), s.group(2)
         tripheader = re.sub(r"&nbsp;", " ", tripheader)
@@ -261,7 +261,7 @@ def Parseloghtml03(year, expedition, txt):
         if re.match("T/U|Time underwater", sheader[-1]):
             tu = sheader.pop()
         if len(sheader) != 3:
-            print("header not three pieces", sheader)
+            print(("header not three pieces", sheader))
         tripdate, triptitle, trippeople = sheader
         ldate = ParseDate(tripdate.strip(), year)
         triptitles = triptitle.split(" , ")
@@ -325,35 +325,36 @@ def LoadLogbookForExpedition(expedition):
                 #print " - Cache is more than 30 days old."
                 bad_cache= True
             if bad_cache:
-                print " - Cache is either stale or more than 30 days old. Deleting it."
+                print(" - Cache is either stale or more than 30 days old. Deleting it.")
                 os.remove(cache_filename)
                 logentries=[]
                 raise
-            print(" - Reading cache: " + cache_filename )
+            print((" - Reading cache: " + cache_filename ))
             try:
                 with open(cache_filename, "rb") as f:
                     logentries = pickle.load(f)
-                print " - Loaded ", len(logentries), " objects"
+                print(" - Loaded ", len(logentries), " objects")
                 logbook_cached = True
             except:
-                print " - Failed to load corrupt cache. Deleting it.\n"
+                print(" - Failed to load corrupt cache. Deleting it.\n")
                 os.remove(cache_filename)
                 logentries=[]
+                raise
         except:
             print(" - Opening logbook: ")
-            file_in = open(os.path.join(expowebbase, year_settings[0]))
+            file_in = open(os.path.join(expowebbase, year_settings[0]),'rb')
             txt = file_in.read().decode("latin1")
            file_in.close()
             parsefunc = year_settings[1]
             logbook_parseable = True
-            print(" - Parsing logbook: " + year_settings[0] + "\n - Using parser: " + year_settings[1])
+            print((" - Parsing logbook: " + year_settings[0] + "\n - Using parser: " + year_settings[1]))
 
         if logbook_parseable:
             parser = globals()[parsefunc]
             parser(expedition.year, expedition, txt)
             SetDatesFromLogbookEntries(expedition)
             # and this has also stored all the objects in logentries[]
-            print " - Storing " , len(logentries), " log entries"
+            print(" - Storing " , len(logentries), " log entries")
             cache_filename = os.path.join(expowebbase, year_settings[0])+".cache"
             with open(cache_filename, "wb") as f:
                 pickle.dump(logentries, f, 2)
@@ -370,7 +371,7 @@ def LoadLogbookForExpedition(expedition):
             i +=1
     else:
         try:
-            file_in = open(os.path.join(expowebbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE))
+            file_in = open(os.path.join(expowebbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE),'rb')
             txt = file_in.read().decode("latin1")
             file_in.close()
             logbook_parseable = True
@@ -378,7 +379,7 @@ def LoadLogbookForExpedition(expedition):
             parsefunc = settings.DEFAULT_LOGBOOK_PARSER
         except (IOError):
             logbook_parseable = False
-            print("Couldn't open default logbook file and nothing in settings for expo " + expedition.year)
+            print(("Couldn't open default logbook file and nothing in settings for expo " + expedition.year))
 
 
     #return "TOLOAD: " + year + " " + str(expedition.personexpedition_set.all()[1].logbookentry_set.count()) + " " + str(models.PersonTrip.objects.filter(personexpedition__expedition=expedition).count())
@@ -391,7 +392,7 @@ def LoadLogbooks():
     # Fetch all expos
     expos = models.Expedition.objects.all()
     for expo in expos:
-        print("\nLoading Logbook for: " + expo.year)
+        print(("\nLoading Logbook for: " + expo.year))
 
         # Load logbook for expo
         LoadLogbookForExpedition(expo)
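Note on the converted calls above: the doubled parentheses and the eval(input(...)) call are the mechanical output of the 2to3 print and input fixers. In Python 3, print(("a", b)) prints the tuple representation rather than the space-separated values the Python 2 code produced, and eval(input(...)) executes whatever expression the user types. The sketch below shows how the same patterns could be written by hand; it is illustrative only and not part of this commit. The choose_cave and read_logbook helpers and their arguments are invented for the example, while the user prompt, the messages and the latin1 decoding follow the diff.

# A minimal hand-written Python 3 sketch of three patterns that 2to3 converted
# mechanically in the diff above. choose_cave() and read_logbook() are invented
# for illustration; they are not functions in parsers/logbooks.py.

def choose_cave(trip_cave_res, place):
    """Ask the user to pick a cave when a place name matches several caves."""
    if not trip_cave_res:
        # print() with several arguments separates them with spaces;
        # print(("No cave found for place ", place)) would show the tuple repr.
        print("No cave found for place", place)
        return None
    if len(trip_cave_res) == 1:
        return trip_cave_res[0]
    print("Ambiguous place", place, "entered. Choose from", trip_cave_res)
    # int() accepts only an integer, whereas eval(input(...)) would execute
    # any expression the user typed.
    correct_index = int(input("type list index of correct cave: "))
    return trip_cave_res[correct_index]

def read_logbook(path):
    """Read a latin1-encoded logbook file as text under Python 3."""
    # Equivalent to the commit's open(path, 'rb') followed by .decode("latin1"):
    # opening in text mode with an explicit encoding avoids the manual decode.
    with open(path, encoding="latin1") as file_in:
        return file_in.read()

Either spelling runs, but the doubled parentheses left by the fixer change what gets printed, which matters wherever these messages are read or grepped from the import logs.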