diff options
Diffstat (limited to 'parsers/people.py')
-rw-r--r-- | parsers/people.py | 11 |
1 file changed, 8 insertions, 3 deletions
diff --git a/parsers/people.py b/parsers/people.py index eb877f2..34a5ff3 100644 --- a/parsers/people.py +++ b/parsers/people.py @@ -4,6 +4,8 @@ from django.conf import settings import troggle.core.models as models import csv, re, datetime, os, shutil from utils import save_carefully +from HTMLParser import HTMLParser +from unidecode import unidecode def saveMugShot(mugShotPath, mugShotFilename, person): if mugShotFilename.startswith(r'i/'): #if filename in cell has the directory attached (I think they all do), remove it @@ -132,11 +134,12 @@ def GetPersonExpeditionNameLookup(expedition): print("Calculating GetPersonExpeditionNameLookup for " + expedition.year) personexpeditions = models.PersonExpedition.objects.filter(expedition=expedition) + htmlparser = HTMLParser() for personexpedition in personexpeditions: possnames = [ ] - f = personexpedition.person.first_name.lower() - l = personexpedition.person.last_name.lower() - full = personexpedition.person.fullname.lower() + f = unidecode(htmlparser.unescape(personexpedition.person.first_name.lower())) + l = unidecode(htmlparser.unescape(personexpedition.person.last_name.lower())) + full = unidecode(htmlparser.unescape(personexpedition.person.fullname.lower())) if l: possnames.append(f + " " + l) possnames.append(f + " " + l[0]) @@ -154,6 +157,8 @@ def GetPersonExpeditionNameLookup(expedition): possnames.append(personexpedition.nickname.lower() + " " + l) if str(personexpedition.nickname.lower() + " " + l[0]) not in possnames: possnames.append(personexpedition.nickname.lower() + " " + l[0]) + if str(personexpedition.nickname.lower() + l[0]) not in possnames: + possnames.append(personexpedition.nickname.lower() + l[0]) for possname in possnames: if possname in res: |