summary | refs | log | tree | commit | diff | stats
path: root/parsers/people.py
diff options
context:
space:
mode:
author: Philip Sargent <philip.sargent@gmail.com> 2023-10-01 17:53:25 +0300
committer: Philip Sargent <philip.sargent@gmail.com> 2023-10-01 17:53:25 +0300
commit: f161ed3cf445768e88001e9d339e78318d16c137 (patch)
tree: 664bd20032b3f3f8ef926d5fb4a6e2bcfb93a43b /parsers/people.py
parent: d8aad0ba2b91a61308e50375376dd1594b2dffa7 (diff)
download: troggle-f161ed3cf445768e88001e9d339e78318d16c137.tar.gz
troggle-f161ed3cf445768e88001e9d339e78318d16c137.tar.bz2
troggle-f161ed3cf445768e88001e9d339e78318d16c137.zip
fixing accented names
Diffstat (limited to 'parsers/people.py')
-rw-r--r-- parsers/people.py 15
1 files changed, 13 insertions, 2 deletions
diff --git a/parsers/people.py b/parsers/people.py
index c0ffa10..01998e1 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -65,14 +65,24 @@ def parse_blurb(personline, header, person):
slug_cache = {}
def troggle_slugify(longname):
"""Uniqueness enforcement too. Yes we have had two "Dave Johnson"s
+ This function copied intact to expoweb/scripts/make-folklist.py
"""
slug = longname.strip().lower().replace(" ","-")
+ slug = re.sub('\([^\)]*\)','',slug) # remove nickname in brackets
+ slug = slug.replace('&eacute;', 'e')
+ slug = slug.replace('&aacute;', 'a')
+ slug = slug.replace('&auml;', 'a')
+ slug = slug.replace('&', '') # otherwise just remove the &
+ slug = slug.replace(';', '') # otherwise just remove the ;
+ slug = re.sub('<[^>]*>','',slug) # remove <span-lang = "hu">
+
if len(slug) > 40: # slugfield is 50 chars
slug = slug[:40]
if slug in slug_cache:
slug_cache[slug] += 1
slug = f"{slug}_{slug_cache[slug]}"
slug_cache[slug] = 1
+
return slug
def load_people_expos():
@@ -99,8 +109,9 @@ def load_people_expos():
print(" - Loading personexpeditions")
for personline in personreader:
+ # This is all horrible: refactor it.
name = personline[header["Name"]]
- name = re.sub(r"<.*?>", "", name)
+ plainname = re.sub(r"<.*?>", "", name) # now in slugify
match = re.match(r"^([^(]*)(\(([^)]*)\))?", name) # removes nickname in brackets
displayname = match.group(1)
@@ -113,7 +124,7 @@ def load_people_expos():
matchlastname = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname)
lastname = matchlastname.group(1).strip()
- splitnick = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", name)
+ splitnick = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", plainname)
fullname = splitnick.group(1) # removes Nickname in brackets, but also cuts hyphenated names
nick = splitnick.group(2) or ""