diff options
author | Philip Sargent <philip.sargent@gmail.com> | 2023-10-01 17:53:25 +0300 |
---|---|---|
committer | Philip Sargent <philip.sargent@gmail.com> | 2023-10-01 17:53:25 +0300 |
commit | f161ed3cf445768e88001e9d339e78318d16c137 (patch) | |
tree | 664bd20032b3f3f8ef926d5fb4a6e2bcfb93a43b /parsers/people.py | |
parent | d8aad0ba2b91a61308e50375376dd1594b2dffa7 (diff) | |
download | troggle-f161ed3cf445768e88001e9d339e78318d16c137.tar.gz troggle-f161ed3cf445768e88001e9d339e78318d16c137.tar.bz2 troggle-f161ed3cf445768e88001e9d339e78318d16c137.zip |
fixing accented names
Diffstat (limited to 'parsers/people.py')
-rw-r--r-- | parsers/people.py | 15 |
1 files changed, 13 insertions, 2 deletions
diff --git a/parsers/people.py b/parsers/people.py
index c0ffa10..01998e1 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -65,14 +65,24 @@ def parse_blurb(personline, header, person):
slug_cache = {} def troggle_slugify(longname): """Uniqueness enforcement too. Yes we have had two "Dave Johnson"s + This function copied intact to expoweb/scripts/make-folklist.py """ slug = longname.strip().lower().replace(" ","-") + slug = re.sub('\([^\)]*\)','',slug) # remove nickname in brackets + slug = slug.replace('é', 'e') + slug = slug.replace('á', 'a') + slug = slug.replace('ä', 'a') + slug = slug.replace('&', '') # otherwise just remove the & + slug = slug.replace(';', '') # otherwise just remove the ; + slug = re.sub('<[^>]*>','',slug) # remove <span-lang = "hu"> + if len(slug) > 40: # slugfield is 50 chars slug = slug[:40] if slug in slug_cache: slug_cache[slug] += 1 slug = f"{slug}_{slug_cache[slug]}" slug_cache[slug] = 1 + return slug def load_people_expos():
@@ -99,8 +109,9 @@ def load_people_expos():
print(" - Loading personexpeditions") for personline in personreader: + # This is all horrible: refactor it. name = personline[header["Name"]] - name = re.sub(r"<.*?>", "", name) + plainname = re.sub(r"<.*?>", "", name) # now in slugify match = re.match(r"^([^(]*)(\(([^)]*)\))?", name) # removes nickname in brackets displayname = match.group(1)
@@ -113,7 +124,7 @@ def load_people_expos():
matchlastname = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname) lastname = matchlastname.group(1).strip() - splitnick = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", name) + splitnick = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", plainname) fullname = splitnick.group(1) # removes Nickname in brackets, but also cuts hyphenated names nick = splitnick.group(2) or "" |