diff options
author | Philip Sargent <philip.sargent@gmail.com> | 2025-02-19 18:32:58 +0200 |
---|---|---|
committer | Philip Sargent <philip.sargent@gmail.com> | 2025-02-19 18:32:58 +0200 |
commit | aeaf7cf57f2360ef4a6d459d5e84901f301cfb8b (patch) | |
tree | d276ec344370e3c3f91aee330bbcc4fcd82e9936 /parsers/people.py | |
parent | a950cc60d9400c553b853f3a3308edaa8ea645cc (diff) | |
download | troggle-aeaf7cf57f2360ef4a6d459d5e84901f301cfb8b.tar.gz troggle-aeaf7cf57f2360ef4a6d459d5e84901f301cfb8b.tar.bz2 troggle-aeaf7cf57f2360ef4a6d459d5e84901f301cfb8b.zip |
Make more robust against data errors
Diffstat (limited to 'parsers/people.py')
-rw-r--r-- | parsers/people.py | 11 |
1 file changed, 8 insertions, 3 deletions
diff --git a/parsers/people.py b/parsers/people.py
index b6b8159..0a05efb 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -137,10 +137,15 @@ def load_people_expos():
         nick = ""
 
         rawlastname = personline[header["Lastname"]].strip()
-        matchlastname = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname)
-        lastname = matchlastname.group(1).strip()
+        if rawlastname == "":
+            print(f"MISSING SURNAME FIELD for {name} - check against similar names in the list to see what you have done.")
+        if matchlastname := re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname):
+            lastname = matchlastname.group(1).strip()
+        else:
+            print(f"MATCH FAIL {personline=}\n {slug=}\n {name=}\n {rawlastname=}")
+            exit(1)
 
-        splitnick = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", plainname)
+        splitnick = re.match(r"^([\w&;\s\-]+)(?:\(([^)]*)\))?", plainname)
         fullname = splitnick.group(1)  # removes Nickname in brackets, but also cuts hyphenated names
         nick = splitnick.group(2) or ""
 