summary | refs | log | tree | commit | diff | stats
path: root/parsers/people.py
diff options
context:
space:
mode:
Diffstat (limited to 'parsers/people.py')
-rw-r--r-- parsers/people.py 52
1 files changed, 37 insertions, 15 deletions
diff --git a/parsers/people.py b/parsers/people.py
index 4799ebf..c0ffa10 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -3,9 +3,9 @@ import os
import re
from html import unescape
from pathlib import Path
+from unidecode import unidecode
from django.conf import settings
-from unidecode import unidecode
from troggle.core.models.troggle import DataIssue, Expedition, Person, PersonExpedition
@@ -17,7 +17,9 @@ or they should use the same code by importing a module.
def parse_blurb(personline, header, person):
- """create mugshot Photo instance"""
+ """create mugshot Photo instance
+ Would be better if all this was done before the Person object was created in the db, then it would not
+ need re-saving (which is slow)"""
ms_filename = personline[header["Mugshot"]]
ms_path = Path(settings.EXPOWEB, "folk", ms_filename)
@@ -60,7 +62,19 @@ def parse_blurb(personline, header, person):
person.save()
-
+# Maps every slug handed out so far to a counter. For a base slug the counter
+# is the highest numeric suffix tried so far; suffixed slugs are stored too so
+# they can never be issued a second time.
+slug_cache = {}
+
+
+def troggle_slugify(longname):
+    """Return a unique, lower-case, hyphen-separated slug for a person's name.
+
+    Uniqueness enforcement too. Yes we have had two "Dave Johnson"s.
+
+    The name is stripped, lower-cased, and spaces become hyphens; nothing else
+    is normalised (punctuation and non-ASCII characters pass through as-is).
+    The base is cut to 40 characters so that a "_N" suffix still fits in the
+    slug field (noted as 50 chars in the original comment).
+
+    The first use of a base returns it unchanged; later uses return
+    "<base>_2", "<base>_3", ... Uniqueness is tracked in the module-level
+    slug_cache, so it only holds within one process / one import run.
+    """
+    base = longname.strip().lower().replace(" ", "-")[:40]
+    if base not in slug_cache:
+        slug_cache[base] = 1
+        return base
+
+    # Keep counting until the suffixed candidate is itself unused. Without
+    # this re-check a name that already looks like "<base>_2" would collide
+    # with the generated suffix for the second "<base>".
+    slug = base
+    while slug in slug_cache:
+        slug_cache[base] += 1
+        slug = f"{base}_{slug_cache[base]}"
+    slug_cache[slug] = 1
+    return slug
+
def load_people_expos():
"""This is where the folk.csv file is parsed to read people's names.
Which it gets wrong for people like Lydia-Clare Leather and various 'von' and 'de' middle 'names'
@@ -86,8 +100,11 @@ def load_people_expos():
for personline in personreader:
name = personline[header["Name"]]
- name = re.sub(r"<.*?>", "", name)
- slug = slugify(name)
+ name = re.sub(r"<.*?>", "", name)
+
+ match = re.match(r"^([^(]*)(\(([^)]*)\))?", name) # removes nickname in brackets
+ displayname = match.group(1)
+ slug = troggle_slugify(displayname)
firstname = ""
nick = ""
@@ -97,34 +114,39 @@ def load_people_expos():
lastname = matchlastname.group(1).strip()
splitnick = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", name)
- fullname = splitnick.group(1)
-
- nick = splitnick.group(2) or ""
+ fullname = splitnick.group(1) # removes Nickname in brackets, but also cuts hyphenated names
+ nick = splitnick.group(2) or ""
fullname = fullname.strip()
- names = fullname.split(" ")
+
+ names = fullname.split(" ") # This may have more than one, e.g. "Adeleide de Diesback"
firstname = names[0]
if len(names) == 1:
- lastname = ""
+ lastname = "" # wookey special code
+
+ #restore fullname to be the whole string
+ fullname = displayname
if personline[header["VfHO member"]] == "":
vfho = False
else:
vfho = True
- coUniqueAttribs = {"first_name": firstname, "last_name": (lastname or "")}
- otherAttribs = {"is_vfho": vfho, "fullname": fullname, "nickname": nick}
+ # would be better to just create the python object, and only commit to db once all done inc blurb
+ # and better to save all the Persons in a bulk update, then do all the PersonExpeditions
+ coUniqueAttribs = {"slug": slug}
+ otherAttribs = {"first_name": firstname, "last_name": (lastname or ""), "is_vfho": vfho, "fullname": fullname, "nickname": nick,"is_guest": (personline[header["Guest"]] == "1")}
person = Person.objects.create(**otherAttribs, **coUniqueAttribs)
- parse_blurb(personline=personline, header=header, person=person)
+ parse_blurb(personline=personline, header=header, person=person) # saves to db too
# make person expedition from table
for year, attended in list(zip(headers, personline))[5:]:
expedition = Expedition.objects.get(year=year)
if attended == "1" or attended == "-1":
coUniqueAttribs = {"person": person, "expedition": expedition}
- otherAttribs = {"is_guest": (personline[header["Guest"]] == "1")}
- pe = PersonExpedition.objects.create(**otherAttribs, **coUniqueAttribs)
+ # otherAttribs = {"is_guest": (personline[header["Guest"]] == "1")}
+ pe = PersonExpedition.objects.create(**coUniqueAttribs)
print("", flush=True)