1 files changed, 111 insertions, 93 deletions
diff --git a/parsers/people.py b/parsers/people.py
index bfacc2a..47bb328 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -9,80 +9,81 @@ from pathlib import Path
 from django.conf import settings
 from unidecode import unidecode
 
-from troggle.core.models.troggle import (DataIssue, Expedition, Person,
-                                         PersonExpedition)
+from troggle.core.models.troggle import DataIssue, Expedition, Person, PersonExpedition
 from troggle.core.utils import TROG, save_carefully
 
-'''These functions do not match how the stand-alone folk script works. So the script produces an HTML file which has 
+"""These functions do not match how the stand-alone folk script works. So the script produces an HTML file which has 
 href links to pages in troggle which troggle does not think are right.
 The standalone script needs to be renedred defucnt, and all the parsing needs to be in troggle. Either that,
 or they should use the same code by importing a module.
-'''
+"""
+
 
 def parse_blurb(personline, header, person):
     """create mugshot Photo instance"""
     ms_filename = personline[header["Mugshot"]]
     ms_path = Path(settings.EXPOWEB, "folk", ms_filename)
-    
+
     if ms_filename:
         if not ms_path.is_file():
             message = f"! INVALID mug_shot field '{ms_filename}' for {person.fullname}"
             print(message)
-            DataIssue.objects.create(parser='people', message=message, url=f"/person/{person.fullname}")
+            DataIssue.objects.create(parser="people", message=message, url=f"/person/{person.fullname}")
             return
-    
-    if ms_filename.startswith('i/'):
-        #if person just has an image, add it. It has format 'i/adama2018.jpg'
+
+    if ms_filename.startswith("i/"):
+        # if person just has an image, add it. It has format 'i/adama2018.jpg'
         person.mug_shot = str(Path("/folk", ms_filename))
         person.blurb = None
 
-    elif ms_filename.startswith('l/'): 
+    elif ms_filename.startswith("l/"):
         # it has the format 'l/ollybetts.htm' the file may contain <img src="../i/mymug.jpg"> images
-        with open(ms_path,'r') as blurbfile:
+        with open(ms_path, "r") as blurbfile:
             blrb = blurbfile.read()
-        pblurb=re.search(r'<body>.*<hr',blrb,re.DOTALL)
+        pblurb = re.search(r"<body>.*<hr", blrb, re.DOTALL)
         if pblurb:
-            person.mug_shot = None           
-            fragment= re.search('<body>(.*)<hr',blrb,re.DOTALL).group(1) 
+            person.mug_shot = None
+            fragment = re.search("<body>(.*)<hr", blrb, re.DOTALL).group(1)
             fragment = fragment.replace('src="../i/', 'src="/folk/i/')
             fragment = fragment.replace("src='../i/", "src='/folk/i/")
-            fragment = re.sub(r'<h.*>[^<]*</h.>', '', fragment)
+            fragment = re.sub(r"<h.*>[^<]*</h.>", "", fragment)
             # replace src="../i/ with src="/folk/i
             person.blurb = fragment
         else:
             message = f"! Blurb parse error in {ms_filename}"
             print(message)
-            DataIssue.objects.create(parser='people', message=message, url="/folk/")
+            DataIssue.objects.create(parser="people", message=message, url="/folk/")
 
-    elif ms_filename == '':
+    elif ms_filename == "":
         pass
     else:
         message = f"! Unrecognised type of file at mug_shot field '{ms_filename}' for {person.fullname}"
         print(message)
-        DataIssue.objects.create(parser='people', message=message, url="/folk/")
+        DataIssue.objects.create(parser="people", message=message, url="/folk/")
 
     person.save()
 
+
 def load_people_expos():
-    '''This is where the folk.csv file is parsed to read people's names. 
+    """This is where the folk.csv file is parsed to read people's names.
     Which it gets wrong for people like Lydia-Clare Leather and various 'von' and 'de' middle 'names'
     and McLean and Mclean and McAdam - interaction with the url parser in urls.py too
-    '''
-    DataIssue.objects.filter(parser='people').delete()
-    
-    persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) # should really be EXPOFOLK I guess
-    personreader = csv.reader(persontab) # this is an iterator
+    """
+    DataIssue.objects.filter(parser="people").delete()
+
+    persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv"))  # should really be EXPOFOLK I guess
+    personreader = csv.reader(persontab)  # this is an iterator
     headers = next(personreader)
     header = dict(list(zip(headers, list(range(len(headers))))))
-    
+
     # make expeditions
     print(" - Loading expeditions")
     years = headers[5:]
-    
+
     for year in years:
-        lookupAttribs = {'year':year}
-        nonLookupAttribs = {'name':f"CUCC expo {year}"}
-        
+        lookupAttribs = {"year": year}
+        nonLookupAttribs = {"name": f"CUCC expo {year}"}
+
         save_carefully(Expedition, lookupAttribs, nonLookupAttribs)
 
     # make persons
@@ -105,67 +106,86 @@ def load_people_expos():
         nickname = splitnick.group(2) or ""
 
         fullname = fullname.strip()
-        names = fullname.split(' ')
+        names = fullname.split(" ")
         firstname = names[0]
         if len(names) == 1:
             lastname = ""
 
-        if personline[header["VfHO member"]] =='':
+        if personline[header["VfHO member"]] == "":
             vfho = False
         else:
             vfho = True
 
-        lookupAttribs={'first_name':firstname, 'last_name':(lastname or "")}
-        nonLookupAttribs={'is_vfho':vfho, 'fullname':fullname, 'nickname':nickname}
+        lookupAttribs = {"first_name": firstname, "last_name": (lastname or "")}
+        nonLookupAttribs = {"is_vfho": vfho, "fullname": fullname, "nickname": nickname}
         person, created = save_carefully(Person, lookupAttribs, nonLookupAttribs)
 
         parse_blurb(personline=personline, header=header, person=person)
-    
+
         # make person expedition from table
         for year, attended in list(zip(headers, personline))[5:]:
             expedition = Expedition.objects.get(year=year)
             if attended == "1" or attended == "-1":
-                lookupAttribs = {'person':person, 'expedition':expedition}
-                nonLookupAttribs = {'nickname':nickname, 'is_guest':(personline[header["Guest"]] == "1")}
+                lookupAttribs = {"person": person, "expedition": expedition}
+                nonLookupAttribs = {"nickname": nickname, "is_guest": (personline[header["Guest"]] == "1")}
                 save_carefully(PersonExpedition, lookupAttribs, nonLookupAttribs)
     print("", flush=True)
 
-def who_is_this(year,possibleid):
+
+def who_is_this(year, possibleid):
     expo = Expedition.objects.filter(year=year)
-    personexpedition =  GetPersonExpeditionNameLookup(expo)[possibleid.lower()]
+    personexpedition = GetPersonExpeditionNameLookup(expo)[possibleid.lower()]
     if personexpedition:
         return personexpedition.person
     else:
         return None
-    
+
+
 global foreign_friends
-foreign_friends = ["P. Jeutter", "K. Jäger", "S. Steinberger", "R. Seebacher", 
-        "Dominik Jauch", "Fritz Mammel", "Marcus Scheuerman", 
-        "Uli Schütz", "Wieland Scheuerle", "Arndt Karger",
-        "Kai Schwekend", "Regina Kaiser", "Thilo Müller","Wieland Scheuerle",
-        "Florian Gruner", "Helmut Stopka-Ebeler", "Aiko", "Mark Morgan", "Arndt Karger"]
-    
+foreign_friends = [
+    "P. Jeutter",
+    "K. Jäger",
+    "S. Steinberger",
+    "R. Seebacher",
+    "Dominik Jauch",
+    "Fritz Mammel",
+    "Marcus Scheuerman",
+    "Uli Schütz",
+    "Wieland Scheuerle",
+    "Arndt Karger",
+    "Kai Schwekend",
+    "Regina Kaiser",
+    "Thilo Müller",
+    "Wieland Scheuerle",
+    "Florian Gruner",
+    "Helmut Stopka-Ebeler",
+    "Aiko",
+    "Mark Morgan",
+    "Arndt Karger",
+]
+
+
 def known_foreigner(id):
-    '''If this someone from ARGE or a known Austrian? Name has to be exact, no soft matching
-    '''
-    global foreign_friends 
+    """Is this someone from ARGE or a known Austrian? Name has to be exact, no soft matching"""
+    global foreign_friends
 
     if id in foreign_friends:
         return True
     else:
         return False
 
-    
+
 # Refactor. The dict GetPersonExpeditionNameLookup(expo) indexes by name and has values of personexpedition
 # This is convoluted, the whole personexpedition concept is unnecessary?
 
-Gpersonexpeditionnamelookup = { }
+Gpersonexpeditionnamelookup = {}
+
+
 def GetPersonExpeditionNameLookup(expedition):
     global Gpersonexpeditionnamelookup
-    
+
     def apply_variations(f, l):
-        '''Be generous in guessing possible matches. Any duplicates will be ruled as invalid.
-        '''
+        """Be generous in guessing possible matches. Any duplicates will be ruled as invalid."""
         f = f.lower()
         l = l.lower()
         variations = []
@@ -175,27 +195,27 @@ def GetPersonExpeditionNameLookup(expedition):
         variations.append(f + " " + l)
         variations.append(f + " " + l[0])
         variations.append(f + l[0])
-        variations.append(f + " " +l[0] + '.')
+        variations.append(f + " " + l[0] + ".")
         variations.append(f[0] + " " + l)
         variations.append(f[0] + ". " + l)
         variations.append(f[0] + l)
-        variations.append(f[0] + l[0]) # initials e.g. gb or bl
+        variations.append(f[0] + l[0])  # initials e.g. gb or bl
         return variations
-    
+
     res = Gpersonexpeditionnamelookup.get(expedition.name)
-    
+
     if res:
         return res
-    
-    res = { }
+
+    res = {}
     duplicates = set()
-    
-    #print("Calculating GetPersonExpeditionNameLookup for " + expedition.year)
+
+    # print("Calculating GetPersonExpeditionNameLookup for " + expedition.year)
     personexpeditions = PersonExpedition.objects.filter(expedition=expedition)
     short = {}
     dellist = []
     for personexpedition in personexpeditions:
-        possnames = [ ]
+        possnames = []
         f = unidecode(unescape(personexpedition.person.first_name.lower()))
         l = unidecode(unescape(personexpedition.person.last_name.lower()))
         full = unidecode(unescape(personexpedition.person.fullname.lower()))
@@ -204,40 +224,40 @@ def GetPersonExpeditionNameLookup(expedition):
             possnames.append(full)
         if n not in possnames:
             possnames.append(n)
-        
+
         if l:
-            possnames += apply_variations(f,l)
+            possnames += apply_variations(f, l)
 
             if n:
                 possnames += apply_variations(n, l)
-                
+
             if f == "Robert".lower():
                 possnames += apply_variations("Bob", l)
             if f == "Rob".lower():
                 possnames += apply_variations("Robert", l)
-                
+
             if f == "Andrew".lower():
                 possnames += apply_variations("Andy", l)
             if f == "Andy".lower():
                 possnames += apply_variations("Andrew", l)
             if f == "Michael".lower():
                 possnames += apply_variations("Mike", l)
-                
+
             if f == "David".lower():
                 possnames += apply_variations("Dave", l)
             if f == "Dave".lower():
                 possnames += apply_variations("David", l)
-                
+
             if f == "Peter".lower():
                 possnames += apply_variations("Pete", l)
             if f == "Pete".lower():
                 possnames += apply_variations("Peter", l)
-                
+
             if f == "Olly".lower():
                 possnames += apply_variations("Oliver", l)
             if f == "Oliver".lower():
                 possnames += apply_variations("Olly", l)
-               
+
             if f == "Ollie".lower():
                 possnames += apply_variations("Oliver", l)
             if f == "Oliver".lower():
@@ -245,59 +265,57 @@ def GetPersonExpeditionNameLookup(expedition):
 
             if f == "Becka".lower():
                 possnames += apply_variations("Rebecca", l)
-         
-            if f'{f} {l}' == "Andy Waddington".lower():
+
+            if f"{f} {l}" == "Andy Waddington".lower():
                 possnames += apply_variations("aer", "waddington")
-            if f'{f} {l}' == "Phil Underwood".lower():
+            if f"{f} {l}" == "Phil Underwood".lower():
                 possnames += apply_variations("phil", "underpants")
-            if f'{f} {l}' == "Naomi Griffiths".lower():
+            if f"{f} {l}" == "Naomi Griffiths".lower():
                 possnames += apply_variations("naomi", "makins")
-            if f'{f} {l}' == "Tina White".lower():
+            if f"{f} {l}" == "Tina White".lower():
                 possnames += apply_variations("tina", "richardson")
-            if f'{f} {l}' == "Cat Hulse".lower():
+            if f"{f} {l}" == "Cat Hulse".lower():
                 possnames += apply_variations("catherine", "hulse")
                 possnames += apply_variations("cat", "henry")
-            if f'{f} {l}' == "Jess Stirrups".lower():
+            if f"{f} {l}" == "Jess Stirrups".lower():
                 possnames += apply_variations("jessica", "stirrups")
-            if f'{f} {l}' == "Nat Dalton".lower():
-                possnames += apply_variations("nathanael", "dalton") # correct. He has a weird spelling.
-            if f'{f} {l}' == "Mike Richardson".lower():
+            if f"{f} {l}" == "Nat Dalton".lower():
+                possnames += apply_variations("nathanael", "dalton")  # correct. He has a weird spelling.
+            if f"{f} {l}" == "Mike Richardson".lower():
                 possnames.append("mta")
                 possnames.append("miketa")
                 possnames.append("mike the animal")
                 possnames.append("animal")
-            if f'{f} {l}' == "Eric Landgraf".lower():
+            if f"{f} {l}" == "Eric Landgraf".lower():
                 possnames.append("eric c.landgraf")
                 possnames.append("eric c. landgraf")
                 possnames.append("eric c landgraf")
-            if f'{f} {l}' == "Nadia Raeburn".lower():
+            if f"{f} {l}" == "Nadia Raeburn".lower():
                 possnames.append("nadia rc")
                 possnames.append("nadia raeburn-cherradi")
-             
+
         for i in [3, 4, 5, 6]:
-            lim = min(i, len(f)+1)  # short form, e.g. Dan for Daniel. 
+            lim = min(i, len(f) + 1)  # short form, e.g. Dan for Daniel.
             if f[:lim] not in short:
-                short[f[:lim]]= personexpedition 
+                short[f[:lim]] = personexpedition
             else:
                 dellist.append(f[:lim])
- 
-        possnames = set(possnames) # remove duplicates
+
+        possnames = set(possnames)  # remove duplicates
         for possname in possnames:
             if possname in res:
                 duplicates.add(possname)
             else:
                 res[possname] = personexpedition
-        
+
     for possname in duplicates:
         del res[possname]
-        
+
     for possname in dellist:
-        if possname in short: #always true ?
+        if possname in short:  # not always true: dellist can hold the same prefix more than once
             del short[possname]
     for shortname in short:
         res[shortname] = short[shortname]
-        
-    
+
     Gpersonexpeditionnamelookup[expedition.name] = res
     return res
-