8 files changed, 86 insertions, 57 deletions
diff --git a/core/views/logbooks.py b/core/views/logbooks.py
index 9508877..00ac4e4 100644
--- a/core/views/logbooks.py
+++ b/core/views/logbooks.py
@@ -12,8 +12,7 @@ from django.template.defaultfilters import slugify
 from django.utils import timezone
 from django.views.generic.list import ListView
 
-#import troggle.parsers.logbooks as logbookparsers
-from troggle.core.forms import getTripForm  # , get_name, PersonForm
+from troggle.core.forms import getTripForm  # , get_name
 from troggle.core.models.troggle import Expedition, Person, PersonExpedition
 from troggle.core.utils import TROG
 from troggle.core.models.caves import LogbookEntry, PersonTrip
@@ -30,29 +29,21 @@ and for persons: their individual pages and their perseonexpedition pages.
 It uses the global object TROG to hold some cached pages.
 '''
 
-def getNotablePersons():
-    notablepersons = []
-    for person in Person.objects.all():
-            if person.bisnotable():
-                notablepersons.append(person)
-    return notablepersons
-
-
 def personindex(request):
     persons = Person.objects.all()
-    # From what I can tell, "persons" seems to be the table rows, while "personss" is the table columns. - AC 16 Feb 09
-    personss = [ ]
+    # From what I can tell, "persons" seems to be the table rows, while "pcols" is the table columns. - AC 16 Feb 09
+    pcols = [ ]
     ncols = 4
     nc = int((len(persons) + ncols - 1) / ncols)
     for i in range(ncols):
-        personss.append(persons[i * nc: (i + 1) * nc])
+        pcols.append(persons[i * nc: (i + 1) * nc])
     
     notablepersons = []
     for person in Person.objects.all():
             if person.bisnotable():
                 notablepersons.append(person)
 
-    return render(request,'personindex.html', {'persons': persons, 'personss':personss, 'notablepersons':notablepersons})
+    return render(request,'personindex.html', {'persons': persons, 'pcols':pcols, 'notablepersons':notablepersons})
 
 
 def expedition(request, expeditionname):
@@ -65,7 +56,7 @@ def expedition(request, expeditionname):
     '''
     if "reload" in request.GET:
         this_expedition = Expedition.objects.get(year=int(expeditionname))
-        LoadLogbookForExpedition(this_expedition, 0)
+        LoadLogbookForExpedition(this_expedition, 0) # 0 means re-parse
 
     ts = TROG['pagecache']['expedition']
     if settings.CACHEDPAGES:
@@ -101,8 +92,8 @@ def expedition(request, expeditionname):
     return render(request,'expedition.html', ts[expeditionname] )
 
 
-def get_absolute_url(self):
-    return ('expedition', (expedition.year))
+# def get_absolute_url(self): # seems to have come seriously adrift. This should be in a class?!
+    # return ('expedition', (expedition.year))
 
 
 class Expeditions_tsvListView(ListView): 
@@ -136,7 +127,7 @@ def person(request, first_name='', last_name='', ):
     return render(request,'person.html', {'person': this_person, })
 
 
-def GetPersonChronology(personexpedition):
+def get_person_chronology(personexpedition):
     '''Horrible bug here whern there is more than one survex block per day, it duplicates the entry but gets it wrong
     Fortunately this is just the display on this page which is wroing, no bad calculations get into the database.
     '''
@@ -166,7 +157,7 @@ def personexpedition(request, first_name='',  last_name='', year=''):
     person = Person.objects.get(first_name = first_name, last_name = last_name)
     this_expedition = Expedition.objects.get(year=year)
     personexpedition = person.personexpedition_set.get(expedition=this_expedition)
-    personchronology = GetPersonChronology(personexpedition)
+    personchronology = get_person_chronology(personexpedition)
     return render(request,'personexpedition.html', {'personexpedition': personexpedition, 'personchronology':personchronology})
 
 
diff --git a/parsers/imports.py b/parsers/imports.py
index bfafbfe..01c1d7b 100644
--- a/parsers/imports.py
+++ b/parsers/imports.py
@@ -27,7 +27,7 @@ def import_people():
     print("-- Importing People (folk.csv) to ",end="")
     print(django.db.connections.databases['default']['NAME'])
     with transaction.atomic():
-        troggle.parsers.people.LoadPersonsExpos()
+        troggle.parsers.people.load_people_expos()
 
 def import_surveyscans():
     print("-- Importing Survey Scans")
diff --git a/parsers/people.py b/parsers/people.py
index 345210c..3f7c02a 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -1,11 +1,13 @@
 import csv, re, datetime, os, shutil
 from html.parser import HTMLParser
 from unidecode import unidecode
+from pathlib import Path
 
 from django.conf import settings
 
 from troggle.core.models.troggle import Expedition, Person, PersonExpedition
-from troggle.core.utils import save_carefully
+from troggle.core.models.troggle import DataIssue
+from troggle.core.utils import save_carefully, TROG
 
 '''These functions do not match how the stand-alone folk script works. So the script produces an HTML file which has 
 href links to pages in troggle which troggle does not think are right.
@@ -13,32 +15,59 @@ The standalone script needs to be renedred defucnt, and all the parsing needs to
 or they should use the same code by importing a module.
 '''
 
-def parseMugShotAndBlurb(personline, header, person):
+def parse_blurb(personline, header, person):
     """create mugshot Photo instance"""
-    mugShotFilename=personline[header["Mugshot"]]
-    mugShotPath = os.path.join(settings.EXPOWEB, "folk", mugShotFilename)
-    if mugShotPath[-3:]=='jpg': #if person just has an image, add it
-        #saveMugShot(mugShotPath=mugShotPath, mugShotFilename=mugShotFilename, person=person)
+    ms_filename = personline[header["Mugshot"]]
+    ms_path = Path(settings.EXPOWEB, "folk", ms_filename)
+    
+    if ms_filename:
+        if not ms_path.is_file():
+            message = f"! INVALID mug_shot field '{ms_filename}' for {person.fullname}"
+            print(message)
+            DataIssue.objects.create(parser='people', message=message, url=f"/person/{person.fullname}")
+            return
+    
+    if ms_filename.startswith('i/'):
+        #if person just has an image, add it. It has format 'i/adama2018.jpg'
+        person.mug_shot = str(Path("/folk", ms_filename))
+        person.blurb = None
+
+    elif ms_filename.startswith('l/'): 
+        # it has the format 'l/ollybetts.htm' the file may contain <img src="../i/mymug.jpg"> images
+        with open(ms_path,'r') as blurbfile:
+            blrb = blurbfile.read()
+        pblurb=re.search(r'<body>.*<hr',blrb,re.DOTALL)
+        if pblurb:
+            person.mug_shot = None           
+            fragment= re.search('<body>(.*)<hr',blrb,re.DOTALL).group(1) 
+            fragment = fragment.replace('src="../i/', 'src="/folk/i/')
+            fragment = fragment.replace("src='../i/", "src='/folk/i/")
+            fragment = re.sub(r'<h.*>[^<]*</h.>', '', fragment)
+            # replace src="../i/ with src="/folk/i
+            person.blurb = fragment
+        else:
+            message = f"! Blurb parse error in {ms_filename}"
+            print(message)
+            DataIssue.objects.create(parser='people', message=message, url="/folk/")
+
+    elif ms_filename == '':
         pass
-    elif mugShotPath[-3:]=='htm': #if person has an html page, find the image(s) and add it. Also, add the text from the html page to the "blurb" field in his model instance.
-        personPageOld=open(mugShotPath,'r').read()
-        if not person.blurb:
-            pblurb=re.search('<body>.*<hr',personPageOld,re.DOTALL)
-            if pblurb:
-                #this needs to be refined, take care of the HTML and make sure it doesn't match beyond the blurb.
-                #Only finds the first image, not all of them
-                person.blurb=re.search('<body>.*<hr',personPageOld,re.DOTALL).group() 
-            else:
-                print("ERROR: --------------- Broken link or Blurb parse error in ", mugShotFilename)
-            #for mugShotFilename in re.findall('i/.*?jpg',personPageOld,re.DOTALL):
-            #    mugShotPath = os.path.join(settings.EXPOWEB, "folk", mugShotFilename)
-            #    saveMugShot(mugShotPath=mugShotPath, mugShotFilename=mugShotFilename, person=person)
+    else:
+        message = f"! Unrecognised type of file at mug_shot field '{ms_filename}' for {person.fullname}"
+        print(message)
+        DataIssue.objects.create(parser='people', message=message, url="/folk/")
+
     person.save()
 
-def LoadPersonsExpos():
+def load_people_expos():
+    '''Parse the folk.csv file to read people's names.
+    The name parsing is known to be wrong for people like Lydia-Clare Leather, for various 'von' and 'de'
+    middle 'names', and for McLean, Mclean and McAdam - there is an interaction with the url parser in urls.py too
+    '''
+    DataIssue.objects.filter(parser='people').delete()
     
-    persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv"))
-    personreader = csv.reader(persontab)
+    persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) # should really be EXPOFOLK I guess
+    personreader = csv.reader(persontab) # this is an iterator
     headers = next(personreader)
     header = dict(list(zip(headers, list(range(len(headers))))))
     
@@ -86,7 +115,7 @@ def LoadPersonsExpos():
         nonLookupAttribs={'is_vfho':vfho, 'fullname':fullname}
         person, created = save_carefully(Person, lookupAttribs, nonLookupAttribs)
 
-        parseMugShotAndBlurb(personline=personline, header=header, person=person)
+        parse_blurb(personline=personline, header=header, person=person)
     
         # make person expedition from table
         for year, attended in list(zip(headers, personline))[5:]:
diff --git a/parsers/survex.py b/parsers/survex.py
index 23f27aa..5938615 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -576,6 +576,7 @@ class LoadingSurvex():
             return self.caveslist[cavepath.lower()]
         # TO DO - some of this is already done in generating self.caveslist so simplify this
         # esp. as it is in a loop.
+        # TO DO recognise cave if different name, e.g. gruenstein == 281
         path_match = self.rx_cave.search(cavepath)
         if path_match:
             sluggy = '{}-{}'.format(path_match.group(1), path_match.group(2))
@@ -608,9 +609,15 @@ class LoadingSurvex():
         """Ignore surface, kataser and gps *include survex files
         """
         if headpath in self.ignorenoncave:
+            #message = f" - {headpath} is <ignorenoncave> (while creating '{includelabel}' sfile & sdirectory)"
+            #print("\n"+message)
+            #print("\n"+message,file=sys.stderr)
             return
         for i in self.ignoreprefix:
             if headpath.startswith(i):
+                #message = f" - {headpath} starts with <ignoreprefix> (while creating '{includelabel}' sfile & sdirectory)"
+                #print("\n"+message)
+                #print("\n"+message,file=sys.stderr)
                 return
         message = " ! {} is not a cave. (while creating '{}' sfile & sdirectory)".format(headpath, includelabel)
         print("\n"+message)
diff --git a/templates/person.html b/templates/person.html
index 64896a7..d9d8cb8 100644
--- a/templates/person.html
+++ b/templates/person.html
@@ -9,21 +9,14 @@
 {% endblock %}
 
 {% block content %}
-{% if person.blurb %}
-{{person.blurb|safe}}
-{% endif %}
 
-{% for pic in person.photo_set.all %}
-{% if pic.is_mugshot %}
+
+{% if person.mug_shot %}
     <div class="figure">
-      <p> <img src="{{ pic.thumbnail_image.url }}" class="thumbnail" />
-      <p> {{ pic.caption }}</p>
-      <p> <a href="{{ pic.get_admin_url }}">edit {{pic}}</a> </>
-      </p>
+      <p> <img src="{{ person.mug_shot }}" class="thumbnail" />
       </p>
     </div>
 {% endif %}
-{% endfor %}
 <br class="clearfloat" />
 
 <h3>{{person|wiki_to_html_short}} has been on expo in the following years:</h3>
@@ -38,5 +31,11 @@
 </ul>
 </p>
 
+{% if person.blurb %}
+{{person.blurb|safe}}
+{% else %}
+To add a blurb file for a person, create /folk/l/&lt;id&gt;.html and register it in /folk/folk.csv.
+Documented in <a href="/handbook/computing/folkupdate.html">/handbook/computing/folkupdate.html</a>
+{% endif %}
 
 {% endblock %}
diff --git a/templates/personexpedition.html b/templates/personexpedition.html
index e04952e..48e94ac 100644
--- a/templates/personexpedition.html
+++ b/templates/personexpedition.html
@@ -64,7 +64,7 @@ it duplicates the entry but gets it wrong. The length from the first block is di
 - The template is in <var>troggle/templates/personexpedition.html</var>
 <br>
 - The code is in function <var>personexpedition()</var> which calls 
-<var>GetPersonChronology()</var> in <var>troggle/core/views/logbooks.py</var>
+<var>get_person_chronology()</var> in <var>troggle/core/views/logbooks.py</var>
 <br>
 - the connection between the two is made in the URL resolver in <var>troggle/urls.py</var>
 <p>To be fixed!
diff --git a/templates/personindex.html b/templates/personindex.html
index 10ac7f6..5338bd7 100644
--- a/templates/personindex.html
+++ b/templates/personindex.html
@@ -22,7 +22,7 @@
 <h2>All expoers</h2>
 <table class="searchable">
 <tr>
-{% for persons in personss %}
+{% for persons in pcols %}
 <td>
 
 <table>
diff --git a/templates/svxcavesingle.html b/templates/svxcavesingle.html
index cefe961..a53889f 100644
--- a/templates/svxcavesingle.html
+++ b/templates/svxcavesingle.html
@@ -5,7 +5,7 @@
 {% block content %}
 
 {% autoescape off %}
-<h1>Surveys for <a href="/{{cave.url}}">{{cave.official_name}}</a> - identifier:{{cave}}</h1>
+<h1>Surveys for <a href="/{{cave.url}}">{{cave.official_name}}</a> - id:{{cave}}</h1>
 {% endautoescape %}
 <!-- the only thing passed into this template is the object identifier for a cave.
 All the processing to extract the survex subdriectories and survex files is done in this template -->
@@ -17,7 +17,10 @@ All the processing to extract the survex subdriectories and survex files is done
  <a href="#T_{{survexdirectory.primarysurvexfile.path}}">{{survexdirectory.path}}</a> 
     {% empty %}
     <p>If you were expecting to see a list of survex files here and a summary table of who did what and when, perhaps
-    because you followed a link from <a href="/survexfile/caves/">the master caves' survex list</a> page which showed that such survex files clearly existed, and yet there is nothing here but a blank; then this will be because the survex (.svx) files have been stored on the server in the 
+    because you followed a link from <a href="/survexfile/caves/">the master caves' survex list</a> page which showed that such survex files clearly existed, and yet there is nothing here but a blank; then this will be because <br>
+    [a] - this cave has no survex survey files at all, or <br>
+    [b] - you have run a 'caves' parsing import after running a 'survex' import and the survex data has been blanked out, or <br>
+    [c] - the survex (.svx) files have been stored on the server in the 
     <a href="/handbook/computing/repos.html"><var>:loser:</var></a> repository 
     but whoever was 
     responsible has not yet created the appropriate entries in the XML file in the