Diffstat (limited to 'parsers')
-rw-r--r--   parsers/QMs.py       |  20
-rw-r--r--   parsers/caves.py     |  50
-rw-r--r--   parsers/cavesM.py    | 213
-rw-r--r--   parsers/logbooks.py  | 257
-rw-r--r--   parsers/people.py    |  81
-rw-r--r--   parsers/peopleM.py   |  27
-rw-r--r--   parsers/survex.py    | 262
-rw-r--r--   parsers/surveys.py   |   7
-rw-r--r--   parsers/surveysM.py  |  65
9 files changed, 420 insertions, 562 deletions
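The hunks below repeatedly trade the construct-then-.save() idiom for Django's update_or_create(), and record parser problems as DataIssue rows instead of only printing them. A minimal sketch of the combined pattern, assuming Django's standard manager API; the helper name import_slug is illustrative and not part of this changeset:

# A minimal sketch, assuming Django's standard manager API; the helper
# name import_slug is illustrative, not from this changeset. It combines
# the two idioms the hunks below adopt: update_or_create() in place of
# construct-then-.save(), and a DataIssue row (parser/message fields, as
# used in parsers/caves.py) wherever a problem used to be only print()ed.
import troggle.core.models as models

def import_slug(cave, slug, primary, context):
    try:
        # Returns an (object, created) tuple and is idempotent, so
        # re-running the import does not duplicate rows.
        cs, created = models.CaveSlug.objects.update_or_create(
            cave=cave, slug=slug, primary=primary)
        return cs
    except Exception:
        message = "Can't find text (slug): %s, skipping %s" % (slug, context)
        models.DataIssue.objects.create(parser='caves', message=message)
        print(message)

Since update_or_create() returns an (object, created) pair, the new call sites in readentrance() and readcave() unpack two values, hence the "e, state = ..." and "c, state = ..." assignments in the caves.py hunks.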
diff --git a/parsers/QMs.py b/parsers/QMs.py index efc8cd6..602b7af 100644 --- a/parsers/QMs.py +++ b/parsers/QMs.py @@ -17,19 +17,19 @@ def parseCaveQMs(cave,inputFile): try: steinBr=Cave.objects.get(official_name="Steinbrückenhöhle") except Cave.DoesNotExist: - print "Steinbruckenhoehle is not in the database. Please run parsers.cavetab first." + print("Steinbruckenhoehle is not in the database. Please run parsers.cavetab first.") return elif cave=='hauch': try: hauchHl=Cave.objects.get(official_name="Hauchhöhle") except Cave.DoesNotExist: - print "Hauchhoele is not in the database. Please run parsers.cavetab first." + print("Hauchhoele is not in the database. Please run parsers.cavetab first.") return elif cave =='kh': try: kh=Cave.objects.get(official_name="Kaninchenhöhle") except Cave.DoesNotExist: - print "KH is not in the database. Please run parsers.cavetab first." + print("KH is not in the database. Please run parsers.cavetab first.") parse_KH_QMs(kh, inputFile=inputFile) return @@ -48,7 +48,7 @@ def parseCaveQMs(cave,inputFile): elif cave=='hauch': placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, title="placeholder for QMs in 234", text="QMs temporarily attached to this should be re-attached to their actual trips", defaults={"date": date(year, 1, 1),"cave":hauchHl}) if hadToCreate: - print cave+" placeholder logbook entry for " + str(year) + " added to database" + print(cave + " placeholder logbook entry for " + str(year) + " added to database") QMnum=re.match(r".*?-\d*?-X?(?P<numb>\d*)",line[0]).group("numb") newQM = QM() newQM.found_by=placeholder @@ -71,19 +71,18 @@ def parseCaveQMs(cave,inputFile): if preexistingQM.new_since_parsing==False: #if the pre-existing QM has not been modified, overwrite it preexistingQM.delete() newQM.save() - print "overwriting " + str(preexistingQM) +"\r", - + print("overwriting " + str(preexistingQM) +"\r") else: # otherwise, print that it was ignored - print "preserving "+ str(preexistingQM) + ", which was edited in admin \r", + print("preserving " + str(preexistingQM) + ", which was edited in admin \r") except QM.DoesNotExist: #if there is no pre-existing QM, save the new one newQM.save() - print "QM "+str(newQM) + ' added to database\r', + print("QM "+str(newQM) + ' added to database\r') except KeyError: #check on this one continue except IndexError: - print "Index error in " + str(line) + print("Index error in " + str(line)) continue def parse_KH_QMs(kh, inputFile): @@ -104,7 +103,7 @@ def parse_KH_QMs(kh, inputFile): } nonLookupArgs={ 'grade':res['grade'], - 'nearest_station':res['nearest_station'], + 'nearest_station_name':res['nearest_station'], 'location_description':res['description'] } @@ -115,3 +114,4 @@ parseCaveQMs(cave='stein',inputFile=r"1623/204/qm.csv") parseCaveQMs(cave='hauch',inputFile=r"1623/234/qm.csv") parseCaveQMs(cave='kh', inputFile="1623/161/qmtodo.htm") #parseCaveQMs(cave='balkonhoehle',inputFile=r"1623/264/qm.csv") + diff --git a/parsers/caves.py b/parsers/caves.py index ba1c358..606007f 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -6,16 +6,18 @@ import re def readcaves(): - newArea = models.Area(short_name = "1623", parent = None) - newArea.save() - newArea = models.Area(short_name = "1626", parent = None) - newArea.save() - print("Reading Entrances") + + # Clear the cave data issues as we are reloading + models.DataIssue.objects.filter(parser='caves').delete() + + area_1623 = models.Area.objects.update_or_create(short_name = "1623", parent = None) + area_1626 = 
models.Area.objects.update_or_create(short_name = "1626", parent = None) + print(" - Reading Entrances") #print "list of <Slug> <Filename>" for filename in os.walk(settings.ENTRANCEDESCRIPTIONS).next()[2]: #Should be a better way of getting a list of files if filename.endswith('.html'): readentrance(filename) - print ("Reading Caves") + print (" - Reading Caves") for filename in os.walk(settings.CAVEDESCRIPTIONS).next()[2]: #Should be a better way of getting a list of files if filename.endswith('.html'): readcave(filename) @@ -51,7 +53,7 @@ def readentrance(filename): bearings = getXML(entrancecontents, "bearings", maxItems = 1, context = context) url = getXML(entrancecontents, "url", maxItems = 1, context = context) if len(non_public) == 1 and len(slugs) >= 1 and len(name) >= 1 and len(entrance_description) == 1 and len(explorers) == 1 and len(map_description) == 1 and len(location_description) == 1 and len(approach) == 1 and len(underground_description) == 1 and len(marking) == 1 and len(marking_comment) == 1 and len(findability) == 1 and len(findability_description) == 1 and len(alt) == 1 and len(northing) == 1 and len(easting) == 1 and len(tag_station) == 1 and len(exact_station) == 1 and len(other_station) == 1 and len(other_description) == 1 and len(bearings) == 1 and len(url) == 1: - e = models.Entrance(name = name[0], + e, state = models.Entrance.objects.update_or_create(name = name[0], non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]], entrance_description = entrance_description[0], explorers = explorers[0], @@ -75,14 +77,12 @@ def readentrance(filename): url = url[0], filename = filename, cached_primary_slug = slugs[0]) - e.save() primary = True for slug in slugs: #print slug, filename - cs = models.EntranceSlug(entrance = e, + cs = models.EntranceSlug.objects.update_or_create(entrance = e, slug = slug, primary = primary) - cs.save() primary = False def readcave(filename): @@ -117,7 +117,7 @@ def readcave(filename): url = getXML(cavecontents, "url", maxItems = 1, context = context) entrances = getXML(cavecontents, "entrance", context = context) if len(non_public) == 1 and len(slugs) >= 1 and len(official_name) == 1 and len(areas) >= 1 and len(kataster_code) == 1 and len(kataster_number) == 1 and len(unofficial_number) == 1 and len(explorers) == 1 and len(underground_description) == 1 and len(equipment) == 1 and len(references) == 1 and len(survey) == 1 and len(kataster_status) == 1 and len(underground_centre_line) == 1 and len(notes) == 1 and len(length) == 1 and len(depth) == 1 and len(extent) == 1 and len(survex_file) == 1 and len(description_file ) == 1 and len(url) == 1 and len(entrances) >= 1: - c = models.Cave(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]], + c, state = models.Cave.objects.update_or_create(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]], official_name = official_name[0], kataster_code = kataster_code[0], kataster_number = kataster_number[0], @@ -137,7 +137,6 @@ def readcave(filename): description_file = description_file[0], url = url[0], filename = filename) - c.save() for area_slug in areas: area = models.Area.objects.filter(short_name = area_slug) if area: @@ -149,12 +148,13 @@ def readcave(filename): primary = True for slug in slugs: try: - cs = models.CaveSlug(cave = c, + cs = models.CaveSlug.objects.update_or_create(cave = c, slug = slug, primary = primary) - cs.save() except: - print("Can't find text (slug): %s, skipping %s" % 
(slug, context)) + message = "Can't find text (slug): %s, skipping %s" % (slug, context) + models.DataIssue.objects.create(parser='caves', message=message) + print(message) primary = False for entrance in entrances: @@ -162,20 +162,26 @@ def readcave(filename): letter = getXML(entrance, "letter", maxItems = 1, context = context)[0] try: entrance = models.Entrance.objects.get(entranceslug__slug = slug) - ce = models.CaveAndEntrance(cave = c, entrance_letter = letter, entrance = entrance) - ce.save() + ce = models.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance) except: - print ("Entrance text (slug) %s missing %s" % (slug, context)) + message = "Entrance text (slug) %s missing %s" % (slug, context) + models.DataIssue.objects.create(parser='caves', message=message) + print(message) def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""): items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S) if len(items) < minItems and printwarnings: - print("%(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items), + message = "%(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items), "itemname": itemname, - "min": minItems} + context) + "min": minItems} + context + models.DataIssue.objects.create(parser='caves', message=message) + print(message) + if maxItems is not None and len(items) > maxItems and printwarnings: - print("%(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items), + message = "%(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items), "itemname": itemname, - "max": maxItems} + context) + "max": maxItems} + context + models.DataIssue.objects.create(parser='caves', message=message) + print(message) return items diff --git a/parsers/cavesM.py b/parsers/cavesM.py deleted file mode 100644 index f9900d6..0000000 --- a/parsers/cavesM.py +++ /dev/null @@ -1,213 +0,0 @@ - -import troggle.core.models as models #import models for various objects -from django.conf import settings -import xml.etree.ElementTree as ET #this is used to parse XML's -import subprocess -import re - -# -# This parser has to find several things: -# There are files of .html format in expoweb area - they contain some of the important information -# There is a similar number of .svx files in loser are - they contain all the measurements -# -# Previous version was incredibly slow due to various shitty ideas about finding things -# and overelayance on python when handling regular expressions, new version delegates heavy lifting to shell -# and handles more sophisticated bits only -# - -def load(): - print('Hi! I\'m caves parser. 
Ready to work') - - print('Loading caves of 1623 area') - loadarea('1623') - - print('Loading caves of 1626 area') - loadarea('1626') - - -def loadarea(areacode): - - if not file_exists(settings.SURVEX_DATA+'1623-and-1626.3d'): - print('Computing master .3d file') - bash('cavern -o'+settings.SURVEX_DATA+' '+settings.SURVEX_DATA+'1623-and-1626.svx') - else: - print('Loading from existing master .3d file') - - master3d = bash('dump3d -d '+settings.SURVEX_DATA+'1623-and-1626.3d').splitlines() - master3dN = [x for x in master3d if ('NODE' in x)] #list of nodes of master survex file - master3dL = [x for x in master3d if ('LINE' in x)] #list of nodes of master survex file - - print('Searching all cave dirs files') - basedir = settings.SURVEX_DATA+'caves-'+areacode+'/' - - cavedirs = bash("find "+basedir+" -maxdepth 1 -type d").splitlines() #this command finds all directories - print('Obtained list of directories! (#dirs='+str(len(cavedirs))+')') - ndirs = len(cavedirs) #remember number of dirs for nice debug output - - for cavedir in cavedirs: - if cavedir==basedir: - continue #skip the basedir - a non-proper subdirectory - cavename = bash('echo '+cavedir+' | rev | cut -f1 -d \'/\' | rev').splitlines()[0] #get final bit of the directory - - test = bash('if [ ! -f '+cavedir+'/'+cavename+'.svx ] ; then echo MISSING; fi')#test for file exisence - if not file_exists(cavedir+'/'+cavename+'.svx'): - msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' MISSING!',message_type='warn') - print('Cave missing'+cavename+' :(') - msg.save() - continue - fullname=cavedir+'/'+cavename+'.svx' - print('Found cave:'+cavename) - cavernout = bash('cavern -o '+cavedir+' '+fullname) #make cavern process the thing - if 'cavern: error:' in cavernout: - msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' Survex file messed up!',message_type='warn') - print('Fucked svx'+cavename+' :(') - msg.save() - continue - - cavernout = cavernout.splitlines() - depth = float(([x for x in cavernout if ('Total vertical length' in x)][0].split()[-1])[:-2]) - length = float(([x for x in cavernout if ('Total length' in x)][0].split()[6])[:-1]) - cavefile = open(fullname,'r') - cavefilecontents = cavefile.read().splitlines() - surveyname = [x for x in cavefilecontents if ('*begin ') in x][0].split()[1].lower() - try: - title = [x for x in cavefilecontents if ('*title ') in x][0].split()[1] - except: - syrveyname = "Untitled" - - relevant_nodes = [x for x in master3dN if (('['+areacode+'.'+surveyname+'.' in x) or ('['+areacode+'.'+surveyname+']' in x))] - entrance_nodes = [x for x in relevant_nodes if 'ENTRANCE' in x] - surface_nodes = [x for x in relevant_nodes if 'SURFACE' in x] - location_nodes = [] - print('rel_nodes'+str(len(relevant_nodes))) - if len(entrance_nodes) > 0: - location_nodes = entrance_nodes - elif len(surface_nodes) > 0: - location_nodes = surface_nodes - elif len(relevant_nodes) > 0: - location_nodes = relevant_nodes - - try: - location = sorted(location_nodes, key = lambda y : float(y.split()[3])).pop() - except: - print(location_nodes) - location = 'Not found' - - relevant_lines = [x for x in master3dL if (('['+areacode+'.'+surveyname+'.' 
in x) or ('['+areacode+'.'+surveyname+']' in x))] - try: - lastleg = sorted(relevant_lines, key = lambda y : y.split().pop()).pop() - except: - lastleg = ['LINE 1900.01.01'] - try: - lastdate = lastleg.split().pop() - if 'STYLE' in lastdate: - lastdate = lastleg.split().pop().pop() - except: - lastdate = '1900.01.01' - - entrance = ' '.join(location.split()[1:3]) - print((('depth','length','surv name','entr','date'),(depth,length,surveyname,entrance,lastdate))) #sanity check print - - - newcave = models.CaveM( - survex_file = fullname, - total_length = length, - name=areacode+'.'+surveyname, - total_depth = depth, - date = lastdate, - entrance = entrance) - newcave.save() - #end of reading survex masterfiles - - print ("Reading cave descriptions") - cavefiles = bash('find '+settings.CAVEDESCRIPTIONS+' -name \'*.html\'').splitlines() - for fn in cavefiles: - f = open(fn, "r") - print(fn) - contents = f.read() - - slug = re.sub(r"\s+", "", extractXML(contents,'caveslug')) - desc = extractXML(contents,'underground_description') - name = slug[5:] #get survex compatible name - area = slug[0:4] - - print([area,name]) - - if desc==None or name==None: - msg = models.Parser_messageM(parsername='caves',content=fn+' Description meesed up!',message_type='warn') - print('Fucked description '+fn+' :(') - msg.save() - continue - - print(area+'/'+name+'/'+name+'.svx') - - updatecave = models.CaveM.objects.filter(survex_file__icontains=area+'/'+name+'/'+name+'.svx') - if len(updatecave)>1: - print('Non unique solution - skipping. Name:'+name) - elif len(updatecave)==0: - print('Cave with no survex data:'+name) - continue - else: #exaclty one match - print('Adding desc:'+name) - updatecave = updatecave[0] - updatecave.description = '/cave/descriptionM/'+slug #area-name - updatecave.title=name - updatecave.save() - - slugS = slug - explorersS = extractXML(contents,'explorers') - underground_descriptionS = extractXML(contents,'underground_description') - equipmentS = extractXML(contents,'equipment') - referencesS = extractXML(contents,'references') - surveyS = extractXML(contents,'survey') - kataster_statusS = extractXML(contents,'kataster_status') - underground_centre_lineS = extractXML(contents,'underground_centre_line') - survex_fileS = extractXML(contents,'survex_file') - notesS = extractXML(contents,'notes') - - - newcavedesc = models.Cave_descriptionM( - slug = slugS, - explorers = explorersS, - underground_description = underground_descriptionS, - equipment = equipmentS, - references = referencesS, - survey = surveyS, - kataster_status = kataster_statusS, - underground_centre_line = underground_centre_lineS, - survex_file = survex_fileS, - notes = notesS) - newcavedesc.save() - - - - - #end of reading cave descriptions - -def file_exists(filename): - test = bash('if [ ! 
-f '+filename+' ] ; then echo MISSING; fi')#test for file exisence - if 'MISSING' in test: #send error message to the database - return False - return True - -def extractXML(contents,tag): - #find correct lines - lines = contents.splitlines() - beg = [x for x in lines if ('<'+tag+'>' in x)] - end = [x for x in lines if ('</'+tag+'>' in x)] - if (not beg) or (not end): - return None - begi = lines.index(beg[0]) - endi = lines.index(end[0]) - if endi!=begi: - segment = '\n'.join(lines[begi:endi+1]) - else: - segment = lines[begi:endi+1][0] - - hit = re.findall('<'+tag+'>(.*)</'+tag+'>', segment, re.S)[0] - return hit - -def bash(cmd): #calls command in bash shell, returns output - process = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE) - output, error = process.communicate() - return output diff --git a/parsers/logbooks.py b/parsers/logbooks.py index cb40f58..9dfa31b 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -7,6 +7,8 @@ from parsers.people import GetPersonExpeditionNameLookup from parsers.cavetab import GetCaveLookup from django.template.defaultfilters import slugify +from django.utils.timezone import get_current_timezone +from django.utils.timezone import make_aware import csv import re @@ -23,19 +25,23 @@ from utils import save_carefully # # the logbook loading section # -def GetTripPersons(trippeople, expedition, logtime_underground): +def GetTripPersons(trippeople, expedition, logtime_underground): res = [ ] author = None - for tripperson in re.split(",|\+|&|&(?!\w+;)| and ", trippeople): + round_bracket_regex = re.compile(r"[\(\[].*?[\)\]]") + for tripperson in re.split(r",|\+|&|&(?!\w+;)| and ", trippeople): tripperson = tripperson.strip() - mul = re.match("<u>(.*?)</u>$(?i)", tripperson) + mul = re.match(r"<u>(.*?)</u>$(?i)", tripperson) if mul: tripperson = mul.group(1).strip() if tripperson and tripperson[0] != '*': #assert tripperson in personyearmap, "'%s' << %s\n\n %s" % (tripperson, trippeople, personyearmap) + tripperson = re.sub(round_bracket_regex, "", tripperson).strip() personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower()) if not personyear: - print "NoMatchFor: '%s'" % tripperson + print(" - No name match for: '%s'" % tripperson) + message = "No name match for: '%s' in year '%s'" % (tripperson, expedition.year) + models.DataIssue.objects.create(parser='logbooks', message=message) res.append((personyear, logtime_underground)) if mul: author = personyear @@ -45,7 +51,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground): author = res[-1][0] return res, author -def GetTripCave(place): #need to be fuzzier about matching here. Already a very slow function... +def GetTripCave(place): #need to be fuzzier about matching here. Already a very slow function... # print "Getting cave for " , place try: katastNumRes=[] @@ -65,32 +71,34 @@ def GetTripCave(place): #need to be fuzzier about matching h return tripCaveRes elif len(tripCaveRes)>1: - print "Ambiguous place " + str(place) + " entered. Choose from " + str(tripCaveRes) + print("Ambiguous place " + str(place) + " entered. 
Choose from " + str(tripCaveRes)) correctIndex=input("type list index of correct cave") return tripCaveRes[correctIndex] else: - print "No cave found for place " , place + print("No cave found for place " , place) return noncaveplaces = [ "Journey", "Loser Plateau" ] -def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground): +def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki"): """ saves a logbook entry and related persontrips """ trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground) if not author: - print "skipping logentry", title + print(" - Skipping logentry: " + title + " - no author for entry") + message = "Skipping logentry: %s - no author for entry in year '%s'" % (title, expedition.year) + models.DataIssue.objects.create(parser='logbooks', message=message) return - -# tripCave = GetTripCave(place) - # + + #tripCave = GetTripCave(place) + lplace = place.lower() if lplace not in noncaveplaces: cave=GetCaveLookup().get(lplace) #Check for an existing copy of the current entry, and save expeditionday = expedition.get_expedition_day(date) - lookupAttribs={'date':date, 'title':title} - nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave':cave, 'slug':slugify(title)[:50]} + lookupAttribs={'date':date, 'title':title} + nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave':cave, 'slug':slugify(title)[:50], 'entry_type':entry_type} lbo, created=save_carefully(models.LogbookEntry, lookupAttribs, nonLookupAttribs) for tripperson, time_underground in trippersons: @@ -102,8 +110,8 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_ def ParseDate(tripdate, year): """ Interprets dates in the expo logbooks and returns a correct datetime.date object """ - mdatestandard = re.match("(\d\d\d\d)-(\d\d)-(\d\d)", tripdate) - mdategoof = re.match("(\d\d?)/0?(\d)/(20|19)?(\d\d)", tripdate) + mdatestandard = re.match(r"(\d\d\d\d)-(\d\d)-(\d\d)", tripdate) + mdategoof = re.match(r"(\d\d?)/0?(\d)/(20|19)?(\d\d)", tripdate) if mdatestandard: assert mdatestandard.group(1) == year, (tripdate, year) year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3)) @@ -115,9 +123,9 @@ def ParseDate(tripdate, year): assert False, tripdate return datetime.date(year, month, day) -# 2007, 2008, 2006 +# 2006, 2008 - 2010 def Parselogwikitxt(year, expedition, txt): - trippara = re.findall("===(.*?)===([\s\S]*?)(?====)", txt) + trippara = re.findall(r"===(.*?)===([\s\S]*?)(?====)", txt) for triphead, triptext in trippara: tripheadp = triphead.split("|") #print "ttt", tripheadp @@ -126,7 +134,7 @@ def Parselogwikitxt(year, expedition, txt): tripsplace = tripplace.split(" - ") tripcave = tripsplace[0].strip() - tul = re.findall("T/?U:?\s*(\d+(?:\.\d*)?|unknown)\s*(hrs|hours)?", triptext) + tul = re.findall(r"T/?U:?\s*(\d+(?:\.\d*)?|unknown)\s*(hrs|hours)?", triptext) if tul: #assert len(tul) <= 1, (triphead, triptext) #assert tul[0][1] in ["hrs", "hours"], (triphead, triptext) @@ -140,12 +148,16 @@ def Parselogwikitxt(year, expedition, txt): #print "\n", tripcave, "--- ppp", trippeople, len(triptext) EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople, expedition=expedition, logtime_underground=0) -# 2002, 2004, 2005 +# 2002, 2004, 2005, 2007, 2011 - 2018 def Parseloghtmltxt(year, expedition, txt): - tripparas = 
re.findall("<hr\s*/>([\s\S]*?)(?=<hr)", txt) + #print(" - Starting log html parser") + tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt) + logbook_entry_count = 0 for trippara in tripparas: + #print(" - HR detected - maybe a trip?") + logbook_entry_count += 1 - s = re.match('''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date + s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date \s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)? \s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>(?:<p>)? \s*<div\s+class="trippeople">\s*(.*?)</div> @@ -155,38 +167,41 @@ def Parseloghtmltxt(year, expedition, txt): \s*$ ''', trippara) if not s: - if not re.search("Rigging Guide", trippara): - print "can't parse: ", trippara # this is 2007 which needs editing + if not re.search(r"Rigging Guide", trippara): + print("can't parse: ", trippara) # this is 2007 which needs editing #assert s, trippara continue - tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups() ldate = ParseDate(tripdate.strip(), year) #assert tripid[:-1] == "t" + tripdate, (tripid, tripdate) - trippeople = re.sub("Ol(?!l)", "Olly", trippeople) - trippeople = re.sub("Wook(?!e)", "Wookey", trippeople) + #trippeople = re.sub(r"Ol(?!l)", "Olly", trippeople) + #trippeople = re.sub(r"Wook(?!e)", "Wookey", trippeople) triptitles = triptitle.split(" - ") if len(triptitles) >= 2: tripcave = triptitles[0] else: tripcave = "UNKNOWN" - #print "\n", tripcave, "--- ppp", trippeople, len(triptext) - ltriptext = re.sub("</p>", "", triptext) - ltriptext = re.sub("\s*?\n\s*", " ", ltriptext) - ltriptext = re.sub("<p>", "\n\n", ltriptext).strip() - EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0) - - -# main parser for pre-2001. simpler because the data has been hacked so much to fit it + #print("\n", tripcave, "--- ppp", trippeople, len(triptext)) + ltriptext = re.sub(r"</p>", "", triptext) + ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext) + ltriptext = re.sub(r"<p>", "</br></br>", ltriptext).strip() + EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, + trippeople=trippeople, expedition=expedition, logtime_underground=0, + entry_type="html") + if logbook_entry_count == 0: + print(" - No trip entrys found in logbook, check the syntax matches htmltxt format") + + +# main parser for 1991 - 2001. 
simpler because the data has been hacked so much to fit it def Parseloghtml01(year, expedition, txt): - tripparas = re.findall("<hr[\s/]*>([\s\S]*?)(?=<hr)", txt) + tripparas = re.findall(r"<hr[\s/]*>([\s\S]*?)(?=<hr)", txt) for trippara in tripparas: s = re.match(u"(?s)\s*(?:<p>)?(.*?)</?p>(.*)$(?i)", trippara) assert s, trippara[:300] tripheader, triptext = s.group(1), s.group(2) - mtripid = re.search('<a id="(.*?)"', tripheader) + mtripid = re.search(r'<a id="(.*?)"', tripheader) tripid = mtripid and mtripid.group(1) or "" - tripheader = re.sub("</?(?:[ab]|span)[^>]*>", "", tripheader) + tripheader = re.sub(r"</?(?:[ab]|span)[^>]*>", "", tripheader) #print " ", [tripheader] #continue @@ -194,7 +209,7 @@ def Parseloghtml01(year, expedition, txt): tripdate, triptitle, trippeople = tripheader.split("|") ldate = ParseDate(tripdate.strip(), year) - mtu = re.search('<p[^>]*>(T/?U.*)', triptext) + mtu = re.search(r'<p[^>]*>(T/?U.*)', triptext) if mtu: tu = mtu.group(1) triptext = triptext[:mtu.start(0)] + triptext[mtu.end():] @@ -206,38 +221,40 @@ def Parseloghtml01(year, expedition, txt): ltriptext = triptext - mtail = re.search('(?:<a href="[^"]*">[^<]*</a>|\s|/|-|&|</?p>|\((?:same day|\d+)\))*$', ltriptext) + mtail = re.search(r'(?:<a href="[^"]*">[^<]*</a>|\s|/|-|&|</?p>|\((?:same day|\d+)\))*$', ltriptext) if mtail: #print mtail.group(0) ltriptext = ltriptext[:mtail.start(0)] - ltriptext = re.sub("</p>", "", ltriptext) - ltriptext = re.sub("\s*?\n\s*", " ", ltriptext) - ltriptext = re.sub("<p>|<br>", "\n\n", ltriptext).strip() + ltriptext = re.sub(r"</p>", "", ltriptext) + ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext) + ltriptext = re.sub(r"<p>|<br>", "\n\n", ltriptext).strip() #ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!]", "NONASCII", ltriptext) - ltriptext = re.sub("</?u>", "_", ltriptext) - ltriptext = re.sub("</?i>", "''", ltriptext) - ltriptext = re.sub("</?b>", "'''", ltriptext) + ltriptext = re.sub(r"</?u>", "_", ltriptext) + ltriptext = re.sub(r"</?i>", "''", ltriptext) + ltriptext = re.sub(r"</?b>", "'''", ltriptext) #print ldate, trippeople.strip() # could includ the tripid (url link for cross referencing) - EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0) - + EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext, + trippeople=trippeople, expedition=expedition, logtime_underground=0, + entry_type="html") +# parser for 2003 def Parseloghtml03(year, expedition, txt): - tripparas = re.findall("<hr\s*/>([\s\S]*?)(?=<hr)", txt) + tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt) for trippara in tripparas: s = re.match(u"(?s)\s*<p>(.*?)</p>(.*)$", trippara) assert s, trippara tripheader, triptext = s.group(1), s.group(2) - tripheader = re.sub(" ", " ", tripheader) - tripheader = re.sub("\s+", " ", tripheader).strip() + tripheader = re.sub(r" ", " ", tripheader) + tripheader = re.sub(r"\s+", " ", tripheader).strip() sheader = tripheader.split(" -- ") tu = "" if re.match("T/U|Time underwater", sheader[-1]): tu = sheader.pop() if len(sheader) != 3: - print "header not three pieces", sheader + print("header not three pieces", sheader) tripdate, triptitle, trippeople = sheader ldate = ParseDate(tripdate.strip(), year) triptitles = triptitle.split(" , ") @@ -246,37 +263,14 @@ def Parseloghtml03(year, expedition, txt): else: tripcave = "UNKNOWN" #print tripcave, "--- ppp", triptitle, trippeople, len(triptext) - ltriptext = re.sub("</p>", "", triptext) - 
ltriptext = re.sub("\s*?\n\s*", " ", ltriptext) - ltriptext = re.sub("<p>", "\n\n", ltriptext).strip() - ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext) - EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0) - -yearlinks = [ -# ("2013", "2013/logbook.html", Parseloghtmltxt), - ("2012", "2012/logbook.html", Parseloghtmltxt), - ("2011", "2011/logbook.html", Parseloghtmltxt), - ("2010", "2010/logbook.html", Parselogwikitxt), - ("2009", "2009/2009logbook.txt", Parselogwikitxt), - ("2008", "2008/2008logbook.txt", Parselogwikitxt), - ("2007", "2007/logbook.html", Parseloghtmltxt), - ("2006", "2006/logbook/logbook_06.txt", Parselogwikitxt), - ("2005", "2005/logbook.html", Parseloghtmltxt), - ("2004", "2004/logbook.html", Parseloghtmltxt), - ("2003", "2003/logbook.html", Parseloghtml03), - ("2002", "2002/logbook.html", Parseloghtmltxt), - ("2001", "2001/log.htm", Parseloghtml01), - ("2000", "2000/log.htm", Parseloghtml01), - ("1999", "1999/log.htm", Parseloghtml01), - ("1998", "1998/log.htm", Parseloghtml01), - ("1997", "1997/log.htm", Parseloghtml01), - ("1996", "1996/log.htm", Parseloghtml01), - ("1995", "1995/log.htm", Parseloghtml01), - ("1994", "1994/log.htm", Parseloghtml01), - ("1993", "1993/log.htm", Parseloghtml01), - ("1992", "1992/log.htm", Parseloghtml01), - ("1991", "1991/log.htm", Parseloghtml01), - ] + ltriptext = re.sub(r"</p>", "", triptext) + ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext) + ltriptext = re.sub(r"<p>", "\n\n", ltriptext).strip() + ltriptext = re.sub(r"[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext) + EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, + text = ltriptext, trippeople=trippeople, expedition=expedition, + logtime_underground=0, entry_type="html") + def SetDatesFromLogbookEntries(expedition): """ @@ -295,54 +289,67 @@ def SetDatesFromLogbookEntries(expedition): persontrip.persontrip_next = None lprevpersontrip = persontrip persontrip.save() - - - + + def LoadLogbookForExpedition(expedition): """ Parses all logbook entries for one expedition """ - expowebbase = os.path.join(settings.EXPOWEB, "years") - year = str(expedition.year) - for lyear, lloc, parsefunc in yearlinks: - if lyear == year: - break - fin = open(os.path.join(expowebbase, lloc)) - print "opennning", lloc - txt = fin.read().decode("latin1") - fin.close() - parsefunc(year, expedition, txt) - SetDatesFromLogbookEntries(expedition) - return "TOLOAD: " + year + " " + str(expedition.personexpedition_set.all()[1].logbookentry_set.count()) + " " + str(models.PersonTrip.objects.filter(personexpedition__expedition=expedition).count()) + expowebbase = os.path.join(settings.EXPOWEB, "years") + yearlinks = settings.LOGBOOK_PARSER_SETTINGS + + logbook_parseable = False + + if expedition.year in yearlinks: + year_settings = yearlinks[expedition.year] + file_in = open(os.path.join(expowebbase, year_settings[0])) + txt = file_in.read().decode("latin1") + file_in.close() + parsefunc = year_settings[1] + logbook_parseable = True + print(" - Parsing logbook: " + year_settings[0] + "\n - Using parser: " + year_settings[1]) + else: + try: + file_in = open(os.path.join(expowebbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE)) + txt = file_in.read().decode("latin1") + file_in.close() + logbook_parseable = True + print("No set parser found using default") + parsefunc = settings.DEFAULT_LOGBOOK_PARSER + except (IOError): + 
logbook_parseable = False + print("Couldn't open default logbook file and nothing in settings for expo " + expedition.year) + + if logbook_parseable: + parser = globals()[parsefunc] + parser(expedition.year, expedition, txt) + SetDatesFromLogbookEntries(expedition) + + #return "TOLOAD: " + year + " " + str(expedition.personexpedition_set.all()[1].logbookentry_set.count()) + " " + str(models.PersonTrip.objects.filter(personexpedition__expedition=expedition).count()) def LoadLogbooks(): - """ This is the master function for parsing all logbooks into the Troggle database. Requires yearlinks, which is a list of tuples for each expedition with expedition year, logbook path, and parsing function. """ - - #Deletion has been moved to a seperate function to enable the non-destructive importing - #models.LogbookEntry.objects.all().delete() - expowebbase = os.path.join(settings.EXPOWEB, "years") - #yearlinks = [ ("2001", "2001/log.htm", Parseloghtml01), ] #overwrite - #yearlinks = [ ("1996", "1996/log.htm", Parseloghtml01),] # overwrite - - for year, lloc, parsefunc in yearlinks: - # This will not work until the corresponding year exists in the database. - # In 2012 this needed noscript/folk.csv to be updated first. - expedition = models.Expedition.objects.filter(year = year)[0] - fin = open(os.path.join(expowebbase, lloc)) - txt = fin.read().decode("latin1") - fin.close() - parsefunc(year, expedition, txt) - SetDatesFromLogbookEntries(expedition) + """ This is the master function for parsing all logbooks into the Troggle database. """ + + # Clear the logbook data issues as we are reloading + models.DataIssue.objects.filter(parser='logbooks').delete() + # Fetch all expos + expos = models.Expedition.objects.all() + for expo in expos: + print("\nLoading Logbook for: " + expo.year) + + # Load logbook for expo + LoadLogbookForExpedition(expo) + -dateRegex = re.compile('<span\s+class="date">(\d\d\d\d)-(\d\d)-(\d\d)</span>', re.S) -expeditionYearRegex = re.compile('<span\s+class="expeditionyear">(.*?)</span>', re.S) -titleRegex = re.compile('<H1>(.*?)</H1>', re.S) -reportRegex = re.compile('<div\s+class="report">(.*)</div>\s*</body>', re.S) -personRegex = re.compile('<div\s+class="person">(.*?)</div>', re.S) -nameAuthorRegex = re.compile('<span\s+class="name(,author|)">(.*?)</span>', re.S) -TURegex = re.compile('<span\s+class="TU">([0-9]*\.?[0-9]+)</span>', re.S) -locationRegex = re.compile('<span\s+class="location">(.*?)</span>', re.S) -caveRegex = re.compile('<span\s+class="cave">(.*?)</span>', re.S) +dateRegex = re.compile(r'<span\s+class="date">(\d\d\d\d)-(\d\d)-(\d\d)</span>', re.S) +expeditionYearRegex = re.compile(r'<span\s+class="expeditionyear">(.*?)</span>', re.S) +titleRegex = re.compile(r'<H1>(.*?)</H1>', re.S) +reportRegex = re.compile(r'<div\s+class="report">(.*)</div>\s*</body>', re.S) +personRegex = re.compile(r'<div\s+class="person">(.*?)</div>', re.S) +nameAuthorRegex = re.compile(r'<span\s+class="name(,author|)">(.*?)</span>', re.S) +TURegex = re.compile(r'<span\s+class="TU">([0-9]*\.?[0-9]+)</span>', re.S) +locationRegex = re.compile(r'<span\s+class="location">(.*?)</span>', re.S) +caveRegex = re.compile(r'<span\s+class="cave">(.*?)</span>', re.S) def parseAutoLogBookEntry(filename): errors = [] @@ -435,4 +442,4 @@ def parseAutoLogBookEntry(filename): time_underground = TU, logbook_entry = logbookEntry, is_logbook_entry_author = author).save() - print logbookEntry + print(logbookEntry) diff --git a/parsers/people.py b/parsers/people.py index 4dba3a8..34a5ff3 100644 --- 
a/parsers/people.py +++ b/parsers/people.py @@ -4,6 +4,8 @@ from django.conf import settings import troggle.core.models as models import csv, re, datetime, os, shutil from utils import save_carefully +from HTMLParser import HTMLParser +from unidecode import unidecode def saveMugShot(mugShotPath, mugShotFilename, person): if mugShotFilename.startswith(r'i/'): #if filename in cell has the directory attached (I think they all do), remove it @@ -44,13 +46,13 @@ def parseMugShotAndBlurb(personline, header, person): def LoadPersonsExpos(): - persontab = open(os.path.join(settings.EXPOWEB, "noinfo", "folk.csv")) + persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) personreader = csv.reader(persontab) headers = personreader.next() header = dict(zip(headers, range(len(headers)))) # make expeditions - print "Loading expeditions" + print("Loading expeditions") years = headers[5:] for year in years: @@ -59,20 +61,35 @@ def LoadPersonsExpos(): save_carefully(models.Expedition, lookupAttribs, nonLookupAttribs) - # make persons - print "Loading personexpeditions" + print("Loading personexpeditions") for personline in personreader: name = personline[header["Name"]] - name = re.sub("<.*?>", "", name) - mname = re.match("(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name) - nickname = mname.group(3) or "" - - lookupAttribs={'first_name':mname.group(1), 'last_name':(mname.group(2) or "")} - nonLookupAttribs={'is_vfho':personline[header["VfHO member"]],} + name = re.sub(r"<.*?>", "", name) + + firstname = "" + nickname = "" + + rawlastname = personline[header["Lastname"]].strip() + matchlastname = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname) + lastname = matchlastname.group(1).strip() + + splitnick = re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", name) + fullname = splitnick.group(1) + + nickname = splitnick.group(2) or "" + + fullname = fullname.strip() + names = fullname.split(' ') + firstname = names[0] + if len(names) == 1: + lastname = "" + + lookupAttribs={'first_name':firstname, 'last_name':(lastname or "")} + nonLookupAttribs={'is_vfho':personline[header["VfHO member"]], 'fullname':fullname} person, created = save_carefully(models.Person, lookupAttribs, nonLookupAttribs) - + parseMugShotAndBlurb(personline=personline, header=header, person=person) # make person expedition from table @@ -83,7 +100,26 @@ def LoadPersonsExpos(): nonLookupAttribs = {'nickname':nickname, 'is_guest':(personline[header["Guest"]] == "1")} save_carefully(models.PersonExpedition, lookupAttribs, nonLookupAttribs) - + + # this fills in those people for whom 2008 was their first expo + #print "Loading personexpeditions 2008" + #expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",") + #expomissing = set(expoers2008) + #for name in expomissing: + # firstname, lastname = name.split() + # is_guest = name in ["Eeva Makiranta", "Keith Curtis"] + # print "2008:", name + # persons = list(models.Person.objects.filter(first_name=firstname, last_name=lastname)) + # if not persons: + # person = models.Person(first_name=firstname, last_name = lastname, is_vfho = False, mug_shot = "") + # #person.Sethref() + # person.save() + # else: + # person = persons[0] + # expedition = models.Expedition.objects.get(year="2008") + # personexpedition = models.PersonExpedition(person=person, 
expedition=expedition, nickname="", is_guest=is_guest) + # personexpedition.save() + # used in other referencing parser functions # expedition name lookup cached for speed (it's a very big list) Gpersonexpeditionnamelookup = { } @@ -96,20 +132,33 @@ def GetPersonExpeditionNameLookup(expedition): res = { } duplicates = set() - print "Calculating GetPersonExpeditionNameLookup for", expedition.year + print("Calculating GetPersonExpeditionNameLookup for " + expedition.year) personexpeditions = models.PersonExpedition.objects.filter(expedition=expedition) + htmlparser = HTMLParser() for personexpedition in personexpeditions: possnames = [ ] - f = personexpedition.person.first_name.lower() - l = personexpedition.person.last_name.lower() + f = unidecode(htmlparser.unescape(personexpedition.person.first_name.lower())) + l = unidecode(htmlparser.unescape(personexpedition.person.last_name.lower())) + full = unidecode(htmlparser.unescape(personexpedition.person.fullname.lower())) if l: possnames.append(f + " " + l) possnames.append(f + " " + l[0]) possnames.append(f + l[0]) possnames.append(f[0] + " " + l) possnames.append(f) - if personexpedition.nickname: + if full not in possnames: + possnames.append(full) + if personexpedition.nickname not in possnames: possnames.append(personexpedition.nickname.lower()) + if l: + # This allows for nickname to be used for short name eg Phil + # adding Phil Sargent to the list + if str(personexpedition.nickname.lower() + " " + l) not in possnames: + possnames.append(personexpedition.nickname.lower() + " " + l) + if str(personexpedition.nickname.lower() + " " + l[0]) not in possnames: + possnames.append(personexpedition.nickname.lower() + " " + l[0]) + if str(personexpedition.nickname.lower() + l[0]) not in possnames: + possnames.append(personexpedition.nickname.lower() + l[0]) for possname in possnames: if possname in res: diff --git a/parsers/peopleM.py b/parsers/peopleM.py deleted file mode 100644 index 62c7ce0..0000000 --- a/parsers/peopleM.py +++ /dev/null @@ -1,27 +0,0 @@ -from django.conf import settings -import troggle.core.models as models - -def load(): - folkfile = open(settings.EXPOWEB+"noinfo/folk.csv") - personlines = folkfile.read().splitlines() - persontable = [x.split(',') for x in personlines] - years = [persontable[0][i] for i in range(5,len(persontable[0]))] - for year in years: - newexpedition = models.ExpeditionM( date = year ) - newexpedition.save() - for row in persontable[1:]: #skip header - attendedid = [i for i, x in enumerate(row) if '1' in x] - attendedyears = [persontable[0][i] for i in attendedid if i >= 5] - name = row[0] - print(name+' has attended: '+', '.join(attendedyears)) - newperson = models.PersonM( - name = name) - newperson.save() - for year in attendedyears: - target = models.ExpeditionM.objects.get(date=year) - newperson.expos_attended.add( target ) - print('Person -> Expo table created!') - - - - diff --git a/parsers/survex.py b/parsers/survex.py index 536314f..14bd035 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -5,20 +5,26 @@ import troggle.settings as settings from subprocess import call, Popen, PIPE from troggle.parsers.people import GetPersonExpeditionNameLookup +from django.utils.timezone import get_current_timezone +from django.utils.timezone import make_aware + import re import os +from datetime import datetime +line_leg_regex = re.compile(r"[\d\-+.]+$") -def LoadSurvexLineLeg(survexblock, stardata, sline, comment): +def LoadSurvexLineLeg(survexblock, stardata, sline, comment, cave): + # The try 
catches here need replacing as they are relativly expensive ls = sline.lower().split() ssfrom = survexblock.MakeSurvexStation(ls[stardata["from"]]) ssto = survexblock.MakeSurvexStation(ls[stardata["to"]]) - + survexleg = models.SurvexLeg(block=survexblock, stationfrom=ssfrom, stationto=ssto) if stardata["type"] == "normal": try: survexleg.tape = float(ls[stardata["tape"]]) - except ValueError: + except ValueError: print("Tape misread in", survexblock.survexfile.path) print("Stardata:", stardata) print("Line:", ls) @@ -53,14 +59,17 @@ def LoadSurvexLineLeg(survexblock, stardata, sline, comment): survexleg.compass = 1000 survexleg.clino = -90.0 else: - assert re.match(r"[\d\-+.]+$", lcompass), ls - assert re.match(r"[\d\-+.]+$", lclino) and lclino != "-", ls + assert line_leg_regex.match(lcompass), ls + assert line_leg_regex.match(lclino) and lclino != "-", ls survexleg.compass = float(lcompass) survexleg.clino = float(lclino) - + + if cave: + survexleg.cave = cave + # only save proper legs survexleg.save() - + itape = stardata.get("tape") if itape: try: @@ -80,96 +89,212 @@ def LoadSurvexEquate(survexblock, sline): def LoadSurvexLinePassage(survexblock, stardata, sline, comment): pass - stardatadefault = {"type":"normal", "t":"leg", "from":0, "to":1, "tape":2, "compass":3, "clino":4} stardataparamconvert = {"length":"tape", "bearing":"compass", "gradient":"clino"} +regex_comment = re.compile(r"([^;]*?)\s*(?:;\s*(.*))?\n?$") +regex_ref = re.compile(r'.*?ref.*?(\d+)\s*#\s*(\d+)') +regex_star = re.compile(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$') +regex_team = re.compile(r"(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$(?i)") +regex_team_member = re.compile(r" and | / |, | & | \+ |^both$|^none$(?i)") +regex_qm = re.compile(r'^\s*QM(\d)\s+?([a-dA-DxX])\s+([\w\-]+)\.(\d+)\s+(([\w\-]+)\.(\d+)|\-)\s+(.+)$') + def RecursiveLoad(survexblock, survexfile, fin, textlines): iblankbegins = 0 text = [ ] stardata = stardatadefault teammembers = [ ] - -# uncomment to print out all files during parsing - print("Reading file:", survexblock.survexfile.path) - while True: - svxline = fin.readline().decode("latin1") - if not svxline: - return - textlines.append(svxline) - + + # uncomment to print out all files during parsing + print(" - Reading file: " + survexblock.survexfile.path) + stamp = datetime.now() + lineno = 0 + + # Try to find the cave in the DB if not use the string as before + path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path) + if path_match: + pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) + # print('Match') + # print(pos_cave) + cave = models.getCaveByReference(pos_cave) + if cave: + survexfile.cave = cave + svxlines = '' + svxlines = fin.read().splitlines() + # print('Cave - preloop ' + str(survexfile.cave)) + # print(survexblock) + for svxline in svxlines: + + # print(survexblock) + + # print(svxline) + # if not svxline: + # print(' - Not survex') + # return + # textlines.append(svxline) + + lineno += 1 + + # print(' - Line: %d' % lineno) + # break the line at the comment - sline, comment = re.match(r"([^;]*?)\s*(?:;\s*(.*))?\n?$", svxline.strip()).groups() - + sline, comment = regex_comment.match(svxline.strip()).groups() # detect ref line pointing to the scans directory - mref = comment and re.match(r'.*?ref.*?(\d+)\s*#\s*(\d+)', comment) + mref = comment and regex_ref.match(comment) if mref: refscan = "%s#%s" % (mref.group(1), mref.group(2)) survexscansfolders = 
models.SurvexScansFolder.objects.filter(walletname=refscan) if survexscansfolders: survexblock.survexscansfolder = survexscansfolders[0] #survexblock.refscandir = "%s/%s%%23%s" % (mref.group(1), mref.group(1), mref.group(2)) - survexblock.save() + survexblock.save() continue - + + # This whole section should be moved if we can have *QM become a proper survex command + # Spec of QM in SVX files, currently commented out need to add to survex + # needs to match regex_qm + # ;Serial number grade(A/B/C/D/X) nearest-station resolution-station description + # ;QM1 a hobnob_hallway_2.42 hobnob-hallway_3.42 junction of keyhole passage + # ;QM1 a hobnob_hallway_2.42 - junction of keyhole passage + qmline = comment and regex_qm.match(comment) + if qmline: + print(qmline.groups()) + #(u'1', u'B', u'miraclemaze', u'1.17', u'-', None, u'\tcontinuation of rift') + qm_no = qmline.group(1) + qm_grade = qmline.group(2) + qm_from_section = qmline.group(3) + qm_from_station = qmline.group(4) + qm_resolve_section = qmline.group(6) + qm_resolve_station = qmline.group(7) + qm_notes = qmline.group(8) + + print('Cave - %s' % survexfile.cave) + print('QM no %d' % int(qm_no)) + print('QM grade %s' % qm_grade) + print('QM section %s' % qm_from_section) + print('QM station %s' % qm_from_station) + print('QM res section %s' % qm_resolve_section) + print('QM res station %s' % qm_resolve_station) + print('QM notes %s' % qm_notes) + + # If the QM isn't resolved (has a resolving station) thn load it + if not qm_resolve_section or qm_resolve_section is not '-' or qm_resolve_section is not 'None': + from_section = models.SurvexBlock.objects.filter(name=qm_from_section) + # If we can find a section (survex note chunck, named) + if len(from_section) > 0: + print(from_section[0]) + from_station = models.SurvexStation.objects.filter(block=from_section[0], name=qm_from_station) + # If we can find a from station then we have the nearest station and can import it + if len(from_station) > 0: + print(from_station[0]) + qm = models.QM.objects.create(number=qm_no, + nearest_station=from_station[0], + grade=qm_grade.upper(), + location_description=qm_notes) + else: + print('QM found but resolved') + + #print('Cave -sline ' + str(cave)) if not sline: continue - + # detect the star command - mstar = re.match(r'\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$', sline) + mstar = regex_star.match(sline) if not mstar: if "from" in stardata: - LoadSurvexLineLeg(survexblock, stardata, sline, comment) + # print('Cave ' + str(survexfile.cave)) + # print(survexblock) + LoadSurvexLineLeg(survexblock, stardata, sline, comment, survexfile.cave) + # print(' - From: ') + #print(stardata) + pass elif stardata["type"] == "passage": LoadSurvexLinePassage(survexblock, stardata, sline, comment) + # print(' - Passage: ') #Missing "station" in stardata. 
continue - + # detect the star command cmd, line = mstar.groups() cmd = cmd.lower() if re.match("include$(?i)", cmd): includepath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line)) - includesurvexfile = models.SurvexFile(path=includepath, cave=survexfile.cave) + print(' - Include file found including - ' + includepath) + # Try to find the cave in the DB if not use the string as before + path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath) + if path_match: + pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) + # print(pos_cave) + cave = models.getCaveByReference(pos_cave) + if cave: + survexfile.cave = cave + else: + print('No match for %s' % includepath) + includesurvexfile = models.SurvexFile(path=includepath) includesurvexfile.save() includesurvexfile.SetDirectory() if includesurvexfile.exists(): + survexblock.save() fininclude = includesurvexfile.OpenFile() RecursiveLoad(survexblock, includesurvexfile, fininclude, textlines) - + elif re.match("begin$(?i)", cmd): - if line: + if line: + newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", line)) + # Try to find the cave in the DB if not use the string as before + path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath) + if path_match: + pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2)) + print(pos_cave) + cave = models.getCaveByReference(pos_cave) + if cave: + survexfile.cave = cave + else: + print('No match for %s' % newsvxpath) + name = line.lower() - survexblockdown = models.SurvexBlock(name=name, begin_char=fin.tell(), parent=survexblock, survexpath=survexblock.survexpath+"."+name, cave=survexblock.cave, survexfile=survexfile, totalleglength=0.0) + print(' - Begin found for: ' + name) + # print('Block cave: ' + str(survexfile.cave)) + survexblockdown = models.SurvexBlock(name=name, begin_char=fin.tell(), parent=survexblock, survexpath=survexblock.survexpath+"."+name, cave=survexfile.cave, survexfile=survexfile, totalleglength=0.0) survexblockdown.save() + survexblock.save() + survexblock = survexblockdown + # print(survexblockdown) textlinesdown = [ ] RecursiveLoad(survexblockdown, survexfile, fin, textlinesdown) else: iblankbegins += 1 - + elif re.match("end$(?i)", cmd): if iblankbegins: iblankbegins -= 1 else: survexblock.text = "".join(textlines) survexblock.save() + # print(' - End found: ') + endstamp = datetime.now() + timetaken = endstamp - stamp + # print(' - Time to process: ' + str(timetaken)) return - + elif re.match("date$(?i)", cmd): if len(line) == 10: - survexblock.date = re.sub(r"\.", "-", line) + #print(' - Date found: ' + line) + survexblock.date = make_aware(datetime.strptime(re.sub(r"\.", "-", line), '%Y-%m-%d'), get_current_timezone()) expeditions = models.Expedition.objects.filter(year=line[:4]) if expeditions: assert len(expeditions) == 1 survexblock.expedition = expeditions[0] survexblock.expeditionday = survexblock.expedition.get_expedition_day(survexblock.date) survexblock.save() - + elif re.match("team$(?i)", cmd): - mteammember = re.match(r"(Insts|Notes|Tape|Dog|Useless|Pics|Helper|Disto|Consultant)\s+(.*)$(?i)", line) + pass + # print(' - Team found: ') + mteammember = regex_team.match(line) if mteammember: - for tm in re.split(r" and | / |, | & | \+ |^both$|^none$(?i)", mteammember.group(2)): + for tm in regex_team_member.split(mteammember.group(2)): if tm: personexpedition = survexblock.expedition and 
GetPersonExpeditionNameLookup(survexblock.expedition).get(tm.lower()) if (personexpedition, tm) not in teammembers: @@ -179,18 +304,23 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): if personexpedition: personrole.person=personexpedition.person personrole.save() - + elif cmd == "title": - survextitle = models.SurvexTitle(survexblock=survexblock, title=line.strip('"'), cave=survexblock.cave) + #print(' - Title found: ') + survextitle = models.SurvexTitle(survexblock=survexblock, title=line.strip('"'), cave=survexfile.cave) survextitle.save() - + pass + elif cmd == "require": # should we check survex version available for processing? pass elif cmd == "data": + #print(' - Data found: ') ls = line.lower().split() stardata = { "type":ls[0] } + #print(' - Star data: ', stardata) + #print(ls) for i in range(0, len(ls)): stardata[stardataparamconvert.get(ls[i], ls[i])] = i - 1 if ls[0] in ["normal", "cartesian", "nosurvey"]: @@ -199,40 +329,23 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines): stardata = stardatadefault else: assert ls[0] == "passage", line - + elif cmd == "equate": + #print(' - Equate found: ') LoadSurvexEquate(survexblock, line) elif cmd == "fix": + #print(' - Fix found: ') survexblock.MakeSurvexStation(line.split()[0]) else: + #print(' - Stuff') if cmd not in ["sd", "include", "units", "entrance", "data", "flags", "title", "export", "instrument", "calibrate", "set", "infer", "alias", "ref", "cs", "declination", "case"]: print("Unrecognised command in line:", cmd, line, survexblock, survexblock.survexfile.path) - - -def ReloadSurvexCave(survex_cave, area): - print(survex_cave, area) - cave = models.Cave.objects.get(kataster_number=survex_cave, area__short_name=area) - print(cave) - #cave = models.Cave.objects.get(kataster_number=survex_cave) - cave.survexblock_set.all().delete() - cave.survexfile_set.all().delete() - cave.survexdirectory_set.all().delete() - - survexfile = models.SurvexFile(path="caves-" + cave.kat_area() + "/" + survex_cave + "/" + survex_cave, cave=cave) - survexfile.save() - survexfile.SetDirectory() - - survexblockroot = models.SurvexBlock(name="root", survexpath="caves-" + cave.kat_area(), begin_char=0, cave=cave, survexfile=survexfile, totalleglength=0.0) - survexblockroot.save() - fin = survexfile.OpenFile() - textlines = [ ] - RecursiveLoad(survexblockroot, survexfile, fin, textlines) - survexblockroot.text = "".join(textlines) - survexblockroot.save() - + endstamp = datetime.now() + timetaken = endstamp - stamp + # print(' - Time to process: ' + str(timetaken)) def LoadAllSurvexBlocks(): @@ -249,7 +362,7 @@ def LoadAllSurvexBlocks(): print(" - Data flushed") - survexfile = models.SurvexFile(path="all", cave=None) + survexfile = models.SurvexFile(path=settings.SURVEX_TOPNAME, cave=None) survexfile.save() survexfile.SetDirectory() @@ -258,22 +371,13 @@ def LoadAllSurvexBlocks(): survexblockroot.save() fin = survexfile.OpenFile() textlines = [ ] + # The real work starts here RecursiveLoad(survexblockroot, survexfile, fin, textlines) + fin.close() survexblockroot.text = "".join(textlines) survexblockroot.save() - #Load each cave, - #FIXME this should be dealt with load all above - print(" - Reloading all caves") - caves = models.Cave.objects.all() - for cave in caves: - if cave.kataster_number and os.path.isdir(os.path.join(settings.SURVEX_DATA, "caves-" + cave.kat_area(), cave.kataster_number)): - if cave.kataster_number not in ['40']: - print("loading", cave, cave.kat_area()) - ReloadSurvexCave(cave.kataster_number, 
cave.kat_area()) - - poslineregex = re.compile(r"^\(\s*([+-]?\d*\.\d*),\s*([+-]?\d*\.\d*),\s*([+-]?\d*\.\d*)\s*\)\s*([^\s]+)$") @@ -281,12 +385,12 @@ def LoadPos(): print('Loading Pos....') - call([settings.CAVERN, "--output=%s/all.3d" % settings.SURVEX_DATA, "%s/all.svx" % settings.SURVEX_DATA]) - call([settings.THREEDTOPOS, '%sall.3d' % settings.SURVEX_DATA], cwd = settings.SURVEX_DATA) - posfile = open("%sall.pos" % settings.SURVEX_DATA) + call([settings.CAVERN, "--output=%s%s.3d" % (settings.SURVEX_DATA, settings.SURVEX_TOPNAME), "%s%s.svx" % (settings.SURVEX_DATA, settings.SURVEX_TOPNAME)]) + call([settings.THREEDTOPOS, '%s%s.3d' % (settings.SURVEX_DATA, settings.SURVEX_TOPNAME)], cwd = settings.SURVEX_DATA) + posfile = open("%s%s.pos" % (settings.SURVEX_DATA, settings.SURVEX_TOPNAME)) posfile.readline() #Drop header for line in posfile.readlines(): - r = poslineregex.match(line) + r = poslineregex.match(line) if r: x, y, z, name = r.groups() try: diff --git a/parsers/surveys.py b/parsers/surveys.py index 72a05f3..efab536 100644 --- a/parsers/surveys.py +++ b/parsers/surveys.py @@ -99,7 +99,7 @@ def parseSurveyScans(expedition, logfile=None): #scanList = listdir(expedition.year, surveyFolder) scanList=os.listdir(os.path.join(yearPath,surveyFolder)) except AttributeError: - print(surveyFolder + " ignored\r",) + print("Folder: " + surveyFolder + " ignored\r") continue for scan in scanList: @@ -107,7 +107,7 @@ def parseSurveyScans(expedition, logfile=None): scanChopped=re.match(r'(?i).*(notes|elev|plan|elevation|extend)(\d*)\.(png|jpg|jpeg)',scan).groups() scanType,scanNumber,scanFormat=scanChopped except AttributeError: - print(scan + " ignored\r",) + print("File: " + scan + " ignored\r") continue if scanType == 'elev' or scanType == 'extend': scanType = 'elevation' @@ -174,9 +174,6 @@ def GetListDir(sdir): ff = os.path.join(sdir, f) res.append((f, ff, os.path.isdir(ff))) return res - - - def LoadListScansFile(survexscansfolder): diff --git a/parsers/surveysM.py b/parsers/surveysM.py deleted file mode 100644 index 2b94b02..0000000 --- a/parsers/surveysM.py +++ /dev/null @@ -1,65 +0,0 @@ -from django.conf import settings -import subprocess, re -import troggle.core.models as models - -def load(): - print('Load survex files and relations') - load_area('1623') - -def load_area(areacode): - - print('Searching all cave dirs files') - basedir = settings.SURVEX_DATA+'caves-'+areacode+'/' - - cavedirs = bash("find "+basedir+" -maxdepth 1 -type d").splitlines() #this command finds all directories - print('Obtained list of directories! (#dirs='+str(len(cavedirs))+')') - - for cavedir in cavedirs: - if cavedir==basedir: - continue #skip the basedir - a non-proper subdirectory - parentname = bash('echo '+cavedir+' | rev | cut -f1 -d \'/\' | rev').splitlines()[0] #get final bit of the directory - parentcave = models.CaveM.objects.filter(survex_file__icontains=cavedir) - if len(parentcave)>1: - print('Non unique parent - skipping. Name:'+parentname) - elif len(parentcave)==0: - print('Error! 
parent not created:'+parentname) - continue - else: #exaclty one match - print('Adding relations of:'+parentname) - parentcave = parentcave[0] - - surveyfiles = bash('find '+cavedir+' -name \'*.svx\'').splitlines() - for fn in surveyfiles: - print(fn) - svxcontents = open(fn,'r').read().splitlines() - try: - dateline = [x for x in svxcontents if ('*date' in x)][0] - date = re.findall('\\d\\d\\d\\d\\.\\d\\d\\.\\d\\d', dateline, re.S)[0] - - - except: - if( len( [x for x in svxcontents if ('*date' in x)] ) == 0 ): - continue #skip dateless files - print('Date format error in '+fn) - print('Dateline = '+ '"'.join([x for x in svxcontents if ('*date' in x)])) - date = '1900.01.01' - - - newsurvex = models.SurveyM(survex_file=fn, date=date) - newsurvex.save() - parentcave.surveys.add(newsurvex) - parentcave.save() - - -def file_exists(filename): - test = bash('if [ ! -f '+filename+' ] ; then echo MISSING; fi')#test for file exisence - if 'MISSING' in test: #send error message to the database - return False - return True - -def bash(cmd): #calls command in bash shell, returns output - process = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE) - output, error = process.communicate() - return output - -
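A closing usage note on the QM support added in parsers/survex.py above: the spec comment in that hunk documents the expected ;QM comment format, and the new regex_qm decomposes it as in this standalone sketch. The sample station names are the hypothetical ones from that spec comment; the parser strips the leading ';' (via regex_comment) before matching.

# Standalone sketch of the QM comment format documented in the
# parsers/survex.py hunk above. The sample line is the hypothetical one
# from the spec comment, with the leading ';' already stripped.
import re

regex_qm = re.compile(r'^\s*QM(\d)\s+?([a-dA-DxX])\s+([\w\-]+)\.(\d+)\s+(([\w\-]+)\.(\d+)|\-)\s+(.+)$')

m = regex_qm.match("QM1 a hobnob_hallway_2.42 hobnob-hallway_3.42 junction of keyhole passage")
assert m
# group(1) QM number           -> '1'
# group(2) grade               -> 'a'
# group(3), group(4) nearest   -> 'hobnob_hallway_2', '42'
# group(6), group(7) resolving -> 'hobnob-hallway_3', '42'
#                                 (both None when the field is '-')
# group(8) description         -> 'junction of keyhole passage'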