Make the surveys importer not explode

author: Sam Wenham <sam@wenhams.co.uk> 2019-02-24 14:29:14 +0000
committer: Sam Wenham <sam@wenhams.co.uk> 2019-02-24 14:29:14 +0000
commit: 8fd23008e363690310718fa616b8e5bbd4bc4e8f (patch)
tree: b6ec5e9765be6f1299cccb8d52ee07eb37c2ab02 /parsers/surveys.py
parent: 8f66837f6fb5b74ba3166ae6e31328f8a9e68d96 (diff)
download: troggle-8fd23008e363690310718fa616b8e5bbd4bc4e8f.tar.gz
troggle-8fd23008e363690310718fa616b8e5bbd4bc4e8f.tar.bz2
troggle-8fd23008e363690310718fa616b8e5bbd4bc4e8f.zip
1 files changed, 60 insertions, 51 deletions
diff --git a/parsers/surveys.py b/parsers/surveys.py
index 2c6f190..02b06f5 100644
--- a/parsers/surveys.py
+++ b/parsers/surveys.py
@@ -39,7 +39,7 @@ def readSurveysFromCSV():
 
     # test if the expeditions have been added yet
     if Expedition.objects.count()==0:
-        print "There are no expeditions in the database. Please run the logbook parser."
+        print("There are no expeditions in the database. Please run the logbook parser.")
         sys.exit()
 
     
@@ -56,7 +56,7 @@ def readSurveysFromCSV():
     for survey in surveyreader:
         #I hate this, but some surveys have a letter eg 2000#34a. The next line deals with that.
         walletNumberLetter = re.match(r'(?P<number>\d*)(?P<letter>[a-zA-Z]*)',survey[header['Survey Number']]) 
-    #    print walletNumberLetter.groups()
+    #    print(walletNumberLetter.groups())
         year=survey[header['Year']]
 
         
@@ -89,63 +89,72 @@ def listdir(*directories):
 # add survey scans
 def parseSurveyScans(expedition, logfile=None):
 #    yearFileList = listdir(expedition.year)
-    yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
-    yearFileList=os.listdir(yearPath)
-    print yearFileList
-    for surveyFolder in yearFileList:
-        try:
-            surveyNumber=re.match(r'\d\d\d\d#0*(\d+)',surveyFolder).groups()
-#            scanList = listdir(expedition.year, surveyFolder)
-            scanList=os.listdir(os.path.join(yearPath,surveyFolder))
-        except AttributeError:
-            print surveyFolder + " ignored\r",
-            continue
-
-        for scan in scanList:
+    try:
+        yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
+        yearFileList=os.listdir(yearPath)
+        print(yearFileList)
+        for surveyFolder in yearFileList:
             try:
-                scanChopped=re.match(r'(?i).*(notes|elev|plan|elevation|extend)(\d*)\.(png|jpg|jpeg)',scan).groups()
-                scanType,scanNumber,scanFormat=scanChopped
+                surveyNumber=re.match(r'\d\d\d\d#0*(\d+)',surveyFolder).groups()
+                #scanList = listdir(expedition.year, surveyFolder)
+                scanList=os.listdir(os.path.join(yearPath,surveyFolder))
             except AttributeError:
-                print scan + " ignored\r",
+                print(surveyFolder + " ignored\r",)
                 continue
-            if scanType == 'elev' or scanType == 'extend':
-                scanType = 'elevation'
 
-            if scanNumber=='':
-                scanNumber=1
+            for scan in scanList:
+                try:
+                    scanChopped=re.match(r'(?i).*(notes|elev|plan|elevation|extend)(\d*)\.(png|jpg|jpeg)',scan).groups()
+                    scanType,scanNumber,scanFormat=scanChopped
+                except AttributeError:
+                    print(scan + " ignored\r",)
+                    continue
+                if scanType == 'elev' or scanType == 'extend':
+                    scanType = 'elevation'
 
-            if type(surveyNumber)==types.TupleType:
-                surveyNumber=surveyNumber[0]
-            try:
-                placeholder=get_or_create_placeholder(year=int(expedition.year))
-                survey=Survey.objects.get_or_create(wallet_number=surveyNumber, expedition=expedition, defaults={'logbook_entry':placeholder})[0]
-            except Survey.MultipleObjectsReturned:
-                survey=Survey.objects.filter(wallet_number=surveyNumber, expedition=expedition)[0]
-            file_=os.path.join(yearPath, surveyFolder, scan)
-            scanObj = ScannedImage(
-                file=file_,
-                contents=scanType,
-                number_in_wallet=scanNumber,
-                survey=survey,
-                new_since_parsing=False,
-                )
-            print "Added scanned image at " + str(scanObj)
-            #if scanFormat=="png":
-                #if isInterlacedPNG(os.path.join(settings.SURVEY_SCANS, "surveyscans", file_)):
-                #    print file_+ " is an interlaced PNG. No can do."
-                #continue
-            scanObj.save()
+                if scanNumber=='':
+                    scanNumber=1
+
+                if type(surveyNumber)==types.TupleType:
+                    surveyNumber=surveyNumber[0]
+                try:
+                    placeholder=get_or_create_placeholder(year=int(expedition.year))
+                    survey=Survey.objects.get_or_create(wallet_number=surveyNumber, expedition=expedition, defaults={'logbook_entry':placeholder})[0]
+                except Survey.MultipleObjectsReturned:
+                    survey=Survey.objects.filter(wallet_number=surveyNumber, expedition=expedition)[0]
+                file_=os.path.join(yearPath, surveyFolder, scan)
+                scanObj = ScannedImage(
+                    file=file_,
+                    contents=scanType,
+                    number_in_wallet=scanNumber,
+                    survey=survey,
+                    new_since_parsing=False,
+                    )
+                print("Added scanned image at " + str(scanObj))
+                #if scanFormat=="png":
+                    #if isInterlacedPNG(os.path.join(settings.SURVEY_SCANS, "surveyscans", file_)):
+                    #    print file_+ " is an interlaced PNG. No can do."
+                    #continue
+                scanObj.save()
+    except (IOError, OSError):
+        yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
+        print("No folder found for " + expedition.year + " at:- " + yearPath)
 
 # dead
 def parseSurveys(logfile=None):
-    readSurveysFromCSV()
+    try:
+        readSurveysFromCSV()
+    except (IOError, OSError):
+        print("Survey CSV not found..")
+        pass
+
     for expedition in Expedition.objects.filter(year__gte=2000):   #expos since 2000, because paths and filenames were nonstandard before then
         parseSurveyScans(expedition)
 
 # dead
 def isInterlacedPNG(filePath): #We need to check for interlaced PNGs because the thumbnail engine can't handle them (uses PIL)
     file=Image.open(filePath)
-    print filePath
+    print(filePath)
     if 'interlace' in file.info:
         return file.info['interlace']
     else:
@@ -181,7 +190,7 @@ def LoadListScansFile(survexscansfolder):
     
     for (fyf, ffyf, fisdiryf) in gld:
         assert not fisdiryf, ffyf
-        if re.search("\.(?:png|jpg|jpeg)(?i)$", fyf):
+        if re.search(r"\.(?:png|jpg|jpeg)(?i)$", fyf):
             survexscansingle = SurvexScanSingle(ffile=ffyf, name=fyf, survexscansfolder=survexscansfolder)
             survexscansingle.save()
 
@@ -190,7 +199,7 @@ def LoadListScansFile(survexscansfolder):
 # and builds up the models we can access later
 def LoadListScans():
 
-    print 'Loading Survey Scans...'
+    print('Loading Survey Scans...')
 
     SurvexScanSingle.objects.all().delete()
     SurvexScansFolder.objects.all().delete()
@@ -208,7 +217,7 @@ def LoadListScans():
             continue
         
         # do the year folders
-        if re.match("\d\d\d\d$", f):
+        if re.match(r"\d\d\d\d$", f):
             for fy, ffy, fisdiry in GetListDir(ff):
                 if fisdiry:
                     assert fisdiry, ffy
@@ -225,7 +234,7 @@ def LoadListScans():
 
 def FindTunnelScan(tunnelfile, path):
     scansfolder, scansfile = None, None
-    mscansdir = re.search("(\d\d\d\d#\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg))$", path)
+    mscansdir = re.search(r"(\d\d\d\d#\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg))$", path)
     if mscansdir:
         scansfolderl = SurvexScansFolder.objects.filter(walletname=mscansdir.group(1))
         if len(scansfolderl):
@@ -242,9 +251,9 @@ def FindTunnelScan(tunnelfile, path):
         if scansfile:
             tunnelfile.survexscans.add(scansfile)
     
-    elif path and not re.search("\.(?:png|jpg)$(?i)", path):
+    elif path and not re.search(r"\.(?:png|jpg)$(?i)", path):
         name = os.path.split(path)[1]
-        print "ttt", tunnelfile.tunnelpath, path, name
+        print("ttt", tunnelfile.tunnelpath, path, name)
         rtunnelfilel = TunnelFile.objects.filter(tunnelname=name)
         if len(rtunnelfilel):
             assert len(rtunnelfilel) == 1, ("two paths with name of", path, "need more discrimination coded")
author	Sam Wenham <sam@wenhams.co.uk>	2019-02-24 14:29:14 +0000
committer	Sam Wenham <sam@wenhams.co.uk>	2019-02-24 14:29:14 +0000
commit	8fd23008e363690310718fa616b8e5bbd4bc4e8f (patch)
tree	b6ec5e9765be6f1299cccb8d52ee07eb37c2ab02 /parsers/surveys.py
parent	8f66837f6fb5b74ba3166ae6e31328f8a9e68d96 (diff)
download	troggle-8fd23008e363690310718fa616b8e5bbd4bc4e8f.tar.gz troggle-8fd23008e363690310718fa616b8e5bbd4bc4e8f.tar.bz2 troggle-8fd23008e363690310718fa616b8e5bbd4bc4e8f.zip