Validation of mugshot or blurb file added

author: Philip Sargent <philip.sargent@gmail.com> 2020-04-01 19:58:31 +0100
committer: Philip Sargent <philip.sargent@gmail.com> 2020-04-01 19:58:31 +0100
commit: 53b797fb53fdc2c540e03d2ad4273688c30173a3 (patch)
tree: 3cf120b30359fadde3f5dda7e35f525642d4f84a /parsers/people.py
parent: 98eb9173ee7f846a0f8dea6aa54aad4b5dd2f5fb (diff)
download: troggle-53b797fb53fdc2c540e03d2ad4273688c30173a3.tar.gz
troggle-53b797fb53fdc2c540e03d2ad4273688c30173a3.tar.bz2
troggle-53b797fb53fdc2c540e03d2ad4273688c30173a3.zip
1 files changed, 7 insertions, 1 deletions
diff --git a/parsers/people.py b/parsers/people.py
index 34a5ff3..f7e2f50 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -38,7 +38,13 @@ def parseMugShotAndBlurb(personline, header, person):
     elif mugShotPath[-3:]=='htm': #if person has an html page, find the image(s) and add it. Also, add the text from the html page to the "blurb" field in his model instance.
         personPageOld=open(mugShotPath,'r').read()
         if not person.blurb:
-            person.blurb=re.search('<body>.*<hr',personPageOld,re.DOTALL).group() #this needs to be refined, take care of the HTML and make sure it doesn't match beyond the blurb
+            pblurb=re.search('<body>.*<hr',personPageOld,re.DOTALL)
+            if pblurb:
+                #this needs to be refined, take care of the HTML and make sure it doesn't match beyond the blurb.
+                #Only finds the first image, not all of them
+                person.blurb=re.search('<body>.*<hr',personPageOld,re.DOTALL).group() 
+            else:
+                print "ERROR: --------------- Broken link or Blurb parse error in ", mugShotFilename
             for mugShotFilename in re.findall('i/.*?jpg',personPageOld,re.DOTALL):
                 mugShotPath = os.path.join(settings.EXPOWEB, "folk", mugShotFilename)
                 saveMugShot(mugShotPath=mugShotPath, mugShotFilename=mugShotFilename, person=person)
author	Philip Sargent <philip.sargent@gmail.com>	2020-04-01 19:58:31 +0100
committer	Philip Sargent <philip.sargent@gmail.com>	2020-04-01 19:58:31 +0100
commit	53b797fb53fdc2c540e03d2ad4273688c30173a3 (patch)
tree	3cf120b30359fadde3f5dda7e35f525642d4f84a /parsers/people.py
parent	98eb9173ee7f846a0f8dea6aa54aad4b5dd2f5fb (diff)
download	troggle-53b797fb53fdc2c540e03d2ad4273688c30173a3.tar.gz troggle-53b797fb53fdc2c540e03d2ad4273688c30173a3.tar.bz2 troggle-53b797fb53fdc2c540e03d2ad4273688c30173a3.zip