diff options
Diffstat (limited to 'parsers/people.py')
-rw-r--r-- | parsers/people.py | 8 |
1 file changed, 7 insertions, 1 deletion
diff --git a/parsers/people.py b/parsers/people.py index 34a5ff3..f7e2f50 100644 --- a/parsers/people.py +++ b/parsers/people.py @@ -38,7 +38,13 @@ def parseMugShotAndBlurb(personline, header, person): elif mugShotPath[-3:]=='htm': #if person has an html page, find the image(s) and add it. Also, add the text from the html page to the "blurb" field in his model instance. personPageOld=open(mugShotPath,'r').read() if not person.blurb: - person.blurb=re.search('<body>.*<hr',personPageOld,re.DOTALL).group() #this needs to be refined, take care of the HTML and make sure it doesn't match beyond the blurb + pblurb=re.search('<body>.*<hr',personPageOld,re.DOTALL) + if pblurb: + #this needs to be refined, take care of the HTML and make sure it doesn't match beyond the blurb. + #Only finds the first image, not all of them + person.blurb=re.search('<body>.*<hr',personPageOld,re.DOTALL).group() + else: + print "ERROR: --------------- Broken link or Blurb parse error in ", mugShotFilename for mugShotFilename in re.findall('i/.*?jpg',personPageOld,re.DOTALL): mugShotPath = os.path.join(settings.EXPOWEB, "folk", mugShotFilename) saveMugShot(mugShotPath=mugShotPath, mugShotFilename=mugShotFilename, person=person) |