summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author substantialnoninfringinguser <substantialnoninfringinguser@gmail.com> 2009-06-14 04:36:19 +0100
committer substantialnoninfringinguser <substantialnoninfringinguser@gmail.com> 2009-06-14 04:36:19 +0100
commit 0306723c950296566ef6c6cf3c3add5b605e9855 (patch)
tree e8632560fa184f461e439cc860c37f050a053e56
parent af9743026e52081886d93575927b5a4c89c987cb (diff)
download troggle-0306723c950296566ef6c6cf3c3add5b605e9855.tar.gz
troggle-0306723c950296566ef6c6cf3c3add5b605e9855.tar.bz2
troggle-0306723c950296566ef6c6cf3c3add5b605e9855.zip
[svn] Whoops, forgot to add the file in last revision.
-rw-r--r-- parsers/subcaves.py 48
1 files changed, 48 insertions, 0 deletions
diff --git a/parsers/subcaves.py b/parsers/subcaves.py
new file mode 100644
index 0000000..024a229
--- /dev/null
+++ b/parsers/subcaves.py
@@ -0,0 +1,48 @@
+'''
+This module is the part of troggle that parses descriptions of cave parts (subcaves) from the legacy HTML files and saves them in the troggle database as instances of the model Subcave. Unfortunately, this parser cannot be very flexible because the legacy format is poorly structured.
+'''
+
+import sys, os
+
+sys.path.append('C:\\troggle')
+sys.path.append('C:\\')
+os.environ['DJANGO_SETTINGS_MODULE']='troggle.settings'
+import troggle.expo as expo
+
+import os, re, logging
+from django.conf import settings
+from expo.models import Subcave, Cave
+from troggle.save_carefully import save_carefully
+
+def getLinksInCaveDescription(cave):
+    '''
+    Return every HTML <a href="...">...</a> link found in a cave's
+    underground description.
+
+    cave -- object whose underground_description attribute holds the HTML
+            text to scan; the attribute may be empty or None.
+
+    Returns a list of (href, link text) tuples, for example
+    ('filename.html', 'Description'). An empty list is returned when the
+    cave has no underground description.
+    '''
+    # Non-greedy groups keep each match inside a single anchor tag. Only the
+    # exact form <a href="..."> is recognised: double quotes, with href as
+    # the first and only attribute.
+    pattern = r'<a href="(.*?)">(.*?)</a>'
+    if not cave.underground_description:
+        return []
+    return re.findall(pattern, cave.underground_description)
+
+def importSubcaves(cave):
+    '''
+    Import the subcaves linked from one cave's underground description.
+
+    Each link returned by getLinksInCaveDescription(cave) is treated as a
+    file path relative to the directory of cave.description_file under
+    settings.EXPOWEB. The file content is decoded as ISO-8859-1, re-encoded
+    as UTF-8 and passed to save_carefully as the description of a Subcave
+    that is looked up by the link text (title) and the cave.
+
+    cave -- the Cave whose linked subcave files should be imported.
+
+    Files that cannot be opened or read are logged and skipped; the
+    remaining links are still processed.
+    '''
+    for href, title in getLinksInCaveDescription(cave):
+        subcaveFilePath = os.path.join(
+            settings.EXPOWEB,
+            os.path.dirname(cave.description_file),
+            href)
+
+        # Only the file access is guarded, so that an IOError raised by later
+        # steps is not misreported as a failure to open the file.
+        try:
+            subcaveFile = open(subcaveFilePath, 'r')
+            try:
+                rawDescription = subcaveFile.read()
+            finally:
+                # Close explicitly so that handles are not leaked when many
+                # subcave files are imported in one run.
+                subcaveFile.close()
+        except IOError:
+            logging.info("Subcave import couldn't open "+subcaveFilePath)
+            continue
+
+        description = rawDescription.decode('iso-8859-1').encode('utf-8')
+
+        lookupAttribs = {'title': title, 'cave': cave}
+        nonLookupAttribs = {'description': description}
+        newSubcave = save_carefully(Subcave, lookupAttribs=lookupAttribs, nonLookupAttribs=nonLookupAttribs)
+
+        logging.info("Added " + unicode(newSubcave) + " to " + unicode(cave))
+
+def importAllSubcaves():
+    '''
+    Run importSubcaves for every Cave returned by Cave.objects.all().
+    '''
+    for cave in Cave.objects.all():
+        importSubcaves(cave)