summaryrefslogtreecommitdiffstats
path: root/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'utils.py')
-rw-r--r--utils.py173
1 files changed, 0 insertions, 173 deletions
diff --git a/utils.py b/utils.py
deleted file mode 100644
index b667e09..0000000
--- a/utils.py
+++ /dev/null
@@ -1,173 +0,0 @@
-import sys
-import random
-import re
-import logging
-
-from django.conf import settings
-from django.shortcuts import render
-
-"""Oddball mixture of critical, superfluous and useful functions which should
-be re-located more sensibly to other modules:
-
-
-various HTML/wiki functions presumably for logbooks?
-
-Use unknown:
-weighted_choice(lst)
-randomLogbookSentence()
-"""
-
-
-def weighted_choice(lst):
- n = random.uniform(0,1)
- for item, weight in lst:
- if n < weight:
- break
- n = n - weight
- return item
-
-def randomLogbookSentence():
- from troggle.core.models import LogbookEntry
- randSent={}
-
- # needs to handle empty logbooks without crashing
-
- #Choose a random logbook entry
- randSent['entry']=LogbookEntry.objects.order_by('?')[0]
-
- #Choose again if there are no sentances (this happens if it is a placeholder entry)
- while len(re.findall('[A-Z].*?\.',randSent['entry'].text))==0:
- randSent['entry']=LogbookEntry.objects.order_by('?')[0]
-
- #Choose a random sentence from that entry. Store the sentence as randSent['sentence'], and the number of that sentence in the entry as randSent['number']
- sentenceList=re.findall('[A-Z].*?\.',randSent['entry'].text)
- randSent['number']=random.randrange(0,len(sentenceList))
- randSent['sentence']=sentenceList[randSent['number']]
-
- return randSent
-
-
-def save_carefully(objectType, lookupAttribs={}, nonLookupAttribs={}):
- """Looks up instance using lookupAttribs and carries out the following:
- -if instance does not exist in DB: add instance to DB, return (new instance, True)
- -if instance exists in DB and was modified using Troggle: do nothing, return (existing instance, False)
- -if instance exists in DB and was not modified using Troggle: overwrite instance, return (instance, False)
-
- The checking is accomplished using Django's get_or_create and the new_since_parsing boolean field
- defined in core.models.TroggleModel.
-
- """
- try:
- instance, created = objectType.objects.get_or_create(defaults=nonLookupAttribs, **lookupAttribs)
- except:
- print(" !! - SAVE CAREFULLY ===================", objectType)
- print(" !! - -- objects.get_or_create()")
- print(" !! - lookupAttribs:{}\n !! - nonLookupAttribs:{}".format(lookupAttribs,nonLookupAttribs))
- raise
- if not created and not instance.new_since_parsing:
- for k, v in list(nonLookupAttribs.items()): #overwrite the existing attributes from the logbook text (except date and title)
- setattr(instance, k, v)
- try:
- instance.save()
- except:
- print(" !! - SAVE CAREFULLY ===================", objectType)
- print(" !! - -- instance.save()")
- print(" !! - lookupAttribs:{}\n !! - nonLookupAttribs:{}".format(lookupAttribs,nonLookupAttribs))
- raise
- try:
- msg = str(instance)
- except:
- msg = "FAULT getting __str__ for instance with lookupattribs: {}:".format(lookupAttribs)
- if created:
- logging.info(str(instance) + ' was just added to the database for the first time. \n')
-
- if not created and instance.new_since_parsing:
- logging.info(str(instance) + " has been modified using Troggle, so the current script left it as is. \n")
-
- if not created and not instance.new_since_parsing:
- logging.info(str(instance) + " existed in the database unchanged since last parse. It was overwritten by the current script. \n")
- return (instance, created)
-
-re_body = re.compile(r"\<body[^>]*\>(.*)\</body\>", re.DOTALL)
-re_title = re.compile(r"\<title[^>]*\>(.*)\</title\>", re.DOTALL)
-
-def get_html_body(text):
- return get_single_match(re_body, text)
-
-def get_html_title(text):
- return get_single_match(re_title, text)
-
-def get_single_match(regex, text):
- match = regex.search(text)
-
- if match:
- return match.groups()[0]
- else:
- return None
-
-re_subs = [(re.compile(r"\<b[^>]*\>(.*?)\</b\>", re.DOTALL), r"'''\1'''"),
- (re.compile(r"\<i\>(.*?)\</i\>", re.DOTALL), r"''\1''"),
- (re.compile(r"\<h1[^>]*\>(.*?)\</h1\>", re.DOTALL), r"=\1="),
- (re.compile(r"\<h2[^>]*\>(.*?)\</h2\>", re.DOTALL), r"==\1=="),
- (re.compile(r"\<h3[^>]*\>(.*?)\</h3\>", re.DOTALL), r"===\1==="),
- (re.compile(r"\<h4[^>]*\>(.*?)\</h4\>", re.DOTALL), r"====\1===="),
- (re.compile(r"\<h5[^>]*\>(.*?)\</h5\>", re.DOTALL), r"=====\1====="),
- (re.compile(r"\<h6[^>]*\>(.*?)\</h6\>", re.DOTALL), r"======\1======"),
- (re.compile(r'(<a href="?(?P<target>.*)"?>)?<img class="?(?P<class>\w*)"? src="?t/?(?P<source>[\w/\.]*)"?(?P<rest>></img>|\s/>(</a>)?)', re.DOTALL),r'[[display:\g<class> photo:\g<source>]]'), #
- (re.compile(r"\<a\s+id=['\"]([^'\"]*)['\"]\s*\>(.*?)\</a\>", re.DOTALL), r"[[subcave:\1|\2]]"), #assumes that all links with id attributes are subcaves. Not great.
- #interpage link needed
- (re.compile(r"\<a\s+href=['\"]#([^'\"]*)['\"]\s*\>(.*?)\</a\>", re.DOTALL), r"[[cavedescription:\1|\2]]"), #assumes that all links with target ids are cave descriptions. Not great.
- (re.compile(r"\[\<a\s+href=['\"][^'\"]*['\"]\s+id=['\"][^'\"]*['\"]\s*\>([^\s]*).*?\</a\>\]", re.DOTALL), r"[[qm:\1]]"),
-# (re.compile(r'<a\shref="?(?P<target>.*)"?>(?P<text>.*)</a>'),href_to_wikilinks),
- ]
-
-def html_to_wiki(text, codec = "utf-8"):
- if isinstance(text, str):
- text = str(text, codec)
- text = re.sub("</p>", r"", text)
- text = re.sub("<p>$", r"", text)
- text = re.sub("<p>", r"\n\n", text)
- out = ""
- lists = ""
- #lists
- while text:
- mstar = re.match("^(.*?)<ul[^>]*>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
- munstar = re.match("^(\s*)</ul>(.*)$", text, re.DOTALL)
- mhash = re.match("^(.*?)<ol[^>]*>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
- munhash = re.match("^(\s*)</ol>(.*)$", text, re.DOTALL)
- mitem = re.match("^(\s*)<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
- ms = [len(m.groups()[0]) for m in [mstar, munstar, mhash, munhash, mitem] if m]
- def min_(i, l):
- try:
- v = i.groups()[0]
- l.remove(len(v))
- return len(v) < min(l, 1000000000)
- except:
- return False
- if min_(mstar, ms):
- lists += "*"
- pre, val, post = mstar.groups()
- out += pre + "\n" + lists + " " + val
- text = post
- elif min_(mhash, ms):
- lists += "#"
- pre, val, post = mhash.groups()
- out += pre + "\n" + lists + " " + val
- text = post
- elif min_(mitem, ms):
- pre, val, post = mitem.groups()
- out += "\n" + lists + " " + val
- text = post
- elif min_(munstar, ms):
- lists = lists[:-1]
- text = munstar.groups()[1]
- elif min_(munhash, ms):
- lists.pop()
- text = munhash.groups()[1]
- else:
- out += text
- text = ""
- #substitutions
- for regex, repl in re_subs:
- out = regex.sub(repl, out)
- return out \ No newline at end of file