X-Git-Url: https://git.openstreetmap.org/osqa.git/blobdiff_plain/1a949f7c97dc2f34c135f5cdf088df2927d3d652..143150bddf1a02498da0dcbdf0ad59d540c1519a:/forum/utils/html.py

diff --git a/forum/utils/html.py b/forum/utils/html.py
index 25a74a4..3657ef6 100644
--- a/forum/utils/html.py
+++ b/forum/utils/html.py
@@ -1,6 +1,10 @@
 """Utilities for working with HTML."""
-import html5lib
-from html5lib import sanitizer, serializer, tokenizer, treebuilders, treewalkers
+from html5lib import sanitizer, serializer, tokenizer, treebuilders, treewalkers, HTMLParser
+from urllib import quote_plus
+from django.utils.html import strip_tags
+from forum.utils.html2text import HTML2Text
+from django.utils.safestring import mark_safe
+from forum import settings
 
 class HTMLSanitizerMixin(sanitizer.HTMLSanitizerMixin):
     acceptable_elements = ('a', 'abbr', 'acronym', 'address', 'b', 'big',
@@ -26,11 +30,6 @@ class HTMLSanitizerMixin(sanitizer.HTMLSanitizerMixin):
     allowed_svg_properties = ()
 
 class HTMLSanitizer(tokenizer.HTMLTokenizer, HTMLSanitizerMixin):
-    def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
-                 lowercaseElementName=True, lowercaseAttrName=True):
-        tokenizer.HTMLTokenizer.__init__(self, stream, encoding, parseMeta,
-                                         useChardet, lowercaseElementName,
-                                         lowercaseAttrName)
 
     def __iter__(self):
         for token in tokenizer.HTMLTokenizer.__iter__(self):
@@ -40,7 +39,7 @@ class HTMLSanitizer(tokenizer.HTMLTokenizer, HTMLSanitizerMixin):
 
 def sanitize_html(html):
     """Sanitizes an HTML fragment."""
-    p = html5lib.HTMLParser(tokenizer=HTMLSanitizer,
+    p = HTMLParser(tokenizer=HTMLSanitizer,
                             tree=treebuilders.getTreeBuilder("dom"))
     dom_tree = p.parseFragment(html)
     walker = treewalkers.getTreeWalker("dom")
@@ -49,3 +48,27 @@ def sanitize_html(html):
                                   quote_attr_values=True)
     output_generator = s.serialize(stream)
     return u''.join(output_generator)
+
+def cleanup_urls(url):
+    return quote_plus(strip_tags(url))
+
+
+def html2text(s, ignore_tags=(), indent_width=4, page_width=80):
+    ignore_tags = [t.lower() for t in ignore_tags]
+    parser = HTML2Text(ignore_tags, indent_width, page_width)
+    parser.feed(s)
+    parser.close()
+    parser.generate()
+    return mark_safe(parser.result)
+
+def buildtag(name, content, **attrs):
+    return mark_safe('<%s %s>%s</%s>' % (name, " ".join('%s="%s"' % i for i in attrs.items()), unicode(content), name))
+
+def hyperlink(url, title, **attrs):
+    return mark_safe('<a href="%s" %s>%s</a>' % (url, " ".join('%s="%s"' % i for i in attrs.items()), title))
+
+def objlink(obj, **attrs):
+    link = obj.get_absolute_url()
+    if not link.startswith(settings.APP_URL):
+        link = settings.APP_URL + link
+    return hyperlink(link, unicode(obj), **attrs)