From 23f8005343d9c3ba72f46b1748889066b2f7621c Mon Sep 17 00:00:00 2001 From: stef Date: Sat, 30 Apr 2011 22:17:48 +0200 Subject: [PATCH] [enh] sanitize html and display it in comments/contract excerpts --- bt/views.py | 31 +++++++++++++++++++++++++++++-- templates/view.html | 4 ++-- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/bt/views.py b/bt/views.py index 6056615..878a356 100644 --- a/bt/views.py +++ b/bt/views.py @@ -9,7 +9,34 @@ from django.core.exceptions import ObjectDoesNotExist from models import Violation, Attachment, Comment from tempfile import mkstemp from datetime import datetime -import hashlib, os +import hashlib, os, re +from urlparse import urljoin +from BeautifulSoup import BeautifulSoup, Comment as BComment + +def sanitizeHtml(value, base_url=None): + rjs = r'[\s]*(&#x.{1,7})?'.join(list('javascript:')) + rvb = r'[\s]*(&#x.{1,7})?'.join(list('vbscript:')) + re_scripts = re.compile('(%s)|(%s)' % (rjs, rvb), re.IGNORECASE) + validTags = 'p i strong b u a h1 h2 h3 pre br img'.split() + validAttrs = 'href src width height'.split() + urlAttrs = 'href src'.split() # Attributes which should have a URL + soup = BeautifulSoup(value) + for comment in soup.findAll(text=lambda text: isinstance(text, BComment)): + # Get rid of comments + comment.extract() + for tag in soup.findAll(True): + if tag.name not in validTags: + tag.hidden = True + attrs = tag.attrs + tag.attrs = [] + for attr, val in attrs: + if attr in validAttrs: + val = re_scripts.sub('', val) # Remove scripts (vbs & js) + if attr in urlAttrs: + val = urljoin(base_url, val) # Calculate the absolute url + tag.attrs.append((attr, val)) + + return soup.renderContents().decode('utf8') def add(request): if request.method == 'POST': @@ -24,7 +51,7 @@ def add(request): media = form.cleaned_data['media'], temporary = form.cleaned_data['temporary'], contractual = form.cleaned_data['contractual'], - contract_excerpt = form.cleaned_data['contract_excerpt'], + contract_excerpt = sanitizeHtml(form.cleaned_data['contract_excerpt']), loophole = form.cleaned_data['loophole'] ) v.save() diff --git a/templates/view.html b/templates/view.html index 9aeeebf..db0b2d2 100644 --- a/templates/view.html +++ b/templates/view.html @@ -16,7 +16,7 @@ {%if v.media%}
  • Media
    {{v.media}}
  • {%endif%} {%if v.temporary%}
  • Temporary restriction
    {{v.temporary}}
  • {%endif%} {%if v.contractual%}
  • Contractual restriction
    {{v.Contractual}}
  • {%endif%} - {%if v.contract_excerpt%}
  • Contract excerpt
    {{v.contract_excerpt}}
  • {%endif%} + {%if v.contract_excerpt%}
  • Contract excerpt
    {{v.contract_excerpt|safe}}
  • {%endif%} {%if v.loophole%}
  • Loophole offering
    {{v.loophole}}
  • {%endif%} {%if v.comment_set.all%}
    @@ -24,7 +24,7 @@ {%for c in v.comment_set.all%}
    {%if c.submitter_nick%}{{c.submitter_nick}}{%else%}{{c.submitter_email}}{%endif%} - {{c.timestamp|datetimefmt:"short" }} -
    {{c.comment}}
    +
    {{c.comment|safe}}
    {%if c.attachment_set.all%}
    Attachments -- GitLab