# The only schemes allowed in URLs (for href and src attributes).
# Adding "javascript" or "vbscript" to this list would not be smart.
self.allowed_schemes = ['http', 'https', 'ftp']
parsed = urlparse(url)
return (parsed[0] in self.allowed_schemes and '.' in parsed[1]) \
or (parsed[0] == '' and parsed[2].startswith('/'))
file, ftp, gopher, hdl, http, https, imap, mailto, mms,
news, nntp, prospero, rsync, rtsp, rtspu, sftp,
shttp, sip, sips, snews, svn, svn+ssh, telnet, wais
>>> from urlparse import urlparse
>>> p = urlparse('http://bb.aa')
>>> print p
ParseResult(scheme='http', netloc='bb.aa', path='', params='', query='', fragment='')
>>> o = urlparse('mailto:aa@bb.aa')
>>> print o
ParseResult(scheme='mailto', netloc='', path='aa@bb.aa', params='', query='', fragment='')
--- sanitizer.py-orig 2013-03-07 03:12:08.256256153 +0100
+++ sanitizer.py 2013-03-07 03:10:50.170662121 +0100
@@ -66,7 +66,7 @@
# The only schemes allowed in URLs (for href and src attributes).
# Adding "javascript" or "vbscript" to this list would not be smart.
- self.allowed_schemes = ['http', 'https', 'ftp']
+ self.allowed_schemes = ['http', 'https', 'ftp', 'mailto']
#to strip or escape disallowed tags?
self.strip_disallowed = strip_disallowed
@@ -151,11 +151,12 @@
def url_is_acceptable(self, url):
"""
- Accepts relative and absolute urls
+ Accepts relative, absolute and mailto urls
"""
parsed = urlparse(url)
return (parsed[0] in self.allowed_schemes and '.' in parsed[1]) \
+ or (parsed[0] in self.allowed_schemes and '@' in parsed[2]) \
or (parsed[0] == '' and parsed[2].startswith('/'))
def strip(self, rawstring, escape=True):