diff --git a/vendor/github.com/kennygrant/sanitize/.gitignore b/vendor/github.com/kennygrant/sanitize/.gitignore deleted file mode 100644 index 0026861..0000000 --- a/vendor/github.com/kennygrant/sanitize/.gitignore +++ /dev/null @@ -1,22 +0,0 @@ -# Compiled Object files, Static and Dynamic libs (Shared Objects) -*.o -*.a -*.so - -# Folders -_obj -_test - -# Architecture specific extensions/prefixes -*.[568vq] -[568vq].out - -*.cgo1.go -*.cgo2.c -_cgo_defun.c -_cgo_gotypes.go -_cgo_export.* - -_testmain.go - -*.exe diff --git a/vendor/github.com/kennygrant/sanitize/.travis.yml b/vendor/github.com/kennygrant/sanitize/.travis.yml deleted file mode 100644 index 4f2ee4d..0000000 --- a/vendor/github.com/kennygrant/sanitize/.travis.yml +++ /dev/null @@ -1 +0,0 @@ -language: go diff --git a/vendor/github.com/kennygrant/sanitize/LICENSE b/vendor/github.com/kennygrant/sanitize/LICENSE deleted file mode 100644 index 749ebb2..0000000 --- a/vendor/github.com/kennygrant/sanitize/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2017 Mechanism Design. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/vendor/github.com/kennygrant/sanitize/README.md b/vendor/github.com/kennygrant/sanitize/README.md deleted file mode 100644 index 4401ef7..0000000 --- a/vendor/github.com/kennygrant/sanitize/README.md +++ /dev/null @@ -1,62 +0,0 @@ -sanitize [![GoDoc](https://godoc.org/github.com/kennygrant/sanitize?status.svg)](https://godoc.org/github.com/kennygrant/sanitize) [![Go Report Card](https://goreportcard.com/badge/github.com/kennygrant/sanitize)](https://goreportcard.com/report/github.com/kennygrant/sanitize) [![CircleCI](https://circleci.com/gh/kennygrant/sanitize.svg?style=svg)](https://circleci.com/gh/kennygrant/sanitize) -======== - -Package sanitize provides functions to sanitize html and paths with go (golang). - -FUNCTIONS - - -```go -sanitize.Accents(s string) string -``` - -Accents replaces a set of accented characters with ascii equivalents. - -```go -sanitize.BaseName(s string) string -``` - -BaseName makes a string safe to use in a file name, producing a sanitized basename replacing . or / with -. Unlike Name no attempt is made to normalise text as a path. - -```go -sanitize.HTML(s string) string -``` - -HTML strips html tags with a very simple parser, replace common entities, and escape < and > in the result. The result is intended to be used as plain text. - -```go -sanitize.HTMLAllowing(s string, args...[]string) (string, error) -``` - -HTMLAllowing parses html and allow certain tags and attributes from the lists optionally specified by args - args[0] is a list of allowed tags, args[1] is a list of allowed attributes. If either is missing default sets are used. - -```go -sanitize.Name(s string) string -``` - -Name makes a string safe to use in a file name by first finding the path basename, then replacing non-ascii characters. - -```go -sanitize.Path(s string) string -``` - -Path makes a string safe to use as an url path. - - -Changes -------- - -Version 1.2 - -Adjusted HTML function to avoid linter warning -Added more tests from https://githubengineering.com/githubs-post-csp-journey/ -Chnaged name of license file -Added badges and change log to readme - -Version 1.1 -Fixed type in comments. -Merge pull request from Povilas Balzaravicius Pawka - - replace br tags with newline even when they contain a space - -Version 1.0 -First release \ No newline at end of file diff --git a/vendor/github.com/kennygrant/sanitize/sanitize.go b/vendor/github.com/kennygrant/sanitize/sanitize.go deleted file mode 100755 index 50b39b9..0000000 --- a/vendor/github.com/kennygrant/sanitize/sanitize.go +++ /dev/null @@ -1,383 +0,0 @@ -// Package sanitize provides functions for sanitizing text. -package sanitize - -import ( - "bytes" - "html" - "html/template" - "io" - "path" - "regexp" - "strings" - - parser "golang.org/x/net/html" -) - -var ( - ignoreTags = []string{"title", "script", "style", "iframe", "frame", "frameset", "noframes", "noembed", "embed", "applet", "object", "base"} - - defaultTags = []string{"h1", "h2", "h3", "h4", "h5", "h6", "div", "span", "hr", "p", "br", "b", "i", "strong", "em", "ol", "ul", "li", "a", "img", "pre", "code", "blockquote"} - - defaultAttributes = []string{"id", "class", "src", "href", "title", "alt", "name", "rel"} -) - -// HTMLAllowing sanitizes html, allowing some tags. -// Arrays of allowed tags and allowed attributes may optionally be passed as the second and third arguments. -func HTMLAllowing(s string, args ...[]string) (string, error) { - - allowedTags := defaultTags - if len(args) > 0 { - allowedTags = args[0] - } - allowedAttributes := defaultAttributes - if len(args) > 1 { - allowedAttributes = args[1] - } - - // Parse the html - tokenizer := parser.NewTokenizer(strings.NewReader(s)) - - buffer := bytes.NewBufferString("") - ignore := "" - - for { - tokenType := tokenizer.Next() - token := tokenizer.Token() - - switch tokenType { - - case parser.ErrorToken: - err := tokenizer.Err() - if err == io.EOF { - return buffer.String(), nil - } - return "", err - - case parser.StartTagToken: - - if len(ignore) == 0 && includes(allowedTags, token.Data) { - token.Attr = cleanAttributes(token.Attr, allowedAttributes) - buffer.WriteString(token.String()) - } else if includes(ignoreTags, token.Data) { - ignore = token.Data - } - - case parser.SelfClosingTagToken: - - if len(ignore) == 0 && includes(allowedTags, token.Data) { - token.Attr = cleanAttributes(token.Attr, allowedAttributes) - buffer.WriteString(token.String()) - } else if token.Data == ignore { - ignore = "" - } - - case parser.EndTagToken: - if len(ignore) == 0 && includes(allowedTags, token.Data) { - token.Attr = []parser.Attribute{} - buffer.WriteString(token.String()) - } else if token.Data == ignore { - ignore = "" - } - - case parser.TextToken: - // We allow text content through, unless ignoring this entire tag and its contents (including other tags) - if ignore == "" { - buffer.WriteString(token.String()) - } - case parser.CommentToken: - // We ignore comments by default - case parser.DoctypeToken: - // We ignore doctypes by default - html5 does not require them and this is intended for sanitizing snippets of text - default: - // We ignore unknown token types by default - - } - - } - -} - -// HTML strips html tags, replace common entities, and escapes <>&;'" in the result. -// Note the returned text may contain entities as it is escaped by HTMLEscapeString, and most entities are not translated. -func HTML(s string) (output string) { - - // Shortcut strings with no tags in them - if !strings.ContainsAny(s, "<>") { - output = s - } else { - - // First remove line breaks etc as these have no meaning outside html tags (except pre) - // this means pre sections will lose formatting... but will result in less unintentional paras. - s = strings.Replace(s, "\n", "", -1) - - // Then replace line breaks with newlines, to preserve that formatting - s = strings.Replace(s, "

", "\n", -1) - s = strings.Replace(s, "
", "\n", -1) - s = strings.Replace(s, "
", "\n", -1) - s = strings.Replace(s, "
", "\n", -1) - s = strings.Replace(s, "
", "\n", -1) - - // Walk through the string removing all tags - b := bytes.NewBufferString("") - inTag := false - for _, r := range s { - switch r { - case '<': - inTag = true - case '>': - inTag = false - default: - if !inTag { - b.WriteRune(r) - } - } - } - output = b.String() - } - - // Remove a few common harmless entities, to arrive at something more like plain text - output = strings.Replace(output, "‘", "'", -1) - output = strings.Replace(output, "’", "'", -1) - output = strings.Replace(output, "“", "\"", -1) - output = strings.Replace(output, "”", "\"", -1) - output = strings.Replace(output, " ", " ", -1) - output = strings.Replace(output, """, "\"", -1) - output = strings.Replace(output, "'", "'", -1) - - // Translate some entities into their plain text equivalent (for example accents, if encoded as entities) - output = html.UnescapeString(output) - - // In case we have missed any tags above, escape the text - removes <, >, &, ' and ". - output = template.HTMLEscapeString(output) - - // After processing, remove some harmless entities &, ' and " which are encoded by HTMLEscapeString - output = strings.Replace(output, """, "\"", -1) - output = strings.Replace(output, "'", "'", -1) - output = strings.Replace(output, "& ", "& ", -1) // NB space after - output = strings.Replace(output, "&amp; ", "& ", -1) // NB space after - - return output -} - -// We are very restrictive as this is intended for ascii url slugs -var illegalPath = regexp.MustCompile(`[^[:alnum:]\~\-\./]`) - -// Path makes a string safe to use as an url path. -func Path(s string) string { - // Start with lowercase string - filePath := strings.ToLower(s) - filePath = strings.Replace(filePath, "..", "", -1) - filePath = path.Clean(filePath) - - // Remove illegal characters for paths, flattening accents and replacing some common separators with - - filePath = cleanString(filePath, illegalPath) - - // NB this may be of length 0, caller must check - return filePath -} - -// Remove all other unrecognised characters apart from -var illegalName = regexp.MustCompile(`[^[:alnum:]-.]`) - -// Name makes a string safe to use in a file name by first finding the path basename, then replacing non-ascii characters. -func Name(s string) string { - // Start with lowercase string - fileName := strings.ToLower(s) - fileName = path.Clean(path.Base(fileName)) - - // Remove illegal characters for names, replacing some common separators with - - fileName = cleanString(fileName, illegalName) - - // NB this may be of length 0, caller must check - return fileName -} - -// Replace these separators with - -var baseNameSeparators = regexp.MustCompile(`[./]`) - -// BaseName makes a string safe to use in a file name, producing a sanitized basename replacing . or / with -. -// No attempt is made to normalise a path or normalise case. -func BaseName(s string) string { - - // Replace certain joining characters with a dash - baseName := baseNameSeparators.ReplaceAllString(s, "-") - - // Remove illegal characters for names, replacing some common separators with - - baseName = cleanString(baseName, illegalName) - - // NB this may be of length 0, caller must check - return baseName -} - -// A very limited list of transliterations to catch common european names translated to urls. -// This set could be expanded with at least caps and many more characters. -var transliterations = map[rune]string{ - 'À': "A", - 'Á': "A", - 'Â': "A", - 'Ã': "A", - 'Ä': "A", - 'Å': "AA", - 'Æ': "AE", - 'Ç': "C", - 'È': "E", - 'É': "E", - 'Ê': "E", - 'Ë': "E", - 'Ì': "I", - 'Í': "I", - 'Î': "I", - 'Ï': "I", - 'Ð': "D", - 'Ł': "L", - 'Ñ': "N", - 'Ò': "O", - 'Ó': "O", - 'Ô': "O", - 'Õ': "O", - 'Ö': "O", - 'Ø': "OE", - 'Ù': "U", - 'Ú': "U", - 'Ü': "U", - 'Û': "U", - 'Ý': "Y", - 'Þ': "Th", - 'ß': "ss", - 'à': "a", - 'á': "a", - 'â': "a", - 'ã': "a", - 'ä': "a", - 'å': "aa", - 'æ': "ae", - 'ç': "c", - 'è': "e", - 'é': "e", - 'ê': "e", - 'ë': "e", - 'ì': "i", - 'í': "i", - 'î': "i", - 'ï': "i", - 'ð': "d", - 'ł': "l", - 'ñ': "n", - 'ń': "n", - 'ò': "o", - 'ó': "o", - 'ô': "o", - 'õ': "o", - 'ō': "o", - 'ö': "o", - 'ø': "oe", - 'ś': "s", - 'ù': "u", - 'ú': "u", - 'û': "u", - 'ū': "u", - 'ü': "u", - 'ý': "y", - 'þ': "th", - 'ÿ': "y", - 'ż': "z", - 'Œ': "OE", - 'œ': "oe", -} - -// Accents replaces a set of accented characters with ascii equivalents. -func Accents(s string) string { - // Replace some common accent characters - b := bytes.NewBufferString("") - for _, c := range s { - // Check transliterations first - if val, ok := transliterations[c]; ok { - b.WriteString(val) - } else { - b.WriteRune(c) - } - } - return b.String() -} - -var ( - // If the attribute contains data: or javascript: anywhere, ignore it - // we don't allow this in attributes as it is so frequently used for xss - // NB we allow spaces in the value, and lowercase. - illegalAttr = regexp.MustCompile(`(d\s*a\s*t\s*a|j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*)\s*:`) - - // We are far more restrictive with href attributes. - legalHrefAttr = regexp.MustCompile(`\A[/#][^/\\]?|mailto://|http://|https://`) -) - -// cleanAttributes returns an array of attributes after removing malicious ones. -func cleanAttributes(a []parser.Attribute, allowed []string) []parser.Attribute { - if len(a) == 0 { - return a - } - - var cleaned []parser.Attribute - for _, attr := range a { - if includes(allowed, attr.Key) { - - val := strings.ToLower(attr.Val) - - // Check for illegal attribute values - if illegalAttr.FindString(val) != "" { - attr.Val = "" - } - - // Check for legal href values - / mailto:// http:// or https:// - if attr.Key == "href" { - if legalHrefAttr.FindString(val) == "" { - attr.Val = "" - } - } - - // If we still have an attribute, append it to the array - if attr.Val != "" { - cleaned = append(cleaned, attr) - } - } - } - return cleaned -} - -// A list of characters we consider separators in normal strings and replace with our canonical separator - rather than removing. -var ( - separators = regexp.MustCompile(`[ &_=+:]`) - - dashes = regexp.MustCompile(`[\-]+`) -) - -// cleanString replaces separators with - and removes characters listed in the regexp provided from string. -// Accents, spaces, and all characters not in A-Za-z0-9 are replaced. -func cleanString(s string, r *regexp.Regexp) string { - - // Remove any trailing space to avoid ending on - - s = strings.Trim(s, " ") - - // Flatten accents first so that if we remove non-ascii we still get a legible name - s = Accents(s) - - // Replace certain joining characters with a dash - s = separators.ReplaceAllString(s, "-") - - // Remove all other unrecognised characters - NB we do allow any printable characters - s = r.ReplaceAllString(s, "") - - // Remove any multiple dashes caused by replacements above - s = dashes.ReplaceAllString(s, "-") - - return s -} - -// includes checks for inclusion of a string in a []string. -func includes(a []string, s string) bool { - for _, as := range a { - if as == s { - return true - } - } - return false -} diff --git a/vendor/github.com/kennygrant/sanitize/sanitize_test.go b/vendor/github.com/kennygrant/sanitize/sanitize_test.go deleted file mode 100644 index f7312ea..0000000 --- a/vendor/github.com/kennygrant/sanitize/sanitize_test.go +++ /dev/null @@ -1,233 +0,0 @@ -// Utility functions for working with text -package sanitize - -import ( - "testing" -) - -var Format = "\ninput: %q\nexpected: %q\noutput: %q" - -type Test struct { - input string - expected string -} - -// NB the treatment of accents - they are removed and replaced with ascii transliterations -var urls = []Test{ - {"ReAd ME.md", `read-me.md`}, - {"E88E08A7-279C-4CC1-8B90-86DE0D7044_3C.html", `e88e08a7-279c-4cc1-8b90-86de0d7044-3c.html`}, - {"/user/test/I am a long url's_-?ASDF@£$%£%^testé.html", `/user/test/i-am-a-long-urls-asdfteste.html`}, - {"/../../4-icon.jpg", `/4-icon.jpg`}, - {"/Images_dir/../4-icon.jpg", `/images-dir/4-icon.jpg`}, - {"../4 icon.*", `/4-icon.`}, - {"Spac ey/Nôm/test før url", `spac-ey/nom/test-foer-url`}, - {"../*", `/`}, -} - -func TestPath(t *testing.T) { - for _, test := range urls { - output := Path(test.input) - if output != test.expected { - t.Fatalf(Format, test.input, test.expected, output) - } - } -} - -func BenchmarkPath(b *testing.B) { - for i := 0; i < b.N; i++ { - for _, test := range urls { - output := Path(test.input) - if output != test.expected { - b.Fatalf(Format, test.input, test.expected, output) - } - } - } -} - -var fileNames = []Test{ - {"ReAd ME.md", `read-me.md`}, - {"/var/etc/jobs/go/go/src/pkg/foo/bar.go", `bar.go`}, - {"I am a long url's_-?ASDF@£$%£%^é.html", `i-am-a-long-urls-asdfe.html`}, - {"/../../4-icon.jpg", `4-icon.jpg`}, - {"/Images/../4-icon.jpg", `4-icon.jpg`}, - {"../4 icon.jpg", `4-icon.jpg`}, - {"../4 icon-testé *8%^\"'\".jpg ", `4-icon-teste-8.jpg`}, -} - -func TestName(t *testing.T) { - for _, test := range fileNames { - output := Name(test.input) - if output != test.expected { - t.Fatalf(Format, test.input, test.expected, output) - } - } -} - -func BenchmarkName(b *testing.B) { - for i := 0; i < b.N; i++ { - for _, test := range fileNames { - output := Name(test.input) - if output != test.expected { - b.Fatalf(Format, test.input, test.expected, output) - } - } - } -} - -var baseFileNames = []Test{ - {"The power & the Glory jpg file. The end", `The-power-the-Glory-jpg-file-The-end`}, - {"/../../4-iCoN.jpg", `-4-iCoN-jpg`}, - {"And/Or", `And-Or`}, - {"Sonic.EXE", `Sonic-EXE`}, - {"012: #Fetch for Defaults", `012-Fetch-for-Defaults`}, -} - -func TestBaseName(t *testing.T) { - for _, test := range baseFileNames { - output := BaseName(test.input) - if output != test.expected { - t.Fatalf(Format, test.input, test.expected, output) - } - } -} - -// Test with some malformed or malicious html -// NB because we remove all tokens after a < until the next > -// and do not attempt to parse, we should be safe from invalid html, -// but will sometimes completely empty the string if we have invalid input -// Note we sometimes use " in order to keep things on one line and use the ` character -var htmlTests = []Test{ - {` `, " "}, - {`&#x000D;`, `&#x000D;`}, - {`

`, ``}, - {"

Bold Not bold

\nAlso not bold.", "Bold Not bold\nAlso not bold."}, - {`FOO ZOO`, "FOO\rZOO"}, - {`">`, `alert("XSS")"`}, - {``, ``}, - {``, ``}, - {`> & test <`, `> & test <`}, - {``, ``}, - {`“hello” it’s for ‘real’`, `"hello" it's for 'real'`}, - {``, ``}, - {`'';!--"=&{()}`, `'';!--"=&{()}`}, - {"LINE 1
\nLINE 2", "LINE 1\nLINE 2"}, - - // Examples from https://githubengineering.com/githubs-post-csp-journey/ - {` -... - -`, `...`}, - {` --> -
- -
`, `Click -- `}, -} - -func TestHTML(t *testing.T) { - for _, test := range htmlTests { - output := HTML(test.input) - if output != test.expected { - t.Fatalf(Format, test.input, test.expected, output) - } - } -} - -var htmlTestsAllowing = []Test{ - {``, ``}, - {`hello world`, `hello world`}, - {`hello



rulers`, `hello



rulers`}, - {`

Span

`, `

Span

`}, - {`
Div

test

invalid

test

`, `
Div

test

invalid

test

`}, - {`

Some text

`, `

Some text

`}, - {`hello world`, `hello world`}, - {`text

inside

too`, `text

inside

too`}, - {`&#x000D;`, `&#x000D;`}, - {`

`, `

`}, - {"

Bold Not bold

\nAlso not bold.", "

Bold Not bold

\nAlso not bold."}, - {"`FOO ZOO", "`FOO ZOO"}, - {`">`, `">`}, - {``, ``}, - {`>> >> `}, - {`> & test <`, `> & test <`}, - {``, ``}, - {``, ``}, - {`