You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

254 lines
8.7 KiB

  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // +build go1.10
  5. package precis
  6. import (
  7. "strings"
  8. "golang.org/x/text/runes"
  9. "golang.org/x/text/secure/bidirule"
  10. )
  11. var enforceTestCases = []struct {
  12. name string
  13. p *Profile
  14. cases []testCase
  15. }{
  16. {"Basic", NewFreeform(), []testCase{
  17. {"e\u0301\u031f", "\u00e9\u031f", nil}, // normalize
  18. }},
  19. {"Context Rule 1", NewFreeform(), []testCase{
  20. // Rule 1: zero-width non-joiner (U+200C)
  21. // From RFC:
  22. // False
  23. // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
  24. // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C
  25. // (Joining_Type:T)*(Joining_Type:{R,D})) Then True;
  26. //
  27. // Example runes for different joining types:
  28. // Join L: U+A872; PHAGS-PA SUPERFIXED LETTER RA
  29. // Join D: U+062C; HAH WITH DOT BELOW
  30. // Join T: U+0610; ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM
  31. // Join R: U+0627; ALEF
  32. // Virama: U+0A4D; GURMUKHI SIGN VIRAMA
  33. // Virama and Join T: U+0ACD; GUJARATI SIGN VIRAMA
  34. {"\u200c", "", errContext},
  35. {"\u200ca", "", errContext},
  36. {"a\u200c", "", errContext},
  37. {"\u200c\u0627", "", errContext}, // missing JoinStart
  38. {"\u062c\u200c", "", errContext}, // missing JoinEnd
  39. {"\u0610\u200c\u0610\u0627", "", errContext}, // missing JoinStart
  40. {"\u062c\u0610\u200c\u0610", "", errContext}, // missing JoinEnd
  41. // Variants of: D T* U+200c T* R
  42. {"\u062c\u200c\u0627", "\u062c\u200c\u0627", nil},
  43. {"\u062c\u0610\u200c\u0610\u0627", "\u062c\u0610\u200c\u0610\u0627", nil},
  44. {"\u062c\u0610\u0610\u200c\u0610\u0610\u0627", "\u062c\u0610\u0610\u200c\u0610\u0610\u0627", nil},
  45. {"\u062c\u0610\u200c\u0627", "\u062c\u0610\u200c\u0627", nil},
  46. {"\u062c\u200c\u0610\u0627", "\u062c\u200c\u0610\u0627", nil},
  47. // Variants of: L T* U+200c T* D
  48. {"\ua872\u200c\u062c", "\ua872\u200c\u062c", nil},
  49. {"\ua872\u0610\u200c\u0610\u062c", "\ua872\u0610\u200c\u0610\u062c", nil},
  50. {"\ua872\u0610\u0610\u200c\u0610\u0610\u062c", "\ua872\u0610\u0610\u200c\u0610\u0610\u062c", nil},
  51. {"\ua872\u0610\u200c\u062c", "\ua872\u0610\u200c\u062c", nil},
  52. {"\ua872\u200c\u0610\u062c", "\ua872\u200c\u0610\u062c", nil},
  53. // Virama
  54. {"\u0a4d\u200c", "\u0a4d\u200c", nil},
  55. {"\ua872\u0a4d\u200c", "\ua872\u0a4d\u200c", nil},
  56. {"\ua872\u0a4d\u0610\u200c", "", errContext},
  57. {"\ua872\u0a4d\u0610\u200c", "", errContext},
  58. {"\u0acd\u200c", "\u0acd\u200c", nil},
  59. {"\ua872\u0acd\u200c", "\ua872\u0acd\u200c", nil},
  60. {"\ua872\u0acd\u0610\u200c", "", errContext},
  61. {"\ua872\u0acd\u0610\u200c", "", errContext},
  62. // Using Virama as join T
  63. {"\ua872\u0acd\u200c\u062c", "\ua872\u0acd\u200c\u062c", nil},
  64. {"\ua872\u200c\u0acd\u062c", "\ua872\u200c\u0acd\u062c", nil},
  65. }},
  66. {"Context Rule 2", NewFreeform(), []testCase{
  67. // Rule 2: zero-width joiner (U+200D)
  68. {"\u200d", "", errContext},
  69. {"\u200da", "", errContext},
  70. {"a\u200d", "", errContext},
  71. {"\u0a4d\u200d", "\u0a4d\u200d", nil},
  72. {"\ua872\u0a4d\u200d", "\ua872\u0a4d\u200d", nil},
  73. {"\u0a4da\u200d", "", errContext},
  74. }},
  75. {"Context Rule 3", NewFreeform(), []testCase{
  76. // Rule 3: middle dot
  77. {"·", "", errContext},
  78. {"l·", "", errContext},
  79. {"·l", "", errContext},
  80. {"a·", "", errContext},
  81. {"l·a", "", errContext},
  82. {"a·a", "", errContext},
  83. {"l·l", "l·l", nil},
  84. {"al·la", "al·la", nil},
  85. }},
  86. {"Context Rule 4", NewFreeform(), []testCase{
  87. // Rule 4: Greek lower numeral U+0375
  88. {"͵", "", errContext},
  89. {"͵a", "", errContext},
  90. {"α͵", "", errContext},
  91. {"͵α", "͵α", nil},
  92. {"α͵α", "α͵α", nil},
  93. {"͵͵α", "͵͵α", nil}, // The numeric sign is itself Greek.
  94. {"α͵͵α", "α͵͵α", nil},
  95. {"α͵͵", "", errContext},
  96. {"α͵͵a", "", errContext},
  97. }},
  98. {"Context Rule 5+6", NewFreeform(), []testCase{
  99. // Rule 5+6: Hebrew preceding
  100. // U+05f3: Geresh
  101. {"׳", "", errContext},
  102. {"׳ה", "", errContext},
  103. {"a׳b", "", errContext},
  104. {"ש׳", "ש׳", nil}, // U+05e9 U+05f3
  105. {"ש׳׳׳", "ש׳׳׳", nil}, // U+05e9 U+05f3
  106. // U+05f4: Gershayim
  107. {"״", "", errContext},
  108. {"״ה", "", errContext},
  109. {"a״b", "", errContext},
  110. {"ש״", "ש״", nil}, // U+05e9 U+05f4
  111. {"ש״״״", "ש״״״", nil}, // U+05e9 U+05f4
  112. {"aש״״״", "aש״״״", nil}, // U+05e9 U+05f4
  113. }},
  114. {"Context Rule 7", NewFreeform(), []testCase{
  115. // Rule 7: Katakana middle Dot
  116. {"・", "", errContext},
  117. {"abc・", "", errContext},
  118. {"・def", "", errContext},
  119. {"abc・def", "", errContext},
  120. {"aヅc・def", "aヅc・def", nil},
  121. {"abc・dぶf", "abc・dぶf", nil},
  122. {"⺐bc・def", "⺐bc・def", nil},
  123. }},
  124. {"Context Rule 8+9", NewFreeform(), []testCase{
  125. // Rule 8+9: Arabic Indic Digit
  126. {"١٢٣٤٥۶", "", errContext},
  127. {"۱۲۳۴۵٦", "", errContext},
  128. {"١٢٣٤٥", "١٢٣٤٥", nil},
  129. {"۱۲۳۴۵", "۱۲۳۴۵", nil},
  130. }},
  131. {"Nickname", Nickname, []testCase{
  132. {" Swan of Avon ", "Swan of Avon", nil},
  133. {"", "", errEmptyString},
  134. {" ", "", errEmptyString},
  135. {" ", "", errEmptyString},
  136. {"a\u00A0a\u1680a\u2000a\u2001a\u2002a\u2003a\u2004a\u2005a\u2006a\u2007a\u2008a\u2009a\u200Aa\u202Fa\u205Fa\u3000a", "a a a a a a a a a a a a a a a a a", nil},
  137. {"Foo", "Foo", nil},
  138. {"foo", "foo", nil},
  139. {"Foo Bar", "Foo Bar", nil},
  140. {"foo bar", "foo bar", nil},
  141. {"\u03A3", "\u03A3", nil},
  142. {"\u03C3", "\u03C3", nil},
  143. // Greek final sigma is left as is (do not fold!)
  144. {"\u03C2", "\u03C2", nil},
  145. {"\u265A", "♚", nil},
  146. {"Richard \u2163", "Richard IV", nil},
  147. {"\u212B", "Å", nil},
  148. {"\uFB00", "ff", nil}, // because of NFKC
  149. {"שa", "שa", nil}, // no bidi rule
  150. {"동일조건변경허락", "동일조건변경허락", nil},
  151. }},
  152. {"OpaqueString", OpaqueString, []testCase{
  153. {" Swan of Avon ", " Swan of Avon ", nil},
  154. {"", "", errEmptyString},
  155. {" ", " ", nil},
  156. {" ", " ", nil},
  157. {"a\u00A0a\u1680a\u2000a\u2001a\u2002a\u2003a\u2004a\u2005a\u2006a\u2007a\u2008a\u2009a\u200Aa\u202Fa\u205Fa\u3000a", "a a a a a a a a a a a a a a a a a", nil},
  158. {"Foo", "Foo", nil},
  159. {"foo", "foo", nil},
  160. {"Foo Bar", "Foo Bar", nil},
  161. {"foo bar", "foo bar", nil},
  162. {"\u03C3", "\u03C3", nil},
  163. {"Richard \u2163", "Richard \u2163", nil},
  164. {"\u212B", "Å", nil},
  165. {"Jack of \u2666s", "Jack of \u2666s", nil},
  166. {"my cat is a \u0009by", "", errDisallowedRune},
  167. {"שa", "שa", nil}, // no bidi rule
  168. }},
  169. {"UsernameCaseMapped", UsernameCaseMapped, []testCase{
  170. // TODO: Should this work?
  171. // {UsernameCaseMapped, "", "", errDisallowedRune},
  172. {"juliet@example.com", "juliet@example.com", nil},
  173. {"fussball", "fussball", nil},
  174. {"fu\u00DFball", "fu\u00DFball", nil},
  175. {"\u03C0", "\u03C0", nil},
  176. {"\u03A3", "\u03C3", nil},
  177. {"\u03C3", "\u03C3", nil},
  178. // Greek final sigma is left as is (do not fold!)
  179. {"\u03C2", "\u03C2", nil},
  180. {"\u0049", "\u0069", nil},
  181. {"\u0049", "\u0069", nil},
  182. {"\u03D2", "", errDisallowedRune},
  183. {"\u03B0", "\u03B0", nil},
  184. {"foo bar", "", errDisallowedRune},
  185. {"♚", "", bidirule.ErrInvalid},
  186. {"\u007E", "~", nil},
  187. {"a", "a", nil},
  188. {"!", "!", nil},
  189. {"²", "", bidirule.ErrInvalid},
  190. {"\t", "", errDisallowedRune},
  191. {"\n", "", errDisallowedRune},
  192. {"\u26D6", "", bidirule.ErrInvalid},
  193. {"\u26FF", "", bidirule.ErrInvalid},
  194. {"\uFB00", "", errDisallowedRune},
  195. {"\u1680", "", bidirule.ErrInvalid},
  196. {" ", "", errDisallowedRune},
  197. {" ", "", errDisallowedRune},
  198. {"\u01C5", "", errDisallowedRune},
  199. {"\u16EE", "", errDisallowedRune}, // Nl RUNIC ARLAUG SYMBOL
  200. {"\u0488", "", bidirule.ErrInvalid}, // Me COMBINING CYRILLIC HUNDRED THOUSANDS SIGN
  201. {"\u212B", "\u00e5", nil}, // Angstrom sign, NFC -> U+00E5
  202. {"A\u030A", "å", nil}, // A + ring
  203. {"\u00C5", "å", nil}, // A with ring
  204. {"\u00E7", "ç", nil}, // c cedille
  205. {"\u0063\u0327", "ç", nil}, // c + cedille
  206. {"\u0158", "ř", nil},
  207. {"\u0052\u030C", "ř", nil},
  208. {"\u1E61", "\u1E61", nil}, // LATIN SMALL LETTER S WITH DOT ABOVE
  209. // Confusable characters ARE allowed and should NOT be mapped.
  210. {"\u0410", "\u0430", nil}, // CYRILLIC CAPITAL LETTER A
  211. // Full width should be mapped to the canonical decomposition.
  212. {"AB", "ab", nil},
  213. {"שc", "", bidirule.ErrInvalid}, // bidi rule
  214. }},
  215. {"UsernameCasePreserved", UsernameCasePreserved, []testCase{
  216. {"ABC", "ABC", nil},
  217. {"AB", "AB", nil},
  218. {"שc", "", bidirule.ErrInvalid}, // bidi rule
  219. {"\uFB00", "", errDisallowedRune},
  220. {"\u212B", "\u00c5", nil}, // Angstrom sign, NFC -> U+00E5
  221. {"ẛ", "", errDisallowedRune}, // LATIN SMALL LETTER LONG S WITH DOT ABOVE
  222. }},
  223. {"UsernameCaseMappedRestricted", NewRestrictedProfile(UsernameCaseMapped, runes.Predicate(func(r rune) bool {
  224. return strings.ContainsRune(`@`, r)
  225. })), []testCase{
  226. {"juliet@example.com", "", errDisallowedRune},
  227. {"\u0049", "\u0069", nil},
  228. }},
  229. }