You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

417 lines
9.6 KiB

  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package publicsuffix
  5. import (
  6. "sort"
  7. "strings"
  8. "testing"
  9. )
  10. func TestNodeLabel(t *testing.T) {
  11. for i, want := range nodeLabels {
  12. got := nodeLabel(uint32(i))
  13. if got != want {
  14. t.Errorf("%d: got %q, want %q", i, got, want)
  15. }
  16. }
  17. }
  18. func TestFind(t *testing.T) {
  19. testCases := []string{
  20. "",
  21. "a",
  22. "a0",
  23. "aaaa",
  24. "ao",
  25. "ap",
  26. "ar",
  27. "aro",
  28. "arp",
  29. "arpa",
  30. "arpaa",
  31. "arpb",
  32. "az",
  33. "b",
  34. "b0",
  35. "ba",
  36. "z",
  37. "zu",
  38. "zv",
  39. "zw",
  40. "zx",
  41. "zy",
  42. "zz",
  43. "zzzz",
  44. }
  45. for _, tc := range testCases {
  46. got := find(tc, 0, numTLD)
  47. want := notFound
  48. for i := uint32(0); i < numTLD; i++ {
  49. if tc == nodeLabel(i) {
  50. want = i
  51. break
  52. }
  53. }
  54. if got != want {
  55. t.Errorf("%q: got %d, want %d", tc, got, want)
  56. }
  57. }
  58. }
  59. func TestICANN(t *testing.T) {
  60. testCases := map[string]bool{
  61. "foo.org": true,
  62. "foo.co.uk": true,
  63. "foo.dyndns.org": false,
  64. "foo.go.dyndns.org": false,
  65. "foo.blogspot.co.uk": false,
  66. "foo.intranet": false,
  67. }
  68. for domain, want := range testCases {
  69. _, got := PublicSuffix(domain)
  70. if got != want {
  71. t.Errorf("%q: got %v, want %v", domain, got, want)
  72. }
  73. }
  74. }
  // publicSuffixTestCases pairs an input domain with the public suffix expected
// for it. Each group is preceded by the list rules that are relevant to it.
var publicSuffixTestCases = []struct {
	domain string
	want   string
}{
	// Empty string.
	{domain: "", want: ""},

	// Rules for .ao:
	//	ao
	//	ed.ao
	//	gv.ao
	//	og.ao
	//	co.ao
	//	pb.ao
	//	it.ao
	{domain: "ao", want: "ao"},
	{domain: "www.ao", want: "ao"},
	{domain: "pb.ao", want: "pb.ao"},
	{domain: "www.pb.ao", want: "pb.ao"},
	{domain: "www.xxx.yyy.zzz.pb.ao", want: "pb.ao"},

	// Rules for .ar:
	//	ar
	//	com.ar
	//	edu.ar
	//	gob.ar
	//	gov.ar
	//	int.ar
	//	mil.ar
	//	net.ar
	//	org.ar
	//	tur.ar
	//	blogspot.com.ar
	{domain: "ar", want: "ar"},
	{domain: "www.ar", want: "ar"},
	{domain: "nic.ar", want: "ar"},
	{domain: "www.nic.ar", want: "ar"},
	{domain: "com.ar", want: "com.ar"},
	{domain: "www.com.ar", want: "com.ar"},
	{domain: "blogspot.com.ar", want: "blogspot.com.ar"},
	{domain: "www.blogspot.com.ar", want: "blogspot.com.ar"},
	{domain: "www.xxx.yyy.zzz.blogspot.com.ar", want: "blogspot.com.ar"},
	{domain: "logspot.com.ar", want: "com.ar"},
	{domain: "zlogspot.com.ar", want: "com.ar"},
	{domain: "zblogspot.com.ar", want: "com.ar"},

	// Rules for .arpa:
	//	arpa
	//	e164.arpa
	//	in-addr.arpa
	//	ip6.arpa
	//	iris.arpa
	//	uri.arpa
	//	urn.arpa
	{domain: "arpa", want: "arpa"},
	{domain: "www.arpa", want: "arpa"},
	{domain: "urn.arpa", want: "urn.arpa"},
	{domain: "www.urn.arpa", want: "urn.arpa"},
	{domain: "www.xxx.yyy.zzz.urn.arpa", want: "urn.arpa"},

	// Relevant rules for {kobe,kyoto}.jp:
	//	jp
	//	*.kobe.jp
	//	!city.kobe.jp
	//	kyoto.jp
	//	ide.kyoto.jp
	{domain: "jp", want: "jp"},
	{domain: "kobe.jp", want: "jp"},
	{domain: "c.kobe.jp", want: "c.kobe.jp"},
	{domain: "b.c.kobe.jp", want: "c.kobe.jp"},
	{domain: "a.b.c.kobe.jp", want: "c.kobe.jp"},
	{domain: "city.kobe.jp", want: "kobe.jp"},
	{domain: "www.city.kobe.jp", want: "kobe.jp"},
	{domain: "kyoto.jp", want: "kyoto.jp"},
	{domain: "test.kyoto.jp", want: "kyoto.jp"},
	{domain: "ide.kyoto.jp", want: "ide.kyoto.jp"},
	{domain: "b.ide.kyoto.jp", want: "ide.kyoto.jp"},
	{domain: "a.b.ide.kyoto.jp", want: "ide.kyoto.jp"},

	// Rules for .tw:
	//	tw
	//	edu.tw
	//	gov.tw
	//	mil.tw
	//	com.tw
	//	net.tw
	//	org.tw
	//	idv.tw
	//	game.tw
	//	ebiz.tw
	//	club.tw
	//	網路.tw (xn--zf0ao64a.tw)
	//	組織.tw (xn--uc0atv.tw)
	//	商業.tw (xn--czrw28b.tw)
	//	blogspot.tw
	{domain: "tw", want: "tw"},
	{domain: "aaa.tw", want: "tw"},
	{domain: "www.aaa.tw", want: "tw"},
	{domain: "xn--czrw28b.aaa.tw", want: "tw"},
	{domain: "edu.tw", want: "edu.tw"},
	{domain: "www.edu.tw", want: "edu.tw"},
	{domain: "xn--czrw28b.edu.tw", want: "edu.tw"},
	{domain: "xn--czrw28b.tw", want: "xn--czrw28b.tw"},
	{domain: "www.xn--czrw28b.tw", want: "xn--czrw28b.tw"},
	{domain: "xn--uc0atv.xn--czrw28b.tw", want: "xn--czrw28b.tw"},
	{domain: "xn--kpry57d.tw", want: "tw"},

	// Rules for .uk:
	//	uk
	//	ac.uk
	//	co.uk
	//	gov.uk
	//	ltd.uk
	//	me.uk
	//	net.uk
	//	nhs.uk
	//	org.uk
	//	plc.uk
	//	police.uk
	//	*.sch.uk
	//	blogspot.co.uk
	{domain: "uk", want: "uk"},
	{domain: "aaa.uk", want: "uk"},
	{domain: "www.aaa.uk", want: "uk"},
	{domain: "mod.uk", want: "uk"},
	{domain: "www.mod.uk", want: "uk"},
	{domain: "sch.uk", want: "uk"},
	{domain: "mod.sch.uk", want: "mod.sch.uk"},
	{domain: "www.sch.uk", want: "www.sch.uk"},
	{domain: "blogspot.co.uk", want: "blogspot.co.uk"},
	{domain: "blogspot.nic.uk", want: "uk"},
	{domain: "blogspot.sch.uk", want: "blogspot.sch.uk"},

	// Rules for .рф:
	//	рф (xn--p1ai)
	{domain: "xn--p1ai", want: "xn--p1ai"},
	{domain: "aaa.xn--p1ai", want: "xn--p1ai"},
	{domain: "www.xxx.yyy.xn--p1ai", want: "xn--p1ai"},

	// Rules for .zw:
	//	*.zw
	{domain: "zw", want: "zw"},
	{domain: "www.zw", want: "www.zw"},
	{domain: "zzz.zw", want: "zzz.zw"},
	{domain: "www.zzz.zw", want: "zzz.zw"},
	{domain: "www.xxx.yyy.zzz.zw", want: "zzz.zw"},

	// No rules exist for .nosuchtld.
	{domain: "nosuchtld", want: "nosuchtld"},
	{domain: "foo.nosuchtld", want: "nosuchtld"},
	{domain: "bar.foo.nosuchtld", want: "nosuchtld"},
}
  217. func BenchmarkPublicSuffix(b *testing.B) {
  218. for i := 0; i < b.N; i++ {
  219. for _, tc := range publicSuffixTestCases {
  220. List.PublicSuffix(tc.domain)
  221. }
  222. }
  223. }
  224. func TestPublicSuffix(t *testing.T) {
  225. for _, tc := range publicSuffixTestCases {
  226. got := List.PublicSuffix(tc.domain)
  227. if got != tc.want {
  228. t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
  229. }
  230. }
  231. }
  232. func TestSlowPublicSuffix(t *testing.T) {
  233. for _, tc := range publicSuffixTestCases {
  234. got := slowPublicSuffix(tc.domain)
  235. if got != tc.want {
  236. t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
  237. }
  238. }
  239. }
  240. // slowPublicSuffix implements the canonical (but O(number of rules)) public
  241. // suffix algorithm described at http://publicsuffix.org/list/.
  242. //
  243. // 1. Match domain against all rules and take note of the matching ones.
  244. // 2. If no rules match, the prevailing rule is "*".
  245. // 3. If more than one rule matches, the prevailing rule is the one which is an exception rule.
  246. // 4. If there is no matching exception rule, the prevailing rule is the one with the most labels.
  247. // 5. If the prevailing rule is a exception rule, modify it by removing the leftmost label.
  248. // 6. The public suffix is the set of labels from the domain which directly match the labels of the prevailing rule (joined by dots).
  249. // 7. The registered or registrable domain is the public suffix plus one additional label.
  250. //
  251. // This function returns the public suffix, not the registrable domain, and so
  252. // it stops after step 6.
  253. func slowPublicSuffix(domain string) string {
  254. match := func(rulePart, domainPart string) bool {
  255. switch rulePart[0] {
  256. case '*':
  257. return true
  258. case '!':
  259. return rulePart[1:] == domainPart
  260. }
  261. return rulePart == domainPart
  262. }
  263. domainParts := strings.Split(domain, ".")
  264. var matchingRules [][]string
  265. loop:
  266. for _, rule := range rules {
  267. ruleParts := strings.Split(rule, ".")
  268. if len(domainParts) < len(ruleParts) {
  269. continue
  270. }
  271. for i := range ruleParts {
  272. rulePart := ruleParts[len(ruleParts)-1-i]
  273. domainPart := domainParts[len(domainParts)-1-i]
  274. if !match(rulePart, domainPart) {
  275. continue loop
  276. }
  277. }
  278. matchingRules = append(matchingRules, ruleParts)
  279. }
  280. if len(matchingRules) == 0 {
  281. matchingRules = append(matchingRules, []string{"*"})
  282. } else {
  283. sort.Sort(byPriority(matchingRules))
  284. }
  285. prevailing := matchingRules[0]
  286. if prevailing[0][0] == '!' {
  287. prevailing = prevailing[1:]
  288. }
  289. if prevailing[0][0] == '*' {
  290. replaced := domainParts[len(domainParts)-len(prevailing)]
  291. prevailing = append([]string{replaced}, prevailing[1:]...)
  292. }
  293. return strings.Join(prevailing, ".")
  294. }
  // byPriority implements sort.Interface over rules that have been split into
// labels, ordering them so that the prevailing rule sorts first: exception
// rules (first label starting with '!') come before all other rules, and
// otherwise rules with more labels come before rules with fewer.
type byPriority [][]string

// Len returns the number of rules.
func (b byPriority) Len() int { return len(b) }

// Swap exchanges the rules at positions i and j.
func (b byPriority) Swap(i, j int) { b[i], b[j] = b[j], b[i] }

// Less reports whether rule i has strictly higher priority than rule j.
//
// The exception flag is compared first and the label count second, so that
// two exception rules are never each reported as "less" than the other and
// Less(i, i) is always false.
func (b byPriority) Less(i, j int) bool {
	iException := b[i][0][0] == '!'
	jException := b[j][0][0] == '!'
	if iException != jException {
		return iException
	}
	return len(b[i]) > len(b[j])
}
  // eTLDPlusOneTestCases pairs an input domain with the expected effective TLD
// plus one label; want is empty where no such domain is expected. The cases
// are taken from
// https://github.com/publicsuffix/list/blob/master/tests/test_psl.txt
var eTLDPlusOneTestCases = []struct {
	domain string
	want   string
}{
	// Empty input.
	{domain: "", want: ""},

	// Unlisted TLD.
	{domain: "example", want: ""},
	{domain: "example.example", want: "example.example"},
	{domain: "b.example.example", want: "example.example"},
	{domain: "a.b.example.example", want: "example.example"},

	// TLD with only 1 rule.
	{domain: "biz", want: ""},
	{domain: "domain.biz", want: "domain.biz"},
	{domain: "b.domain.biz", want: "domain.biz"},
	{domain: "a.b.domain.biz", want: "domain.biz"},

	// TLD with some 2-level rules.
	{domain: "com", want: ""},
	{domain: "example.com", want: "example.com"},
	{domain: "b.example.com", want: "example.com"},
	{domain: "a.b.example.com", want: "example.com"},
	{domain: "uk.com", want: ""},
	{domain: "example.uk.com", want: "example.uk.com"},
	{domain: "b.example.uk.com", want: "example.uk.com"},
	{domain: "a.b.example.uk.com", want: "example.uk.com"},
	{domain: "test.ac", want: "test.ac"},

	// TLD with only 1 (wildcard) rule.
	{domain: "mm", want: ""},
	{domain: "c.mm", want: ""},
	{domain: "b.c.mm", want: "b.c.mm"},
	{domain: "a.b.c.mm", want: "b.c.mm"},

	// More complex TLD.
	{domain: "jp", want: ""},
	{domain: "test.jp", want: "test.jp"},
	{domain: "www.test.jp", want: "test.jp"},
	{domain: "ac.jp", want: ""},
	{domain: "test.ac.jp", want: "test.ac.jp"},
	{domain: "www.test.ac.jp", want: "test.ac.jp"},
	{domain: "kyoto.jp", want: ""},
	{domain: "test.kyoto.jp", want: "test.kyoto.jp"},
	{domain: "ide.kyoto.jp", want: ""},
	{domain: "b.ide.kyoto.jp", want: "b.ide.kyoto.jp"},
	{domain: "a.b.ide.kyoto.jp", want: "b.ide.kyoto.jp"},
	{domain: "c.kobe.jp", want: ""},
	{domain: "b.c.kobe.jp", want: "b.c.kobe.jp"},
	{domain: "a.b.c.kobe.jp", want: "b.c.kobe.jp"},
	{domain: "city.kobe.jp", want: "city.kobe.jp"},
	{domain: "www.city.kobe.jp", want: "city.kobe.jp"},

	// TLD with a wildcard rule and exceptions.
	{domain: "ck", want: ""},
	{domain: "test.ck", want: ""},
	{domain: "b.test.ck", want: "b.test.ck"},
	{domain: "a.b.test.ck", want: "b.test.ck"},
	{domain: "www.ck", want: "www.ck"},
	{domain: "www.www.ck", want: "www.ck"},

	// US K12.
	{domain: "us", want: ""},
	{domain: "test.us", want: "test.us"},
	{domain: "www.test.us", want: "test.us"},
	{domain: "ak.us", want: ""},
	{domain: "test.ak.us", want: "test.ak.us"},
	{domain: "www.test.ak.us", want: "test.ak.us"},
	{domain: "k12.ak.us", want: ""},
	{domain: "test.k12.ak.us", want: "test.k12.ak.us"},
	{domain: "www.test.k12.ak.us", want: "test.k12.ak.us"},

	// Punycoded IDN labels.
	{domain: "xn--85x722f.com.cn", want: "xn--85x722f.com.cn"},
	{domain: "xn--85x722f.xn--55qx5d.cn", want: "xn--85x722f.xn--55qx5d.cn"},
	{domain: "www.xn--85x722f.xn--55qx5d.cn", want: "xn--85x722f.xn--55qx5d.cn"},
	{domain: "shishi.xn--55qx5d.cn", want: "shishi.xn--55qx5d.cn"},
	{domain: "xn--55qx5d.cn", want: ""},
	{domain: "xn--85x722f.xn--fiqs8s", want: "xn--85x722f.xn--fiqs8s"},
	{domain: "www.xn--85x722f.xn--fiqs8s", want: "xn--85x722f.xn--fiqs8s"},
	{domain: "shishi.xn--fiqs8s", want: "shishi.xn--fiqs8s"},
	{domain: "xn--fiqs8s", want: ""},
}
  384. func TestEffectiveTLDPlusOne(t *testing.T) {
  385. for _, tc := range eTLDPlusOneTestCases {
  386. got, _ := EffectiveTLDPlusOne(tc.domain)
  387. if got != tc.want {
  388. t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
  389. }
  390. }
  391. }