You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

876 regels
22 KiB

  1. // This file is generated with "go test -tags generate". DO NOT EDIT!
  2. // +build !generate
  3. package triegen_test
  4. // lookup returns the trie value for the first UTF-8 encoding in s and
  5. // the width in bytes of this encoding. The size will be 0 if s does not
  6. // hold enough bytes to complete the encoding. len(s) must be greater than 0.
  7. func (t *randTrie) lookup(s []byte) (v uint8, sz int) {
  8. c0 := s[0]
  9. switch {
  10. case c0 < 0x80: // is ASCII
  11. return randValues[c0], 1
  12. case c0 < 0xC2:
  13. return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
  14. case c0 < 0xE0: // 2-byte UTF-8
  15. if len(s) < 2 {
  16. return 0, 0
  17. }
  18. i := randIndex[c0]
  19. c1 := s[1]
  20. if c1 < 0x80 || 0xC0 <= c1 {
  21. return 0, 1 // Illegal UTF-8: not a continuation byte.
  22. }
  23. return t.lookupValue(uint32(i), c1), 2
  24. case c0 < 0xF0: // 3-byte UTF-8
  25. if len(s) < 3 {
  26. return 0, 0
  27. }
  28. i := randIndex[c0]
  29. c1 := s[1]
  30. if c1 < 0x80 || 0xC0 <= c1 {
  31. return 0, 1 // Illegal UTF-8: not a continuation byte.
  32. }
  33. o := uint32(i)<<6 + uint32(c1)
  34. i = randIndex[o]
  35. c2 := s[2]
  36. if c2 < 0x80 || 0xC0 <= c2 {
  37. return 0, 2 // Illegal UTF-8: not a continuation byte.
  38. }
  39. return t.lookupValue(uint32(i), c2), 3
  40. case c0 < 0xF8: // 4-byte UTF-8
  41. if len(s) < 4 {
  42. return 0, 0
  43. }
  44. i := randIndex[c0]
  45. c1 := s[1]
  46. if c1 < 0x80 || 0xC0 <= c1 {
  47. return 0, 1 // Illegal UTF-8: not a continuation byte.
  48. }
  49. o := uint32(i)<<6 + uint32(c1)
  50. i = randIndex[o]
  51. c2 := s[2]
  52. if c2 < 0x80 || 0xC0 <= c2 {
  53. return 0, 2 // Illegal UTF-8: not a continuation byte.
  54. }
  55. o = uint32(i)<<6 + uint32(c2)
  56. i = randIndex[o]
  57. c3 := s[3]
  58. if c3 < 0x80 || 0xC0 <= c3 {
  59. return 0, 3 // Illegal UTF-8: not a continuation byte.
  60. }
  61. return t.lookupValue(uint32(i), c3), 4
  62. }
  63. // Illegal rune
  64. return 0, 1
  65. }
  66. // lookupUnsafe returns the trie value for the first UTF-8 encoding in s.
  67. // s must start with a full and valid UTF-8 encoded rune.
  68. func (t *randTrie) lookupUnsafe(s []byte) uint8 {
  69. c0 := s[0]
  70. if c0 < 0x80 { // is ASCII
  71. return randValues[c0]
  72. }
  73. i := randIndex[c0]
  74. if c0 < 0xE0 { // 2-byte UTF-8
  75. return t.lookupValue(uint32(i), s[1])
  76. }
  77. i = randIndex[uint32(i)<<6+uint32(s[1])]
  78. if c0 < 0xF0 { // 3-byte UTF-8
  79. return t.lookupValue(uint32(i), s[2])
  80. }
  81. i = randIndex[uint32(i)<<6+uint32(s[2])]
  82. if c0 < 0xF8 { // 4-byte UTF-8
  83. return t.lookupValue(uint32(i), s[3])
  84. }
  85. return 0
  86. }
  87. // lookupString returns the trie value for the first UTF-8 encoding in s and
  88. // the width in bytes of this encoding. The size will be 0 if s does not
  89. // hold enough bytes to complete the encoding. len(s) must be greater than 0.
  90. func (t *randTrie) lookupString(s string) (v uint8, sz int) {
  91. c0 := s[0]
  92. switch {
  93. case c0 < 0x80: // is ASCII
  94. return randValues[c0], 1
  95. case c0 < 0xC2:
  96. return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
  97. case c0 < 0xE0: // 2-byte UTF-8
  98. if len(s) < 2 {
  99. return 0, 0
  100. }
  101. i := randIndex[c0]
  102. c1 := s[1]
  103. if c1 < 0x80 || 0xC0 <= c1 {
  104. return 0, 1 // Illegal UTF-8: not a continuation byte.
  105. }
  106. return t.lookupValue(uint32(i), c1), 2
  107. case c0 < 0xF0: // 3-byte UTF-8
  108. if len(s) < 3 {
  109. return 0, 0
  110. }
  111. i := randIndex[c0]
  112. c1 := s[1]
  113. if c1 < 0x80 || 0xC0 <= c1 {
  114. return 0, 1 // Illegal UTF-8: not a continuation byte.
  115. }
  116. o := uint32(i)<<6 + uint32(c1)
  117. i = randIndex[o]
  118. c2 := s[2]
  119. if c2 < 0x80 || 0xC0 <= c2 {
  120. return 0, 2 // Illegal UTF-8: not a continuation byte.
  121. }
  122. return t.lookupValue(uint32(i), c2), 3
  123. case c0 < 0xF8: // 4-byte UTF-8
  124. if len(s) < 4 {
  125. return 0, 0
  126. }
  127. i := randIndex[c0]
  128. c1 := s[1]
  129. if c1 < 0x80 || 0xC0 <= c1 {
  130. return 0, 1 // Illegal UTF-8: not a continuation byte.
  131. }
  132. o := uint32(i)<<6 + uint32(c1)
  133. i = randIndex[o]
  134. c2 := s[2]
  135. if c2 < 0x80 || 0xC0 <= c2 {
  136. return 0, 2 // Illegal UTF-8: not a continuation byte.
  137. }
  138. o = uint32(i)<<6 + uint32(c2)
  139. i = randIndex[o]
  140. c3 := s[3]
  141. if c3 < 0x80 || 0xC0 <= c3 {
  142. return 0, 3 // Illegal UTF-8: not a continuation byte.
  143. }
  144. return t.lookupValue(uint32(i), c3), 4
  145. }
  146. // Illegal rune
  147. return 0, 1
  148. }
  149. // lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s.
  150. // s must start with a full and valid UTF-8 encoded rune.
  151. func (t *randTrie) lookupStringUnsafe(s string) uint8 {
  152. c0 := s[0]
  153. if c0 < 0x80 { // is ASCII
  154. return randValues[c0]
  155. }
  156. i := randIndex[c0]
  157. if c0 < 0xE0 { // 2-byte UTF-8
  158. return t.lookupValue(uint32(i), s[1])
  159. }
  160. i = randIndex[uint32(i)<<6+uint32(s[1])]
  161. if c0 < 0xF0 { // 3-byte UTF-8
  162. return t.lookupValue(uint32(i), s[2])
  163. }
  164. i = randIndex[uint32(i)<<6+uint32(s[2])]
  165. if c0 < 0xF8 { // 4-byte UTF-8
  166. return t.lookupValue(uint32(i), s[3])
  167. }
  168. return 0
  169. }
  // randTrie. Total size: 9280 bytes (9.06 KiB). Checksum: 6debd324a8debb8f.
  //
  // The type carries no state of its own; its methods read the package-level
  // randValues and randIndex tables.
  type randTrie struct{}

  // newRandTrie returns a fresh randTrie. The argument i is accepted but not
  // used.
  func newRandTrie(i int) *randTrie {
  	return new(randTrie)
  }
  175. // lookupValue determines the type of block n and looks up the value for b.
  176. func (t *randTrie) lookupValue(n uint32, b byte) uint8 {
  177. switch {
  178. default:
  179. return uint8(randValues[n<<6+uint32(b)])
  180. }
  181. }
  182. // randValues: 56 blocks, 3584 entries, 3584 bytes
  183. // The third block is the zero block.
  //
  // Each block holds 64 entries. The randTrie lookup methods read
  // randValues[c0] directly for ASCII bytes (blocks 0x0 and 0x1). lookupValue
  // reads randValues[n<<6+uint32(b)]; when b is a continuation byte
  // (0x80-0xBF) that offset falls in block n+2, so n == 0 resolves to the
  // all-zero third block.
  184. var randValues = [3584]uint8{
  185. // Block 0x0, offset 0x0
  186. // Block 0x1, offset 0x40
  187. // Block 0x2, offset 0x80
  188. // Block 0x3, offset 0xc0
  189. 0xc9: 0x0001,
  190. // Block 0x4, offset 0x100
  191. 0x100: 0x0001,
  192. // Block 0x5, offset 0x140
  193. 0x155: 0x0001,
  194. // Block 0x6, offset 0x180
  195. 0x196: 0x0001,
  196. // Block 0x7, offset 0x1c0
  197. 0x1ef: 0x0001,
  198. // Block 0x8, offset 0x200
  199. 0x206: 0x0001,
  200. // Block 0x9, offset 0x240
  201. 0x258: 0x0001,
  202. // Block 0xa, offset 0x280
  203. 0x288: 0x0001,
  204. // Block 0xb, offset 0x2c0
  205. 0x2f2: 0x0001,
  206. // Block 0xc, offset 0x300
  207. 0x304: 0x0001,
  208. // Block 0xd, offset 0x340
  209. 0x34b: 0x0001,
  210. // Block 0xe, offset 0x380
  211. 0x3ba: 0x0001,
  212. // Block 0xf, offset 0x3c0
  213. 0x3f5: 0x0001,
  214. // Block 0x10, offset 0x400
  215. 0x41d: 0x0001,
  216. // Block 0x11, offset 0x440
  217. 0x442: 0x0001,
  218. // Block 0x12, offset 0x480
  219. 0x4bb: 0x0001,
  220. // Block 0x13, offset 0x4c0
  221. 0x4e9: 0x0001,
  222. // Block 0x14, offset 0x500
  223. 0x53e: 0x0001,
  224. // Block 0x15, offset 0x540
  225. 0x55f: 0x0001,
  226. // Block 0x16, offset 0x580
  227. 0x5b7: 0x0001,
  228. // Block 0x17, offset 0x5c0
  229. 0x5d9: 0x0001,
  230. // Block 0x18, offset 0x600
  231. 0x60e: 0x0001,
  232. // Block 0x19, offset 0x640
  233. 0x652: 0x0001,
  234. // Block 0x1a, offset 0x680
  235. 0x68f: 0x0001,
  236. // Block 0x1b, offset 0x6c0
  237. 0x6dc: 0x0001,
  238. // Block 0x1c, offset 0x700
  239. 0x703: 0x0001,
  240. // Block 0x1d, offset 0x740
  241. 0x741: 0x0001,
  242. // Block 0x1e, offset 0x780
  243. 0x79b: 0x0001,
  244. // Block 0x1f, offset 0x7c0
  245. 0x7f1: 0x0001,
  246. // Block 0x20, offset 0x800
  247. 0x833: 0x0001,
  248. // Block 0x21, offset 0x840
  249. 0x853: 0x0001,
  250. // Block 0x22, offset 0x880
  251. 0x8a2: 0x0001,
  252. // Block 0x23, offset 0x8c0
  253. 0x8f8: 0x0001,
  254. // Block 0x24, offset 0x900
  255. 0x917: 0x0001,
  256. // Block 0x25, offset 0x940
  257. 0x945: 0x0001,
  258. // Block 0x26, offset 0x980
  259. 0x99e: 0x0001,
  260. // Block 0x27, offset 0x9c0
  261. 0x9fd: 0x0001,
  262. // Block 0x28, offset 0xa00
  263. 0xa0d: 0x0001,
  264. // Block 0x29, offset 0xa40
  265. 0xa66: 0x0001,
  266. // Block 0x2a, offset 0xa80
  267. 0xaab: 0x0001,
  268. // Block 0x2b, offset 0xac0
  269. 0xaea: 0x0001,
  270. // Block 0x2c, offset 0xb00
  271. 0xb2d: 0x0001,
  272. // Block 0x2d, offset 0xb40
  273. 0xb54: 0x0001,
  274. // Block 0x2e, offset 0xb80
  275. 0xb90: 0x0001,
  276. // Block 0x2f, offset 0xbc0
  277. 0xbe5: 0x0001,
  278. // Block 0x30, offset 0xc00
  279. 0xc28: 0x0001,
  280. // Block 0x31, offset 0xc40
  281. 0xc7c: 0x0001,
  282. // Block 0x32, offset 0xc80
  283. 0xcbf: 0x0001,
  284. // Block 0x33, offset 0xcc0
  285. 0xcc7: 0x0001,
  286. // Block 0x34, offset 0xd00
  287. 0xd34: 0x0001,
  288. // Block 0x35, offset 0xd40
  289. 0xd61: 0x0001,
  290. // Block 0x36, offset 0xd80
  291. 0xdb9: 0x0001,
  292. // Block 0x37, offset 0xdc0
  293. 0xdda: 0x0001,
  294. }
  295. // randIndex: 89 blocks, 5696 entries, 5696 bytes
  296. // Block 0 is the zero block.
  //
  // Each block holds 64 entries. The randTrie lookup methods read randIndex[c0]
  // for the lead byte of a multi-byte sequence (lookup and lookupString only
  // get that far for 0xC2 <= c0 < 0xF8, which lies inside block 0x3), and
  // randIndex[uint32(i)<<6+uint32(c)] for each continuation byte c that is not
  // the last one. The entry read is the block number i used in the next step.
  297. var randIndex = [5696]uint8{
  298. // Block 0x0, offset 0x0
  299. // Block 0x1, offset 0x40
  300. // Block 0x2, offset 0x80
  301. // Block 0x3, offset 0xc0
  302. 0xe1: 0x02, 0xe3: 0x03, 0xe4: 0x04,
  303. 0xea: 0x05, 0xeb: 0x06, 0xec: 0x07,
  304. 0xf0: 0x10, 0xf1: 0x24, 0xf2: 0x3d, 0xf3: 0x4f, 0xf4: 0x56,
  305. // Block 0x4, offset 0x100
  306. 0x107: 0x01,
  307. // Block 0x5, offset 0x140
  308. 0x16c: 0x02,
  309. // Block 0x6, offset 0x180
  310. 0x19c: 0x03,
  311. 0x1ae: 0x04,
  312. // Block 0x7, offset 0x1c0
  313. 0x1d8: 0x05,
  314. 0x1f7: 0x06,
  315. // Block 0x8, offset 0x200
  316. 0x20c: 0x07,
  317. // Block 0x9, offset 0x240
  318. 0x24a: 0x08,
  319. // Block 0xa, offset 0x280
  320. 0x2b6: 0x09,
  321. // Block 0xb, offset 0x2c0
  322. 0x2d5: 0x0a,
  323. // Block 0xc, offset 0x300
  324. 0x31a: 0x0b,
  325. // Block 0xd, offset 0x340
  326. 0x373: 0x0c,
  327. // Block 0xe, offset 0x380
  328. 0x38b: 0x0d,
  329. // Block 0xf, offset 0x3c0
  330. 0x3f0: 0x0e,
  331. // Block 0x10, offset 0x400
  332. 0x433: 0x0f,
  333. // Block 0x11, offset 0x440
  334. 0x45d: 0x10,
  335. // Block 0x12, offset 0x480
  336. 0x491: 0x08, 0x494: 0x09, 0x497: 0x0a,
  337. 0x49b: 0x0b, 0x49c: 0x0c,
  338. 0x4a1: 0x0d,
  339. 0x4ad: 0x0e,
  340. 0x4ba: 0x0f,
  341. // Block 0x13, offset 0x4c0
  342. 0x4c1: 0x11,
  343. // Block 0x14, offset 0x500
  344. 0x531: 0x12,
  345. // Block 0x15, offset 0x540
  346. 0x546: 0x13,
  347. // Block 0x16, offset 0x580
  348. 0x5ab: 0x14,
  349. // Block 0x17, offset 0x5c0
  350. 0x5d4: 0x11,
  351. 0x5fe: 0x11,
  352. // Block 0x18, offset 0x600
  353. 0x618: 0x0a,
  354. // Block 0x19, offset 0x640
  355. 0x65b: 0x15,
  356. // Block 0x1a, offset 0x680
  357. 0x6a0: 0x16,
  358. // Block 0x1b, offset 0x6c0
  359. 0x6d2: 0x17,
  360. 0x6f6: 0x18,
  361. // Block 0x1c, offset 0x700
  362. 0x711: 0x19,
  363. // Block 0x1d, offset 0x740
  364. 0x768: 0x1a,
  365. // Block 0x1e, offset 0x780
  366. 0x783: 0x1b,
  367. // Block 0x1f, offset 0x7c0
  368. 0x7f9: 0x1c,
  369. // Block 0x20, offset 0x800
  370. 0x831: 0x1d,
  371. // Block 0x21, offset 0x840
  372. 0x85e: 0x1e,
  373. // Block 0x22, offset 0x880
  374. 0x898: 0x1f,
  375. // Block 0x23, offset 0x8c0
  376. 0x8c7: 0x18,
  377. 0x8d5: 0x14,
  378. 0x8f7: 0x20,
  379. 0x8fe: 0x1f,
  380. // Block 0x24, offset 0x900
  381. 0x905: 0x21,
  382. // Block 0x25, offset 0x940
  383. 0x966: 0x03,
  384. // Block 0x26, offset 0x980
  385. 0x981: 0x07, 0x983: 0x11,
  386. 0x989: 0x12, 0x98a: 0x13, 0x98e: 0x14, 0x98f: 0x15,
  387. 0x992: 0x16, 0x995: 0x17, 0x996: 0x18,
  388. 0x998: 0x19, 0x999: 0x1a, 0x99b: 0x1b, 0x99f: 0x1c,
  389. 0x9a3: 0x1d,
  390. 0x9ad: 0x1e, 0x9af: 0x1f,
  391. 0x9b0: 0x20, 0x9b1: 0x21,
  392. 0x9b8: 0x22, 0x9bd: 0x23,
  393. // Block 0x27, offset 0x9c0
  394. 0x9cd: 0x22,
  395. // Block 0x28, offset 0xa00
  396. 0xa0c: 0x08,
  397. // Block 0x29, offset 0xa40
  398. 0xa6f: 0x1c,
  399. // Block 0x2a, offset 0xa80
  400. 0xa90: 0x1a,
  401. 0xaaf: 0x23,
  402. // Block 0x2b, offset 0xac0
  403. 0xae3: 0x19,
  404. 0xae8: 0x24,
  405. 0xafc: 0x25,
  406. // Block 0x2c, offset 0xb00
  407. 0xb13: 0x26,
  408. // Block 0x2d, offset 0xb40
  409. 0xb67: 0x1c,
  410. // Block 0x2e, offset 0xb80
  411. 0xb8f: 0x0b,
  412. // Block 0x2f, offset 0xbc0
  413. 0xbcb: 0x27,
  414. 0xbe7: 0x26,
  415. // Block 0x30, offset 0xc00
  416. 0xc34: 0x16,
  417. // Block 0x31, offset 0xc40
  418. 0xc62: 0x03,
  419. // Block 0x32, offset 0xc80
  420. 0xcbb: 0x12,
  421. // Block 0x33, offset 0xcc0
  422. 0xcdf: 0x09,
  423. // Block 0x34, offset 0xd00
  424. 0xd34: 0x0a,
  425. // Block 0x35, offset 0xd40
  426. 0xd41: 0x1e,
  427. // Block 0x36, offset 0xd80
  428. 0xd83: 0x28,
  429. // Block 0x37, offset 0xdc0
  430. 0xdc0: 0x15,
  431. // Block 0x38, offset 0xe00
  432. 0xe1a: 0x15,
  433. // Block 0x39, offset 0xe40
  434. 0xe65: 0x29,
  435. // Block 0x3a, offset 0xe80
  436. 0xe86: 0x1f,
  437. // Block 0x3b, offset 0xec0
  438. 0xeec: 0x18,
  439. // Block 0x3c, offset 0xf00
  440. 0xf28: 0x2a,
  441. // Block 0x3d, offset 0xf40
  442. 0xf53: 0x08,
  443. // Block 0x3e, offset 0xf80
  444. 0xfa2: 0x2b,
  445. 0xfaa: 0x17,
  446. // Block 0x3f, offset 0xfc0
  447. 0xfc0: 0x25, 0xfc2: 0x26,
  448. 0xfc9: 0x27, 0xfcd: 0x28, 0xfce: 0x29,
  449. 0xfd5: 0x2a,
  450. 0xfd8: 0x2b, 0xfd9: 0x2c, 0xfdf: 0x2d,
  451. 0xfe1: 0x2e, 0xfe2: 0x2f, 0xfe3: 0x30, 0xfe6: 0x31,
  452. 0xfe9: 0x32, 0xfec: 0x33, 0xfed: 0x34, 0xfef: 0x35,
  453. 0xff1: 0x36, 0xff2: 0x37, 0xff3: 0x38, 0xff4: 0x39,
  454. 0xffa: 0x3a, 0xffc: 0x3b, 0xffe: 0x3c,
  455. // Block 0x40, offset 0x1000
  456. 0x102c: 0x2c,
  457. // Block 0x41, offset 0x1040
  458. 0x1074: 0x2c,
  459. // Block 0x42, offset 0x1080
  460. 0x108c: 0x08,
  461. 0x10a0: 0x2d,
  462. // Block 0x43, offset 0x10c0
  463. 0x10e8: 0x10,
  464. // Block 0x44, offset 0x1100
  465. 0x110f: 0x13,
  466. // Block 0x45, offset 0x1140
  467. 0x114b: 0x2e,
  468. // Block 0x46, offset 0x1180
  469. 0x118b: 0x23,
  470. 0x119d: 0x0c,
  471. // Block 0x47, offset 0x11c0
  472. 0x11c3: 0x12,
  473. 0x11f9: 0x0f,
  474. // Block 0x48, offset 0x1200
  475. 0x121e: 0x1b,
  476. // Block 0x49, offset 0x1240
  477. 0x1270: 0x2f,
  478. // Block 0x4a, offset 0x1280
  479. 0x128a: 0x1b,
  480. 0x12a7: 0x02,
  481. // Block 0x4b, offset 0x12c0
  482. 0x12fb: 0x14,
  483. // Block 0x4c, offset 0x1300
  484. 0x1333: 0x30,
  485. // Block 0x4d, offset 0x1340
  486. 0x134d: 0x31,
  487. // Block 0x4e, offset 0x1380
  488. 0x138e: 0x15,
  489. // Block 0x4f, offset 0x13c0
  490. 0x13f4: 0x32,
  491. // Block 0x50, offset 0x1400
  492. 0x141b: 0x33,
  493. // Block 0x51, offset 0x1440
  494. 0x1448: 0x3e, 0x1449: 0x3f, 0x144a: 0x40, 0x144f: 0x41,
  495. 0x1459: 0x42, 0x145c: 0x43, 0x145e: 0x44, 0x145f: 0x45,
  496. 0x1468: 0x46, 0x1469: 0x47, 0x146c: 0x48, 0x146d: 0x49, 0x146e: 0x4a,
  497. 0x1472: 0x4b, 0x1473: 0x4c,
  498. 0x1479: 0x4d, 0x147b: 0x4e,
  499. // Block 0x52, offset 0x1480
  500. 0x1480: 0x34,
  501. 0x1499: 0x11,
  502. 0x14b6: 0x2c,
  503. // Block 0x53, offset 0x14c0
  504. 0x14e4: 0x0d,
  505. // Block 0x54, offset 0x1500
  506. 0x1527: 0x08,
  507. // Block 0x55, offset 0x1540
  508. 0x1555: 0x2b,
  509. // Block 0x56, offset 0x1580
  510. 0x15b2: 0x35,
  511. // Block 0x57, offset 0x15c0
  512. 0x15f2: 0x1c, 0x15f4: 0x29,
  513. // Block 0x58, offset 0x1600
  514. 0x1600: 0x50, 0x1603: 0x51,
  515. 0x1608: 0x52, 0x160a: 0x53, 0x160d: 0x54, 0x160e: 0x55,
  516. }
  517. // lookup returns the trie value for the first UTF-8 encoding in s and
  518. // the width in bytes of this encoding. The size will be 0 if s does not
  519. // hold enough bytes to complete the encoding. len(s) must be greater than 0.
  520. func (t *multiTrie) lookup(s []byte) (v uint64, sz int) {
  521. c0 := s[0]
  522. switch {
  523. case c0 < 0x80: // is ASCII
  524. return t.ascii[c0], 1
  525. case c0 < 0xC2:
  526. return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
  527. case c0 < 0xE0: // 2-byte UTF-8
  528. if len(s) < 2 {
  529. return 0, 0
  530. }
  531. i := t.utf8Start[c0]
  532. c1 := s[1]
  533. if c1 < 0x80 || 0xC0 <= c1 {
  534. return 0, 1 // Illegal UTF-8: not a continuation byte.
  535. }
  536. return t.lookupValue(uint32(i), c1), 2
  537. case c0 < 0xF0: // 3-byte UTF-8
  538. if len(s) < 3 {
  539. return 0, 0
  540. }
  541. i := t.utf8Start[c0]
  542. c1 := s[1]
  543. if c1 < 0x80 || 0xC0 <= c1 {
  544. return 0, 1 // Illegal UTF-8: not a continuation byte.
  545. }
  546. o := uint32(i)<<6 + uint32(c1)
  547. i = multiIndex[o]
  548. c2 := s[2]
  549. if c2 < 0x80 || 0xC0 <= c2 {
  550. return 0, 2 // Illegal UTF-8: not a continuation byte.
  551. }
  552. return t.lookupValue(uint32(i), c2), 3
  553. case c0 < 0xF8: // 4-byte UTF-8
  554. if len(s) < 4 {
  555. return 0, 0
  556. }
  557. i := t.utf8Start[c0]
  558. c1 := s[1]
  559. if c1 < 0x80 || 0xC0 <= c1 {
  560. return 0, 1 // Illegal UTF-8: not a continuation byte.
  561. }
  562. o := uint32(i)<<6 + uint32(c1)
  563. i = multiIndex[o]
  564. c2 := s[2]
  565. if c2 < 0x80 || 0xC0 <= c2 {
  566. return 0, 2 // Illegal UTF-8: not a continuation byte.
  567. }
  568. o = uint32(i)<<6 + uint32(c2)
  569. i = multiIndex[o]
  570. c3 := s[3]
  571. if c3 < 0x80 || 0xC0 <= c3 {
  572. return 0, 3 // Illegal UTF-8: not a continuation byte.
  573. }
  574. return t.lookupValue(uint32(i), c3), 4
  575. }
  576. // Illegal rune
  577. return 0, 1
  578. }
  579. // lookupUnsafe returns the trie value for the first UTF-8 encoding in s.
  580. // s must start with a full and valid UTF-8 encoded rune.
  581. func (t *multiTrie) lookupUnsafe(s []byte) uint64 {
  582. c0 := s[0]
  583. if c0 < 0x80 { // is ASCII
  584. return t.ascii[c0]
  585. }
  586. i := t.utf8Start[c0]
  587. if c0 < 0xE0 { // 2-byte UTF-8
  588. return t.lookupValue(uint32(i), s[1])
  589. }
  590. i = multiIndex[uint32(i)<<6+uint32(s[1])]
  591. if c0 < 0xF0 { // 3-byte UTF-8
  592. return t.lookupValue(uint32(i), s[2])
  593. }
  594. i = multiIndex[uint32(i)<<6+uint32(s[2])]
  595. if c0 < 0xF8 { // 4-byte UTF-8
  596. return t.lookupValue(uint32(i), s[3])
  597. }
  598. return 0
  599. }
  600. // lookupString returns the trie value for the first UTF-8 encoding in s and
  601. // the width in bytes of this encoding. The size will be 0 if s does not
  602. // hold enough bytes to complete the encoding. len(s) must be greater than 0.
  603. func (t *multiTrie) lookupString(s string) (v uint64, sz int) {
  604. c0 := s[0]
  605. switch {
  606. case c0 < 0x80: // is ASCII
  607. return t.ascii[c0], 1
  608. case c0 < 0xC2:
  609. return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
  610. case c0 < 0xE0: // 2-byte UTF-8
  611. if len(s) < 2 {
  612. return 0, 0
  613. }
  614. i := t.utf8Start[c0]
  615. c1 := s[1]
  616. if c1 < 0x80 || 0xC0 <= c1 {
  617. return 0, 1 // Illegal UTF-8: not a continuation byte.
  618. }
  619. return t.lookupValue(uint32(i), c1), 2
  620. case c0 < 0xF0: // 3-byte UTF-8
  621. if len(s) < 3 {
  622. return 0, 0
  623. }
  624. i := t.utf8Start[c0]
  625. c1 := s[1]
  626. if c1 < 0x80 || 0xC0 <= c1 {
  627. return 0, 1 // Illegal UTF-8: not a continuation byte.
  628. }
  629. o := uint32(i)<<6 + uint32(c1)
  630. i = multiIndex[o]
  631. c2 := s[2]
  632. if c2 < 0x80 || 0xC0 <= c2 {
  633. return 0, 2 // Illegal UTF-8: not a continuation byte.
  634. }
  635. return t.lookupValue(uint32(i), c2), 3
  636. case c0 < 0xF8: // 4-byte UTF-8
  637. if len(s) < 4 {
  638. return 0, 0
  639. }
  640. i := t.utf8Start[c0]
  641. c1 := s[1]
  642. if c1 < 0x80 || 0xC0 <= c1 {
  643. return 0, 1 // Illegal UTF-8: not a continuation byte.
  644. }
  645. o := uint32(i)<<6 + uint32(c1)
  646. i = multiIndex[o]
  647. c2 := s[2]
  648. if c2 < 0x80 || 0xC0 <= c2 {
  649. return 0, 2 // Illegal UTF-8: not a continuation byte.
  650. }
  651. o = uint32(i)<<6 + uint32(c2)
  652. i = multiIndex[o]
  653. c3 := s[3]
  654. if c3 < 0x80 || 0xC0 <= c3 {
  655. return 0, 3 // Illegal UTF-8: not a continuation byte.
  656. }
  657. return t.lookupValue(uint32(i), c3), 4
  658. }
  659. // Illegal rune
  660. return 0, 1
  661. }
  662. // lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s.
  663. // s must start with a full and valid UTF-8 encoded rune.
  664. func (t *multiTrie) lookupStringUnsafe(s string) uint64 {
  665. c0 := s[0]
  666. if c0 < 0x80 { // is ASCII
  667. return t.ascii[c0]
  668. }
  669. i := t.utf8Start[c0]
  670. if c0 < 0xE0 { // 2-byte UTF-8
  671. return t.lookupValue(uint32(i), s[1])
  672. }
  673. i = multiIndex[uint32(i)<<6+uint32(s[1])]
  674. if c0 < 0xF0 { // 3-byte UTF-8
  675. return t.lookupValue(uint32(i), s[2])
  676. }
  677. i = multiIndex[uint32(i)<<6+uint32(s[2])]
  678. if c0 < 0xF8 { // 4-byte UTF-8
  679. return t.lookupValue(uint32(i), s[3])
  680. }
  681. return 0
  682. }
  // multiTrie. Total size: 18250 bytes (17.82 KiB). Checksum: a69a609d8696aa5e.
  //
  // A multiTrie holds two slices into the shared multiValues and multiIndex
  // tables; newMultiTrie chooses where each slice starts.
  type multiTrie struct {
  	// ascii is read as ascii[c0] to obtain the value of an ASCII byte c0.
  	ascii []uint64

  	// utf8Start is read as utf8Start[c0] with the lead byte c0 of a
  	// multi-byte sequence to obtain the first block number.
  	utf8Start []uint8
  }
  688. func newMultiTrie(i int) *multiTrie {
  689. h := multiTrieHandles[i]
  690. return &multiTrie{multiValues[uint32(h.ascii)<<6:], multiIndex[uint32(h.multi)<<6:]}
  691. }
  // multiTrieHandle names the starting blocks of one trie inside the shared
  // tables. newMultiTrie shifts each field left by 6 to obtain an offset.
  type multiTrieHandle struct {
  	ascii uint8 // starting block within multiValues
  	multi uint8 // starting block within multiIndex
  }
  695. // multiTrieHandles: 5 handles, 10 bytes
  696. var multiTrieHandles = [5]multiTrieHandle{
  697. {0, 0}, // 8c1e77823143d35c: all
  698. {0, 23}, // 8fb58ff8243b45b0: ASCII only
  699. {0, 23}, // 8fb58ff8243b45b0: ASCII only 2
  700. {0, 24}, // 2ccc43994f11046f: BMP only
  701. {30, 25}, // ce448591bdcb4733: No BMP
  702. }
  703. // lookupValue determines the type of block n and looks up the value for b.
  704. func (t *multiTrie) lookupValue(n uint32, b byte) uint64 {
  705. switch {
  706. default:
  707. return uint64(multiValues[n<<6+uint32(b)])
  708. }
  709. }
  710. // multiValues: 32 blocks, 2048 entries, 16384 bytes
  711. // The third block is the zero block.
  //
  // Each block holds 64 entries. newMultiTrie slices this array at
  // uint32(h.ascii)<<6 to form multiTrie.ascii, which is read as t.ascii[c0]
  // for ASCII bytes. lookupValue reads multiValues[n<<6+uint32(b)]; when b is a
  // continuation byte (0x80-0xBF) that offset falls in block n+2, so n == 0
  // resolves to the all-zero third block.
  712. var multiValues = [2048]uint64{
  713. // Block 0x0, offset 0x0
  714. 0x03: 0x6e361699800b9fb8, 0x04: 0x52d3935a34f6f0b, 0x05: 0x2948319393e7ef10,
  715. 0x07: 0x20f03b006704f663, 0x08: 0x6c15c0732bb2495f, 0x09: 0xe54e2c59d953551,
  716. 0x0f: 0x33d8a825807d8037, 0x10: 0x6ecd93cb12168b92, 0x11: 0x6a81c9c0ce86e884,
  717. 0x1f: 0xa03e77aac8be79b, 0x20: 0x28591d0e7e486efa, 0x21: 0x716fa3bc398dec8,
  718. 0x3f: 0x4fd3bcfa72bce8b0,
  719. // Block 0x1, offset 0x40
  720. 0x40: 0x3cbaef3db8ba5f12, 0x41: 0x2d262347c1f56357,
  721. 0x7f: 0x782caa2d25a418a9,
  722. // Block 0x2, offset 0x80
  723. // Block 0x3, offset 0xc0
  724. 0xc0: 0x6bbd1f937b1ff5d2, 0xc1: 0x732e23088d2eb8a4,
  725. // Block 0x4, offset 0x100
  726. 0x13f: 0x56f8c4c82f5962dc,
  727. // Block 0x5, offset 0x140
  728. 0x140: 0x57dc4544729a5da2, 0x141: 0x2f62f9cd307ffa0d,
  729. // Block 0x6, offset 0x180
  730. 0x1bf: 0x7bf4d0ebf302a088,
  731. // Block 0x7, offset 0x1c0
  732. 0x1c0: 0x1f0d67f249e59931, 0x1c1: 0x3011def73aa550c7,
  733. // Block 0x8, offset 0x200
  734. 0x23f: 0x5de81c1dff6bf29d,
  735. // Block 0x9, offset 0x240
  736. 0x240: 0x752c035737b825e8, 0x241: 0x1e793399081e3bb3,
  737. // Block 0xa, offset 0x280
  738. 0x2bf: 0x6a28f01979cbf059,
  739. // Block 0xb, offset 0x2c0
  740. 0x2c0: 0x373a4b0f2cbd4c74, 0x2c1: 0x4fd2c288683b767c,
  741. // Block 0xc, offset 0x300
  742. 0x33f: 0x5a10ffa9e29184fb,
  743. // Block 0xd, offset 0x340
  744. 0x340: 0x700f9bdb53fff6a5, 0x341: 0xcde93df0427eb79,
  745. // Block 0xe, offset 0x380
  746. 0x3bf: 0x74071288fff39c76,
  747. // Block 0xf, offset 0x3c0
  748. 0x3c0: 0x481fc2f510e5268a, 0x3c1: 0x7565c28164204849,
  749. // Block 0x10, offset 0x400
  750. 0x43f: 0x5676a62fd49c6bec,
  751. // Block 0x11, offset 0x440
  752. 0x440: 0x2f2d15776cbafc6b, 0x441: 0x4c55e8dc0ff11a3f,
  753. // Block 0x12, offset 0x480
  754. 0x4bf: 0x69d6f0fe711fafc9,
  755. // Block 0x13, offset 0x4c0
  756. 0x4c0: 0x33181de28cfb062d, 0x4c1: 0x2ef3adc6bb2f2d02,
  757. // Block 0x14, offset 0x500
  758. 0x53f: 0xe03b31814c95f8b,
  759. // Block 0x15, offset 0x540
  760. 0x540: 0x3bf6dc9a1c115603, 0x541: 0x6984ec9b7f51f7fc,
  761. // Block 0x16, offset 0x580
  762. 0x5bf: 0x3c02ea92fb168559,
  763. // Block 0x17, offset 0x5c0
  764. 0x5c0: 0x1badfe42e7629494, 0x5c1: 0x6dc4a554005f7645,
  765. // Block 0x18, offset 0x600
  766. 0x63f: 0x3bb2ed2a72748f4b,
  767. // Block 0x19, offset 0x640
  768. 0x640: 0x291354cd6767ec10, 0x641: 0x2c3a4715e3c070d6,
  769. // Block 0x1a, offset 0x680
  770. 0x6bf: 0x352711cfb7236418,
  771. // Block 0x1b, offset 0x6c0
  772. 0x6c0: 0x3a59d34fb8bceda, 0x6c1: 0x5e90d8ebedd64fa1,
  773. // Block 0x1c, offset 0x700
  774. 0x73f: 0x7191a77b28d23110,
  775. // Block 0x1d, offset 0x740
  776. 0x740: 0x4ca7f0c1623423d8, 0x741: 0x4f7156d996e2d0de,
  777. // Block 0x1e, offset 0x780
  778. // Block 0x1f, offset 0x7c0
  779. }
  780. // multiIndex: 29 blocks, 1856 entries, 1856 bytes
  781. // Block 0 is the zero block.
  //
  // Each block holds 64 entries. newMultiTrie slices this array at
  // uint32(h.multi)<<6 to form multiTrie.utf8Start, which is read as
  // t.utf8Start[c0] for the lead byte of a multi-byte sequence. For each
  // continuation byte c that is not the last one, the multiTrie lookup methods
  // read multiIndex[uint32(i)<<6+uint32(c)] directly; the entry read is the
  // block number i used in the next step.
  782. var multiIndex = [1856]uint8{
  783. // Block 0x0, offset 0x0
  784. // Block 0x1, offset 0x40
  785. // Block 0x2, offset 0x80
  786. // Block 0x3, offset 0xc0
  787. 0xc2: 0x01, 0xc3: 0x02, 0xc4: 0x03, 0xc7: 0x04,
  788. 0xc8: 0x05, 0xcf: 0x06,
  789. 0xd0: 0x07,
  790. 0xdf: 0x08,
  791. 0xe0: 0x02, 0xe1: 0x03, 0xe2: 0x04, 0xe3: 0x05, 0xe4: 0x06, 0xe7: 0x07,
  792. 0xe8: 0x08, 0xef: 0x09,
  793. 0xf0: 0x0e, 0xf1: 0x11, 0xf2: 0x13, 0xf3: 0x15, 0xf4: 0x17,
  794. // Block 0x4, offset 0x100
  795. 0x120: 0x09,
  796. 0x13f: 0x0a,
  797. // Block 0x5, offset 0x140
  798. 0x140: 0x0b,
  799. 0x17f: 0x0c,
  800. // Block 0x6, offset 0x180
  801. 0x180: 0x0d,
  802. // Block 0x7, offset 0x1c0
  803. 0x1ff: 0x0e,
  804. // Block 0x8, offset 0x200
  805. 0x200: 0x0f,
  806. // Block 0x9, offset 0x240
  807. 0x27f: 0x10,
  808. // Block 0xa, offset 0x280
  809. 0x280: 0x11,
  810. // Block 0xb, offset 0x2c0
  811. 0x2ff: 0x12,
  812. // Block 0xc, offset 0x300
  813. 0x300: 0x13,
  814. // Block 0xd, offset 0x340
  815. 0x37f: 0x14,
  816. // Block 0xe, offset 0x380
  817. 0x380: 0x15,
  818. // Block 0xf, offset 0x3c0
  819. 0x3ff: 0x16,
  820. // Block 0x10, offset 0x400
  821. 0x410: 0x0a,
  822. 0x41f: 0x0b,
  823. 0x420: 0x0c,
  824. 0x43f: 0x0d,
  825. // Block 0x11, offset 0x440
  826. 0x440: 0x17,
  827. // Block 0x12, offset 0x480
  828. 0x4bf: 0x18,
  829. // Block 0x13, offset 0x4c0
  830. 0x4c0: 0x0f,
  831. 0x4ff: 0x10,
  832. // Block 0x14, offset 0x500
  833. 0x500: 0x19,
  834. // Block 0x15, offset 0x540
  835. 0x540: 0x12,
  836. // Block 0x16, offset 0x580
  837. 0x5bf: 0x1a,
  838. // Block 0x17, offset 0x5c0
  839. 0x5ff: 0x14,
  840. // Block 0x18, offset 0x600
  841. 0x600: 0x1b,
  842. // Block 0x19, offset 0x640
  843. 0x640: 0x16,
  844. // Block 0x1a, offset 0x680
  845. // Block 0x1b, offset 0x6c0
  846. 0x6c2: 0x01, 0x6c3: 0x02, 0x6c4: 0x03, 0x6c7: 0x04,
  847. 0x6c8: 0x05, 0x6cf: 0x06,
  848. 0x6d0: 0x07,
  849. 0x6df: 0x08,
  850. 0x6e0: 0x02, 0x6e1: 0x03, 0x6e2: 0x04, 0x6e3: 0x05, 0x6e4: 0x06, 0x6e7: 0x07,
  851. 0x6e8: 0x08, 0x6ef: 0x09,
  852. // Block 0x1c, offset 0x700
  853. 0x730: 0x0e, 0x731: 0x11, 0x732: 0x13, 0x733: 0x15, 0x734: 0x17,
  854. }