You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

160 lines
3.2 KiB

  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // The trie in this file is used to associate the first full character in an
  5. // UTF-8 string to a collation element. All but the last byte in a UTF-8 byte
  6. // sequence are used to lookup offsets in the index table to be used for the
  7. // next byte. The last byte is used to index into a table of collation elements.
  8. // For a full description, see go.text/collate/build/trie.go.
  9. package colltab
  10. const blockSize = 64
  11. type Trie struct {
  12. Index0 []uint16 // index for first byte (0xC0-0xFF)
  13. Values0 []uint32 // index for first byte (0x00-0x7F)
  14. Index []uint16
  15. Values []uint32
  16. }
  17. const (
  18. t1 = 0x00 // 0000 0000
  19. tx = 0x80 // 1000 0000
  20. t2 = 0xC0 // 1100 0000
  21. t3 = 0xE0 // 1110 0000
  22. t4 = 0xF0 // 1111 0000
  23. t5 = 0xF8 // 1111 1000
  24. t6 = 0xFC // 1111 1100
  25. te = 0xFE // 1111 1110
  26. )
  27. func (t *Trie) lookupValue(n uint16, b byte) Elem {
  28. return Elem(t.Values[int(n)<<6+int(b)])
  29. }
  30. // lookup returns the trie value for the first UTF-8 encoding in s and
  31. // the width in bytes of this encoding. The size will be 0 if s does not
  32. // hold enough bytes to complete the encoding. len(s) must be greater than 0.
  33. func (t *Trie) lookup(s []byte) (v Elem, sz int) {
  34. c0 := s[0]
  35. switch {
  36. case c0 < tx:
  37. return Elem(t.Values0[c0]), 1
  38. case c0 < t2:
  39. return 0, 1
  40. case c0 < t3:
  41. if len(s) < 2 {
  42. return 0, 0
  43. }
  44. i := t.Index0[c0]
  45. c1 := s[1]
  46. if c1 < tx || t2 <= c1 {
  47. return 0, 1
  48. }
  49. return t.lookupValue(i, c1), 2
  50. case c0 < t4:
  51. if len(s) < 3 {
  52. return 0, 0
  53. }
  54. i := t.Index0[c0]
  55. c1 := s[1]
  56. if c1 < tx || t2 <= c1 {
  57. return 0, 1
  58. }
  59. o := int(i)<<6 + int(c1)
  60. i = t.Index[o]
  61. c2 := s[2]
  62. if c2 < tx || t2 <= c2 {
  63. return 0, 2
  64. }
  65. return t.lookupValue(i, c2), 3
  66. case c0 < t5:
  67. if len(s) < 4 {
  68. return 0, 0
  69. }
  70. i := t.Index0[c0]
  71. c1 := s[1]
  72. if c1 < tx || t2 <= c1 {
  73. return 0, 1
  74. }
  75. o := int(i)<<6 + int(c1)
  76. i = t.Index[o]
  77. c2 := s[2]
  78. if c2 < tx || t2 <= c2 {
  79. return 0, 2
  80. }
  81. o = int(i)<<6 + int(c2)
  82. i = t.Index[o]
  83. c3 := s[3]
  84. if c3 < tx || t2 <= c3 {
  85. return 0, 3
  86. }
  87. return t.lookupValue(i, c3), 4
  88. }
  89. // Illegal rune
  90. return 0, 1
  91. }
  92. // The body of lookupString is a verbatim copy of that of lookup.
  93. func (t *Trie) lookupString(s string) (v Elem, sz int) {
  94. c0 := s[0]
  95. switch {
  96. case c0 < tx:
  97. return Elem(t.Values0[c0]), 1
  98. case c0 < t2:
  99. return 0, 1
  100. case c0 < t3:
  101. if len(s) < 2 {
  102. return 0, 0
  103. }
  104. i := t.Index0[c0]
  105. c1 := s[1]
  106. if c1 < tx || t2 <= c1 {
  107. return 0, 1
  108. }
  109. return t.lookupValue(i, c1), 2
  110. case c0 < t4:
  111. if len(s) < 3 {
  112. return 0, 0
  113. }
  114. i := t.Index0[c0]
  115. c1 := s[1]
  116. if c1 < tx || t2 <= c1 {
  117. return 0, 1
  118. }
  119. o := int(i)<<6 + int(c1)
  120. i = t.Index[o]
  121. c2 := s[2]
  122. if c2 < tx || t2 <= c2 {
  123. return 0, 2
  124. }
  125. return t.lookupValue(i, c2), 3
  126. case c0 < t5:
  127. if len(s) < 4 {
  128. return 0, 0
  129. }
  130. i := t.Index0[c0]
  131. c1 := s[1]
  132. if c1 < tx || t2 <= c1 {
  133. return 0, 1
  134. }
  135. o := int(i)<<6 + int(c1)
  136. i = t.Index[o]
  137. c2 := s[2]
  138. if c2 < tx || t2 <= c2 {
  139. return 0, 2
  140. }
  141. o = int(i)<<6 + int(c2)
  142. i = t.Index[o]
  143. c3 := s[3]
  144. if c3 < tx || t2 <= c3 {
  145. return 0, 3
  146. }
  147. return t.lookupValue(i, c3), 4
  148. }
  149. // Illegal rune
  150. return 0, 1
  151. }