You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

282 lines
7.2 KiB

  1. // Copyright 2014 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package cases
  5. import (
  6. "golang.org/x/text/transform"
  7. )
  8. // A context is used for iterating over source bytes, fetching case info and
  9. // writing to a destination buffer.
  10. //
  11. // Casing operations may need more than one rune of context to decide how a rune
  12. // should be cased. Casing implementations should call checkpoint on context
  13. // whenever it is known to be safe to return the runes processed so far.
  14. //
  15. // It is recommended for implementations to not allow for more than 30 case
  16. // ignorables as lookahead (analogous to the limit in norm) and to use state if
  17. // unbounded lookahead is needed for cased runes.
  18. type context struct {
  19. dst, src []byte
  20. atEOF bool
  21. pDst int // pDst points past the last written rune in dst.
  22. pSrc int // pSrc points to the start of the currently scanned rune.
  23. // checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.
  24. nDst, nSrc int
  25. err error
  26. sz int // size of current rune
  27. info info // case information of currently scanned rune
  28. // State preserved across calls to Transform.
  29. isMidWord bool // false if next cased letter needs to be title-cased.
  30. }
  31. func (c *context) Reset() {
  32. c.isMidWord = false
  33. }
  34. // ret returns the return values for the Transform method. It checks whether
  35. // there were insufficient bytes in src to complete and introduces an error
  36. // accordingly, if necessary.
  37. func (c *context) ret() (nDst, nSrc int, err error) {
  38. if c.err != nil || c.nSrc == len(c.src) {
  39. return c.nDst, c.nSrc, c.err
  40. }
  41. // This point is only reached by mappers if there was no short destination
  42. // buffer. This means that the source buffer was exhausted and that c.sz was
  43. // set to 0 by next.
  44. if c.atEOF && c.pSrc == len(c.src) {
  45. return c.pDst, c.pSrc, nil
  46. }
  47. return c.nDst, c.nSrc, transform.ErrShortSrc
  48. }
  49. // checkpoint sets the return value buffer points for Transform to the current
  50. // positions.
  51. func (c *context) checkpoint() {
  52. if c.err == nil {
  53. c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz
  54. }
  55. }
  56. // unreadRune causes the last rune read by next to be reread on the next
  57. // invocation of next. Only one unreadRune may be called after a call to next.
  58. func (c *context) unreadRune() {
  59. c.sz = 0
  60. }
  61. func (c *context) next() bool {
  62. c.pSrc += c.sz
  63. if c.pSrc == len(c.src) || c.err != nil {
  64. c.info, c.sz = 0, 0
  65. return false
  66. }
  67. v, sz := trie.lookup(c.src[c.pSrc:])
  68. c.info, c.sz = info(v), sz
  69. if c.sz == 0 {
  70. if c.atEOF {
  71. // A zero size means we have an incomplete rune. If we are atEOF,
  72. // this means it is an illegal rune, which we will consume one
  73. // byte at a time.
  74. c.sz = 1
  75. } else {
  76. c.err = transform.ErrShortSrc
  77. return false
  78. }
  79. }
  80. return true
  81. }
  82. // writeBytes adds bytes to dst.
  83. func (c *context) writeBytes(b []byte) bool {
  84. if len(c.dst)-c.pDst < len(b) {
  85. c.err = transform.ErrShortDst
  86. return false
  87. }
  88. // This loop is faster than using copy.
  89. for _, ch := range b {
  90. c.dst[c.pDst] = ch
  91. c.pDst++
  92. }
  93. return true
  94. }
  95. // writeString writes the given string to dst.
  96. func (c *context) writeString(s string) bool {
  97. if len(c.dst)-c.pDst < len(s) {
  98. c.err = transform.ErrShortDst
  99. return false
  100. }
  101. // This loop is faster than using copy.
  102. for i := 0; i < len(s); i++ {
  103. c.dst[c.pDst] = s[i]
  104. c.pDst++
  105. }
  106. return true
  107. }
  108. // copy writes the current rune to dst.
  109. func (c *context) copy() bool {
  110. return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz])
  111. }
  112. // copyXOR copies the current rune to dst and modifies it by applying the XOR
  113. // pattern of the case info. It is the responsibility of the caller to ensure
  114. // that this is a rune with a XOR pattern defined.
  115. func (c *context) copyXOR() bool {
  116. if !c.copy() {
  117. return false
  118. }
  119. if c.info&xorIndexBit == 0 {
  120. // Fast path for 6-bit XOR pattern, which covers most cases.
  121. c.dst[c.pDst-1] ^= byte(c.info >> xorShift)
  122. } else {
  123. // Interpret XOR bits as an index.
  124. // TODO: test performance for unrolling this loop. Verify that we have
  125. // at least two bytes and at most three.
  126. idx := c.info >> xorShift
  127. for p := c.pDst - 1; ; p-- {
  128. c.dst[p] ^= xorData[idx]
  129. idx--
  130. if xorData[idx] == 0 {
  131. break
  132. }
  133. }
  134. }
  135. return true
  136. }
  137. // hasPrefix returns true if src[pSrc:] starts with the given string.
  138. func (c *context) hasPrefix(s string) bool {
  139. b := c.src[c.pSrc:]
  140. if len(b) < len(s) {
  141. return false
  142. }
  143. for i, c := range b[:len(s)] {
  144. if c != s[i] {
  145. return false
  146. }
  147. }
  148. return true
  149. }
  150. // caseType returns an info with only the case bits, normalized to either
  151. // cLower, cUpper, cTitle or cUncased.
  152. func (c *context) caseType() info {
  153. cm := c.info & 0x7
  154. if cm < 4 {
  155. return cm
  156. }
  157. if cm >= cXORCase {
  158. // xor the last bit of the rune with the case type bits.
  159. b := c.src[c.pSrc+c.sz-1]
  160. return info(b&1) ^ cm&0x3
  161. }
  162. if cm == cIgnorableCased {
  163. return cLower
  164. }
  165. return cUncased
  166. }
  167. // lower writes the lowercase version of the current rune to dst.
  168. func lower(c *context) bool {
  169. ct := c.caseType()
  170. if c.info&hasMappingMask == 0 || ct == cLower {
  171. return c.copy()
  172. }
  173. if c.info&exceptionBit == 0 {
  174. return c.copyXOR()
  175. }
  176. e := exceptions[c.info>>exceptionShift:]
  177. offset := 2 + e[0]&lengthMask // size of header + fold string
  178. if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
  179. return c.writeString(e[offset : offset+nLower])
  180. }
  181. return c.copy()
  182. }
  183. // upper writes the uppercase version of the current rune to dst.
  184. func upper(c *context) bool {
  185. ct := c.caseType()
  186. if c.info&hasMappingMask == 0 || ct == cUpper {
  187. return c.copy()
  188. }
  189. if c.info&exceptionBit == 0 {
  190. return c.copyXOR()
  191. }
  192. e := exceptions[c.info>>exceptionShift:]
  193. offset := 2 + e[0]&lengthMask // size of header + fold string
  194. // Get length of first special case mapping.
  195. n := (e[1] >> lengthBits) & lengthMask
  196. if ct == cTitle {
  197. // The first special case mapping is for lower. Set n to the second.
  198. if n == noChange {
  199. n = 0
  200. }
  201. n, e = e[1]&lengthMask, e[n:]
  202. }
  203. if n != noChange {
  204. return c.writeString(e[offset : offset+n])
  205. }
  206. return c.copy()
  207. }
  208. // title writes the title case version of the current rune to dst.
  209. func title(c *context) bool {
  210. ct := c.caseType()
  211. if c.info&hasMappingMask == 0 || ct == cTitle {
  212. return c.copy()
  213. }
  214. if c.info&exceptionBit == 0 {
  215. if ct == cLower {
  216. return c.copyXOR()
  217. }
  218. return c.copy()
  219. }
  220. // Get the exception data.
  221. e := exceptions[c.info>>exceptionShift:]
  222. offset := 2 + e[0]&lengthMask // size of header + fold string
  223. nFirst := (e[1] >> lengthBits) & lengthMask
  224. if nTitle := e[1] & lengthMask; nTitle != noChange {
  225. if nFirst != noChange {
  226. e = e[nFirst:]
  227. }
  228. return c.writeString(e[offset : offset+nTitle])
  229. }
  230. if ct == cLower && nFirst != noChange {
  231. // Use the uppercase version instead.
  232. return c.writeString(e[offset : offset+nFirst])
  233. }
  234. // Already in correct case.
  235. return c.copy()
  236. }
  237. // foldFull writes the foldFull version of the current rune to dst.
  238. func foldFull(c *context) bool {
  239. if c.info&hasMappingMask == 0 {
  240. return c.copy()
  241. }
  242. ct := c.caseType()
  243. if c.info&exceptionBit == 0 {
  244. if ct != cLower || c.info&inverseFoldBit != 0 {
  245. return c.copyXOR()
  246. }
  247. return c.copy()
  248. }
  249. e := exceptions[c.info>>exceptionShift:]
  250. n := e[0] & lengthMask
  251. if n == 0 {
  252. if ct == cLower {
  253. return c.copy()
  254. }
  255. n = (e[1] >> lengthBits) & lengthMask
  256. }
  257. return c.writeString(e[2 : 2+n])
  258. }