You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

127 lines
3.6 KiB

  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package runes
  5. import (
  6. "unicode/utf8"
  7. "golang.org/x/text/transform"
  8. )
  9. // Note: below we pass invalid UTF-8 to the tIn and tNotIn transformers as is.
  10. // This is done for various reasons:
  11. // - To retain the semantics of the Nop transformer: if input is passed to a Nop
  12. // one would expect it to be unchanged.
  13. // - It would be very expensive to pass a converted RuneError to a transformer:
  14. // a transformer might need more source bytes after RuneError, meaning that
  15. // the only way to pass it safely is to create a new buffer and manage the
  16. // intermingling of RuneErrors and normal input.
  17. // - Many transformers leave ill-formed UTF-8 as is, so this is not
  18. // inconsistent. Generally ill-formed UTF-8 is only replaced if it is a
  19. // logical consequence of the operation (as for Map) or if it otherwise would
  20. // pose security concerns (as for Remove).
  21. // - An alternative would be to return an error on ill-formed UTF-8, but this
  22. // would be inconsistent with other operations.
  23. // If returns a transformer that applies tIn to consecutive runes for which
  24. // s.Contains(r) and tNotIn to consecutive runes for which !s.Contains(r). Reset
  25. // is called on tIn and tNotIn at the start of each run. A Nop transformer will
  26. // substitute a nil value passed to tIn or tNotIn. Invalid UTF-8 is translated
  27. // to RuneError to determine which transformer to apply, but is passed as is to
  28. // the respective transformer.
  29. func If(s Set, tIn, tNotIn transform.Transformer) Transformer {
  30. if tIn == nil && tNotIn == nil {
  31. return Transformer{transform.Nop}
  32. }
  33. if tIn == nil {
  34. tIn = transform.Nop
  35. }
  36. if tNotIn == nil {
  37. tNotIn = transform.Nop
  38. }
  39. a := &cond{
  40. tIn: tIn,
  41. tNotIn: tNotIn,
  42. f: s.Contains,
  43. }
  44. a.Reset()
  45. return Transformer{a}
  46. }
  47. type cond struct {
  48. tIn, tNotIn transform.Transformer
  49. f func(rune) bool
  50. check func(rune) bool // current check to perform
  51. t transform.Transformer // current transformer to use
  52. }
  53. // Reset implements transform.Transformer.
  54. func (t *cond) Reset() {
  55. t.check = t.is
  56. t.t = t.tIn
  57. t.t.Reset() // notIn will be reset on first usage.
  58. }
  59. func (t *cond) is(r rune) bool {
  60. if t.f(r) {
  61. return true
  62. }
  63. t.check = t.isNot
  64. t.t = t.tNotIn
  65. t.tNotIn.Reset()
  66. return false
  67. }
  68. func (t *cond) isNot(r rune) bool {
  69. if !t.f(r) {
  70. return true
  71. }
  72. t.check = t.is
  73. t.t = t.tIn
  74. t.tIn.Reset()
  75. return false
  76. }
  77. func (t *cond) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  78. p := 0
  79. for nSrc < len(src) && err == nil {
  80. // Don't process too much at a time, as the work might be wasted if the
  81. // destination buffer isn't large enough to hold the result or a
  82. // transform returns an error early.
  83. const maxChunk = 4096
  84. max := len(src)
  85. if n := nSrc + maxChunk; n < len(src) {
  86. max = n
  87. }
  88. atEnd := false
  89. size := 0
  90. current := t.t
  91. for ; p < max; p += size {
  92. var r rune
  93. r, size = utf8.DecodeRune(src[p:])
  94. if r == utf8.RuneError && size == 1 {
  95. if !atEOF && !utf8.FullRune(src[p:]) {
  96. err = transform.ErrShortSrc
  97. break
  98. }
  99. }
  100. if !t.check(r) {
  101. // The next rune will be the start of a new run.
  102. atEnd = true
  103. break
  104. }
  105. }
  106. nDst2, nSrc2, err2 := current.Transform(dst[nDst:], src[nSrc:p], atEnd || (atEOF && p == len(src)))
  107. nDst += nDst2
  108. nSrc += nSrc2
  109. if err2 != nil {
  110. return nDst, nSrc, err2
  111. }
  112. // At this point either err != nil or t.check will pass for the rune at p.
  113. p = nSrc + size
  114. }
  115. return nDst, nSrc, err
  116. }