You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

1318 rivejä
30 KiB

  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package transform
  5. import (
  6. "bytes"
  7. "errors"
  8. "fmt"
  9. "io/ioutil"
  10. "strconv"
  11. "strings"
  12. "testing"
  13. "time"
  14. "unicode/utf8"
  15. "golang.org/x/text/internal/testtext"
  16. )
  17. type lowerCaseASCII struct{ NopResetter }
  18. func (lowerCaseASCII) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  19. n := len(src)
  20. if n > len(dst) {
  21. n, err = len(dst), ErrShortDst
  22. }
  23. for i, c := range src[:n] {
  24. if 'A' <= c && c <= 'Z' {
  25. c += 'a' - 'A'
  26. }
  27. dst[i] = c
  28. }
  29. return n, n, err
  30. }
  31. // lowerCaseASCIILookahead lowercases the string and reports ErrShortSrc as long
  32. // as the input is not atEOF.
  33. type lowerCaseASCIILookahead struct{ NopResetter }
  34. func (lowerCaseASCIILookahead) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  35. n := len(src)
  36. if n > len(dst) {
  37. n, err = len(dst), ErrShortDst
  38. }
  39. for i, c := range src[:n] {
  40. if 'A' <= c && c <= 'Z' {
  41. c += 'a' - 'A'
  42. }
  43. dst[i] = c
  44. }
  45. if !atEOF {
  46. err = ErrShortSrc
  47. }
  48. return n, n, err
  49. }
  50. var errYouMentionedX = errors.New("you mentioned X")
  51. type dontMentionX struct{ NopResetter }
  52. func (dontMentionX) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  53. n := len(src)
  54. if n > len(dst) {
  55. n, err = len(dst), ErrShortDst
  56. }
  57. for i, c := range src[:n] {
  58. if c == 'X' {
  59. return i, i, errYouMentionedX
  60. }
  61. dst[i] = c
  62. }
  63. return n, n, err
  64. }
  65. var errAtEnd = errors.New("error after all text")
  66. type errorAtEnd struct{ NopResetter }
  67. func (errorAtEnd) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  68. n := copy(dst, src)
  69. if n < len(src) {
  70. return n, n, ErrShortDst
  71. }
  72. if atEOF {
  73. return n, n, errAtEnd
  74. }
  75. return n, n, nil
  76. }
  77. type replaceWithConstant struct {
  78. replacement string
  79. written int
  80. }
  81. func (t *replaceWithConstant) Reset() {
  82. t.written = 0
  83. }
  84. func (t *replaceWithConstant) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  85. if atEOF {
  86. nDst = copy(dst, t.replacement[t.written:])
  87. t.written += nDst
  88. if t.written < len(t.replacement) {
  89. err = ErrShortDst
  90. }
  91. }
  92. return nDst, len(src), err
  93. }
  94. type addAnXAtTheEnd struct{ NopResetter }
  95. func (addAnXAtTheEnd) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  96. n := copy(dst, src)
  97. if n < len(src) {
  98. return n, n, ErrShortDst
  99. }
  100. if !atEOF {
  101. return n, n, nil
  102. }
  103. if len(dst) == n {
  104. return n, n, ErrShortDst
  105. }
  106. dst[n] = 'X'
  107. return n + 1, n, nil
  108. }
  109. // doublerAtEOF is a strange Transformer that transforms "this" to "tthhiiss",
  110. // but only if atEOF is true.
  111. type doublerAtEOF struct{ NopResetter }
  112. func (doublerAtEOF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  113. if !atEOF {
  114. return 0, 0, ErrShortSrc
  115. }
  116. for i, c := range src {
  117. if 2*i+2 >= len(dst) {
  118. return 2 * i, i, ErrShortDst
  119. }
  120. dst[2*i+0] = c
  121. dst[2*i+1] = c
  122. }
  123. return 2 * len(src), len(src), nil
  124. }
  125. // rleDecode and rleEncode implement a toy run-length encoding: "aabbbbbbbbbb"
  126. // is encoded as "2a10b". The decoding is assumed to not contain any numbers.
  127. type rleDecode struct{ NopResetter }
  128. func (rleDecode) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  129. loop:
  130. for len(src) > 0 {
  131. n := 0
  132. for i, c := range src {
  133. if '0' <= c && c <= '9' {
  134. n = 10*n + int(c-'0')
  135. continue
  136. }
  137. if i == 0 {
  138. return nDst, nSrc, errors.New("rleDecode: bad input")
  139. }
  140. if n > len(dst) {
  141. return nDst, nSrc, ErrShortDst
  142. }
  143. for j := 0; j < n; j++ {
  144. dst[j] = c
  145. }
  146. dst, src = dst[n:], src[i+1:]
  147. nDst, nSrc = nDst+n, nSrc+i+1
  148. continue loop
  149. }
  150. if atEOF {
  151. return nDst, nSrc, errors.New("rleDecode: bad input")
  152. }
  153. return nDst, nSrc, ErrShortSrc
  154. }
  155. return nDst, nSrc, nil
  156. }
  157. type rleEncode struct {
  158. NopResetter
  159. // allowStutter means that "xxxxxxxx" can be encoded as "5x3x"
  160. // instead of always as "8x".
  161. allowStutter bool
  162. }
  163. func (e rleEncode) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  164. for len(src) > 0 {
  165. n, c0 := len(src), src[0]
  166. for i, c := range src[1:] {
  167. if c != c0 {
  168. n = i + 1
  169. break
  170. }
  171. }
  172. if n == len(src) && !atEOF && !e.allowStutter {
  173. return nDst, nSrc, ErrShortSrc
  174. }
  175. s := strconv.Itoa(n)
  176. if len(s) >= len(dst) {
  177. return nDst, nSrc, ErrShortDst
  178. }
  179. copy(dst, s)
  180. dst[len(s)] = c0
  181. dst, src = dst[len(s)+1:], src[n:]
  182. nDst, nSrc = nDst+len(s)+1, nSrc+n
  183. }
  184. return nDst, nSrc, nil
  185. }
  186. // trickler consumes all input bytes, but writes a single byte at a time to dst.
  187. type trickler []byte
  188. func (t *trickler) Reset() {
  189. *t = nil
  190. }
  191. func (t *trickler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  192. *t = append(*t, src...)
  193. if len(*t) == 0 {
  194. return 0, 0, nil
  195. }
  196. if len(dst) == 0 {
  197. return 0, len(src), ErrShortDst
  198. }
  199. dst[0] = (*t)[0]
  200. *t = (*t)[1:]
  201. if len(*t) > 0 {
  202. err = ErrShortDst
  203. }
  204. return 1, len(src), err
  205. }
  206. // delayedTrickler is like trickler, but delays writing output to dst. This is
  207. // highly unlikely to be relevant in practice, but it seems like a good idea
  208. // to have some tolerance as long as progress can be detected.
  209. type delayedTrickler []byte
  210. func (t *delayedTrickler) Reset() {
  211. *t = nil
  212. }
  213. func (t *delayedTrickler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  214. if len(*t) > 0 && len(dst) > 0 {
  215. dst[0] = (*t)[0]
  216. *t = (*t)[1:]
  217. nDst = 1
  218. }
  219. *t = append(*t, src...)
  220. if len(*t) > 0 {
  221. err = ErrShortDst
  222. }
  223. return nDst, len(src), err
  224. }
  225. type testCase struct {
  226. desc string
  227. t Transformer
  228. src string
  229. dstSize int
  230. srcSize int
  231. ioSize int
  232. wantStr string
  233. wantErr error
  234. wantIter int // number of iterations taken; 0 means we don't care.
  235. }
  236. func (t testCase) String() string {
  237. return tstr(t.t) + "; " + t.desc
  238. }
  239. func tstr(t Transformer) string {
  240. if stringer, ok := t.(fmt.Stringer); ok {
  241. return stringer.String()
  242. }
  243. s := fmt.Sprintf("%T", t)
  244. return s[1+strings.Index(s, "."):]
  245. }
  246. func (c chain) String() string {
  247. buf := &bytes.Buffer{}
  248. buf.WriteString("Chain(")
  249. for i, l := range c.link[:len(c.link)-1] {
  250. if i != 0 {
  251. fmt.Fprint(buf, ", ")
  252. }
  253. buf.WriteString(tstr(l.t))
  254. }
  255. buf.WriteString(")")
  256. return buf.String()
  257. }
  258. var testCases = []testCase{
  259. {
  260. desc: "empty",
  261. t: lowerCaseASCII{},
  262. src: "",
  263. dstSize: 100,
  264. srcSize: 100,
  265. wantStr: "",
  266. },
  267. {
  268. desc: "basic",
  269. t: lowerCaseASCII{},
  270. src: "Hello WORLD.",
  271. dstSize: 100,
  272. srcSize: 100,
  273. wantStr: "hello world.",
  274. },
  275. {
  276. desc: "small dst",
  277. t: lowerCaseASCII{},
  278. src: "Hello WORLD.",
  279. dstSize: 3,
  280. srcSize: 100,
  281. wantStr: "hello world.",
  282. },
  283. {
  284. desc: "small src",
  285. t: lowerCaseASCII{},
  286. src: "Hello WORLD.",
  287. dstSize: 100,
  288. srcSize: 4,
  289. wantStr: "hello world.",
  290. },
  291. {
  292. desc: "small buffers",
  293. t: lowerCaseASCII{},
  294. src: "Hello WORLD.",
  295. dstSize: 3,
  296. srcSize: 4,
  297. wantStr: "hello world.",
  298. },
  299. {
  300. desc: "very small buffers",
  301. t: lowerCaseASCII{},
  302. src: "Hello WORLD.",
  303. dstSize: 1,
  304. srcSize: 1,
  305. wantStr: "hello world.",
  306. },
  307. {
  308. desc: "small dst with lookahead",
  309. t: lowerCaseASCIILookahead{},
  310. src: "Hello WORLD.",
  311. dstSize: 3,
  312. srcSize: 100,
  313. wantStr: "hello world.",
  314. },
  315. {
  316. desc: "small src with lookahead",
  317. t: lowerCaseASCIILookahead{},
  318. src: "Hello WORLD.",
  319. dstSize: 100,
  320. srcSize: 4,
  321. wantStr: "hello world.",
  322. },
  323. {
  324. desc: "small buffers with lookahead",
  325. t: lowerCaseASCIILookahead{},
  326. src: "Hello WORLD.",
  327. dstSize: 3,
  328. srcSize: 4,
  329. wantStr: "hello world.",
  330. },
  331. {
  332. desc: "very small buffers with lookahead",
  333. t: lowerCaseASCIILookahead{},
  334. src: "Hello WORLD.",
  335. dstSize: 1,
  336. srcSize: 2,
  337. wantStr: "hello world.",
  338. },
  339. {
  340. desc: "user error",
  341. t: dontMentionX{},
  342. src: "The First Rule of Transform Club: don't mention Mister X, ever.",
  343. dstSize: 100,
  344. srcSize: 100,
  345. wantStr: "The First Rule of Transform Club: don't mention Mister ",
  346. wantErr: errYouMentionedX,
  347. },
  348. {
  349. desc: "user error at end",
  350. t: errorAtEnd{},
  351. src: "All goes well until it doesn't.",
  352. dstSize: 100,
  353. srcSize: 100,
  354. wantStr: "All goes well until it doesn't.",
  355. wantErr: errAtEnd,
  356. },
  357. {
  358. desc: "user error at end, incremental",
  359. t: errorAtEnd{},
  360. src: "All goes well until it doesn't.",
  361. dstSize: 10,
  362. srcSize: 10,
  363. wantStr: "All goes well until it doesn't.",
  364. wantErr: errAtEnd,
  365. },
  366. {
  367. desc: "replace entire non-empty string with one byte",
  368. t: &replaceWithConstant{replacement: "X"},
  369. src: "none of this will be copied",
  370. dstSize: 1,
  371. srcSize: 10,
  372. wantStr: "X",
  373. },
  374. {
  375. desc: "replace entire empty string with one byte",
  376. t: &replaceWithConstant{replacement: "X"},
  377. src: "",
  378. dstSize: 1,
  379. srcSize: 10,
  380. wantStr: "X",
  381. },
  382. {
  383. desc: "replace entire empty string with seven bytes",
  384. t: &replaceWithConstant{replacement: "ABCDEFG"},
  385. src: "",
  386. dstSize: 3,
  387. srcSize: 10,
  388. wantStr: "ABCDEFG",
  389. },
  390. {
  391. desc: "add an X (initialBufSize-1)",
  392. t: addAnXAtTheEnd{},
  393. src: aaa[:initialBufSize-1],
  394. dstSize: 10,
  395. srcSize: 10,
  396. wantStr: aaa[:initialBufSize-1] + "X",
  397. },
  398. {
  399. desc: "add an X (initialBufSize+0)",
  400. t: addAnXAtTheEnd{},
  401. src: aaa[:initialBufSize+0],
  402. dstSize: 10,
  403. srcSize: 10,
  404. wantStr: aaa[:initialBufSize+0] + "X",
  405. },
  406. {
  407. desc: "add an X (initialBufSize+1)",
  408. t: addAnXAtTheEnd{},
  409. src: aaa[:initialBufSize+1],
  410. dstSize: 10,
  411. srcSize: 10,
  412. wantStr: aaa[:initialBufSize+1] + "X",
  413. },
  414. {
  415. desc: "small buffers",
  416. t: dontMentionX{},
  417. src: "The First Rule of Transform Club: don't mention Mister X, ever.",
  418. dstSize: 10,
  419. srcSize: 10,
  420. wantStr: "The First Rule of Transform Club: don't mention Mister ",
  421. wantErr: errYouMentionedX,
  422. },
  423. {
  424. desc: "very small buffers",
  425. t: dontMentionX{},
  426. src: "The First Rule of Transform Club: don't mention Mister X, ever.",
  427. dstSize: 1,
  428. srcSize: 1,
  429. wantStr: "The First Rule of Transform Club: don't mention Mister ",
  430. wantErr: errYouMentionedX,
  431. },
  432. {
  433. desc: "only transform at EOF",
  434. t: doublerAtEOF{},
  435. src: "this",
  436. dstSize: 100,
  437. srcSize: 100,
  438. wantStr: "tthhiiss",
  439. },
  440. {
  441. desc: "basic",
  442. t: rleDecode{},
  443. src: "1a2b3c10d11e0f1g",
  444. dstSize: 100,
  445. srcSize: 100,
  446. wantStr: "abbcccddddddddddeeeeeeeeeeeg",
  447. },
  448. {
  449. desc: "long",
  450. t: rleDecode{},
  451. src: "12a23b34c45d56e99z",
  452. dstSize: 100,
  453. srcSize: 100,
  454. wantStr: strings.Repeat("a", 12) +
  455. strings.Repeat("b", 23) +
  456. strings.Repeat("c", 34) +
  457. strings.Repeat("d", 45) +
  458. strings.Repeat("e", 56) +
  459. strings.Repeat("z", 99),
  460. },
  461. {
  462. desc: "tight buffers",
  463. t: rleDecode{},
  464. src: "1a2b3c10d11e0f1g",
  465. dstSize: 11,
  466. srcSize: 3,
  467. wantStr: "abbcccddddddddddeeeeeeeeeeeg",
  468. },
  469. {
  470. desc: "short dst",
  471. t: rleDecode{},
  472. src: "1a2b3c10d11e0f1g",
  473. dstSize: 10,
  474. srcSize: 3,
  475. wantStr: "abbcccdddddddddd",
  476. wantErr: ErrShortDst,
  477. },
  478. {
  479. desc: "short src",
  480. t: rleDecode{},
  481. src: "1a2b3c10d11e0f1g",
  482. dstSize: 11,
  483. srcSize: 2,
  484. ioSize: 2,
  485. wantStr: "abbccc",
  486. wantErr: ErrShortSrc,
  487. },
  488. {
  489. desc: "basic",
  490. t: rleEncode{},
  491. src: "abbcccddddddddddeeeeeeeeeeeg",
  492. dstSize: 100,
  493. srcSize: 100,
  494. wantStr: "1a2b3c10d11e1g",
  495. },
  496. {
  497. desc: "long",
  498. t: rleEncode{},
  499. src: strings.Repeat("a", 12) +
  500. strings.Repeat("b", 23) +
  501. strings.Repeat("c", 34) +
  502. strings.Repeat("d", 45) +
  503. strings.Repeat("e", 56) +
  504. strings.Repeat("z", 99),
  505. dstSize: 100,
  506. srcSize: 100,
  507. wantStr: "12a23b34c45d56e99z",
  508. },
  509. {
  510. desc: "tight buffers",
  511. t: rleEncode{},
  512. src: "abbcccddddddddddeeeeeeeeeeeg",
  513. dstSize: 3,
  514. srcSize: 12,
  515. wantStr: "1a2b3c10d11e1g",
  516. },
  517. {
  518. desc: "short dst",
  519. t: rleEncode{},
  520. src: "abbcccddddddddddeeeeeeeeeeeg",
  521. dstSize: 2,
  522. srcSize: 12,
  523. wantStr: "1a2b3c",
  524. wantErr: ErrShortDst,
  525. },
  526. {
  527. desc: "short src",
  528. t: rleEncode{},
  529. src: "abbcccddddddddddeeeeeeeeeeeg",
  530. dstSize: 3,
  531. srcSize: 11,
  532. ioSize: 11,
  533. wantStr: "1a2b3c10d",
  534. wantErr: ErrShortSrc,
  535. },
  536. {
  537. desc: "allowStutter = false",
  538. t: rleEncode{allowStutter: false},
  539. src: "aaaabbbbbbbbccccddddd",
  540. dstSize: 10,
  541. srcSize: 10,
  542. wantStr: "4a8b4c5d",
  543. },
  544. {
  545. desc: "allowStutter = true",
  546. t: rleEncode{allowStutter: true},
  547. src: "aaaabbbbbbbbccccddddd",
  548. dstSize: 10,
  549. srcSize: 10,
  550. ioSize: 10,
  551. wantStr: "4a6b2b4c4d1d",
  552. },
  553. {
  554. desc: "trickler",
  555. t: &trickler{},
  556. src: "abcdefghijklm",
  557. dstSize: 3,
  558. srcSize: 15,
  559. wantStr: "abcdefghijklm",
  560. },
  561. {
  562. desc: "delayedTrickler",
  563. t: &delayedTrickler{},
  564. src: "abcdefghijklm",
  565. dstSize: 3,
  566. srcSize: 15,
  567. wantStr: "abcdefghijklm",
  568. },
  569. }
  570. func TestReader(t *testing.T) {
  571. for _, tc := range testCases {
  572. testtext.Run(t, tc.desc, func(t *testing.T) {
  573. r := NewReader(strings.NewReader(tc.src), tc.t)
  574. // Differently sized dst and src buffers are not part of the
  575. // exported API. We override them manually.
  576. r.dst = make([]byte, tc.dstSize)
  577. r.src = make([]byte, tc.srcSize)
  578. got, err := ioutil.ReadAll(r)
  579. str := string(got)
  580. if str != tc.wantStr || err != tc.wantErr {
  581. t.Errorf("\ngot %q, %v\nwant %q, %v", str, err, tc.wantStr, tc.wantErr)
  582. }
  583. })
  584. }
  585. }
  586. func TestWriter(t *testing.T) {
  587. tests := append(testCases, chainTests()...)
  588. for _, tc := range tests {
  589. sizes := []int{1, 2, 3, 4, 5, 10, 100, 1000}
  590. if tc.ioSize > 0 {
  591. sizes = []int{tc.ioSize}
  592. }
  593. for _, sz := range sizes {
  594. testtext.Run(t, fmt.Sprintf("%s/%d", tc.desc, sz), func(t *testing.T) {
  595. bb := &bytes.Buffer{}
  596. w := NewWriter(bb, tc.t)
  597. // Differently sized dst and src buffers are not part of the
  598. // exported API. We override them manually.
  599. w.dst = make([]byte, tc.dstSize)
  600. w.src = make([]byte, tc.srcSize)
  601. src := make([]byte, sz)
  602. var err error
  603. for b := tc.src; len(b) > 0 && err == nil; {
  604. n := copy(src, b)
  605. b = b[n:]
  606. m := 0
  607. m, err = w.Write(src[:n])
  608. if m != n && err == nil {
  609. t.Errorf("did not consume all bytes %d < %d", m, n)
  610. }
  611. }
  612. if err == nil {
  613. err = w.Close()
  614. }
  615. str := bb.String()
  616. if str != tc.wantStr || err != tc.wantErr {
  617. t.Errorf("\ngot %q, %v\nwant %q, %v", str, err, tc.wantStr, tc.wantErr)
  618. }
  619. })
  620. }
  621. }
  622. }
  623. func TestNop(t *testing.T) {
  624. testCases := []struct {
  625. str string
  626. dstSize int
  627. err error
  628. }{
  629. {"", 0, nil},
  630. {"", 10, nil},
  631. {"a", 0, ErrShortDst},
  632. {"a", 1, nil},
  633. {"a", 10, nil},
  634. }
  635. for i, tc := range testCases {
  636. dst := make([]byte, tc.dstSize)
  637. nDst, nSrc, err := Nop.Transform(dst, []byte(tc.str), true)
  638. want := tc.str
  639. if tc.dstSize < len(want) {
  640. want = want[:tc.dstSize]
  641. }
  642. if got := string(dst[:nDst]); got != want || err != tc.err || nSrc != nDst {
  643. t.Errorf("%d:\ngot %q, %d, %v\nwant %q, %d, %v", i, got, nSrc, err, want, nDst, tc.err)
  644. }
  645. }
  646. }
  647. func TestDiscard(t *testing.T) {
  648. testCases := []struct {
  649. str string
  650. dstSize int
  651. }{
  652. {"", 0},
  653. {"", 10},
  654. {"a", 0},
  655. {"ab", 10},
  656. }
  657. for i, tc := range testCases {
  658. nDst, nSrc, err := Discard.Transform(make([]byte, tc.dstSize), []byte(tc.str), true)
  659. if nDst != 0 || nSrc != len(tc.str) || err != nil {
  660. t.Errorf("%d:\ngot %q, %d, %v\nwant 0, %d, nil", i, nDst, nSrc, err, len(tc.str))
  661. }
  662. }
  663. }
  664. // mkChain creates a Chain transformer. x must be alternating between transformer
  665. // and bufSize, like T, (sz, T)*
  666. func mkChain(x ...interface{}) *chain {
  667. t := []Transformer{}
  668. for i := 0; i < len(x); i += 2 {
  669. t = append(t, x[i].(Transformer))
  670. }
  671. c := Chain(t...).(*chain)
  672. for i, j := 1, 1; i < len(x); i, j = i+2, j+1 {
  673. c.link[j].b = make([]byte, x[i].(int))
  674. }
  675. return c
  676. }
  677. func chainTests() []testCase {
  678. return []testCase{
  679. {
  680. desc: "nil error",
  681. t: mkChain(rleEncode{}, 100, lowerCaseASCII{}),
  682. src: "ABB",
  683. dstSize: 100,
  684. srcSize: 100,
  685. wantStr: "1a2b",
  686. wantErr: nil,
  687. wantIter: 1,
  688. },
  689. {
  690. desc: "short dst buffer",
  691. t: mkChain(lowerCaseASCII{}, 3, rleDecode{}),
  692. src: "1a2b3c10d11e0f1g",
  693. dstSize: 10,
  694. srcSize: 3,
  695. wantStr: "abbcccdddddddddd",
  696. wantErr: ErrShortDst,
  697. },
  698. {
  699. desc: "short internal dst buffer",
  700. t: mkChain(lowerCaseASCII{}, 3, rleDecode{}, 10, Nop),
  701. src: "1a2b3c10d11e0f1g",
  702. dstSize: 100,
  703. srcSize: 3,
  704. wantStr: "abbcccdddddddddd",
  705. wantErr: errShortInternal,
  706. },
  707. {
  708. desc: "short internal dst buffer from input",
  709. t: mkChain(rleDecode{}, 10, Nop),
  710. src: "1a2b3c10d11e0f1g",
  711. dstSize: 100,
  712. srcSize: 3,
  713. wantStr: "abbcccdddddddddd",
  714. wantErr: errShortInternal,
  715. },
  716. {
  717. desc: "empty short internal dst buffer",
  718. t: mkChain(lowerCaseASCII{}, 3, rleDecode{}, 10, Nop),
  719. src: "4a7b11e0f1g",
  720. dstSize: 100,
  721. srcSize: 3,
  722. wantStr: "aaaabbbbbbb",
  723. wantErr: errShortInternal,
  724. },
  725. {
  726. desc: "empty short internal dst buffer from input",
  727. t: mkChain(rleDecode{}, 10, Nop),
  728. src: "4a7b11e0f1g",
  729. dstSize: 100,
  730. srcSize: 3,
  731. wantStr: "aaaabbbbbbb",
  732. wantErr: errShortInternal,
  733. },
  734. {
  735. desc: "short internal src buffer after full dst buffer",
  736. t: mkChain(Nop, 5, rleEncode{}, 10, Nop),
  737. src: "cccccddddd",
  738. dstSize: 100,
  739. srcSize: 100,
  740. wantStr: "",
  741. wantErr: errShortInternal,
  742. wantIter: 1,
  743. },
  744. {
  745. desc: "short internal src buffer after short dst buffer; test lastFull",
  746. t: mkChain(rleDecode{}, 5, rleEncode{}, 4, Nop),
  747. src: "2a1b4c6d",
  748. dstSize: 100,
  749. srcSize: 100,
  750. wantStr: "2a1b",
  751. wantErr: errShortInternal,
  752. },
  753. {
  754. desc: "short internal src buffer after successful complete fill",
  755. t: mkChain(Nop, 3, rleDecode{}),
  756. src: "123a4b",
  757. dstSize: 4,
  758. srcSize: 3,
  759. wantStr: "",
  760. wantErr: errShortInternal,
  761. wantIter: 1,
  762. },
  763. {
  764. desc: "short internal src buffer after short dst buffer; test lastFull",
  765. t: mkChain(rleDecode{}, 5, rleEncode{}),
  766. src: "2a1b4c6d",
  767. dstSize: 4,
  768. srcSize: 100,
  769. wantStr: "2a1b",
  770. wantErr: errShortInternal,
  771. },
  772. {
  773. desc: "short src buffer",
  774. t: mkChain(rleEncode{}, 5, Nop),
  775. src: "abbcccddddeeeee",
  776. dstSize: 4,
  777. srcSize: 4,
  778. ioSize: 4,
  779. wantStr: "1a2b3c",
  780. wantErr: ErrShortSrc,
  781. },
  782. {
  783. desc: "process all in one go",
  784. t: mkChain(rleEncode{}, 5, Nop),
  785. src: "abbcccddddeeeeeffffff",
  786. dstSize: 100,
  787. srcSize: 100,
  788. wantStr: "1a2b3c4d5e6f",
  789. wantErr: nil,
  790. wantIter: 1,
  791. },
  792. {
  793. desc: "complete processing downstream after error",
  794. t: mkChain(dontMentionX{}, 2, rleDecode{}, 5, Nop),
  795. src: "3a4b5eX",
  796. dstSize: 100,
  797. srcSize: 100,
  798. ioSize: 100,
  799. wantStr: "aaabbbbeeeee",
  800. wantErr: errYouMentionedX,
  801. },
  802. {
  803. desc: "return downstream fatal errors first (followed by short dst)",
  804. t: mkChain(dontMentionX{}, 8, rleDecode{}, 4, Nop),
  805. src: "3a4b5eX",
  806. dstSize: 100,
  807. srcSize: 100,
  808. ioSize: 100,
  809. wantStr: "aaabbbb",
  810. wantErr: errShortInternal,
  811. },
  812. {
  813. desc: "return downstream fatal errors first (followed by short src)",
  814. t: mkChain(dontMentionX{}, 5, Nop, 1, rleDecode{}),
  815. src: "1a5bX",
  816. dstSize: 100,
  817. srcSize: 100,
  818. ioSize: 100,
  819. wantStr: "",
  820. wantErr: errShortInternal,
  821. },
  822. {
  823. desc: "short internal",
  824. t: mkChain(Nop, 11, rleEncode{}, 3, Nop),
  825. src: "abbcccddddddddddeeeeeeeeeeeg",
  826. dstSize: 3,
  827. srcSize: 100,
  828. wantStr: "1a2b3c10d",
  829. wantErr: errShortInternal,
  830. },
  831. }
  832. }
  833. func doTransform(tc testCase) (res string, iter int, err error) {
  834. tc.t.Reset()
  835. dst := make([]byte, tc.dstSize)
  836. out, in := make([]byte, 0, 2*len(tc.src)), []byte(tc.src)
  837. for {
  838. iter++
  839. src, atEOF := in, true
  840. if len(src) > tc.srcSize {
  841. src, atEOF = src[:tc.srcSize], false
  842. }
  843. nDst, nSrc, err := tc.t.Transform(dst, src, atEOF)
  844. out = append(out, dst[:nDst]...)
  845. in = in[nSrc:]
  846. switch {
  847. case err == nil && len(in) != 0:
  848. case err == ErrShortSrc && nSrc > 0:
  849. case err == ErrShortDst && (nDst > 0 || nSrc > 0):
  850. default:
  851. return string(out), iter, err
  852. }
  853. }
  854. }
  855. func TestChain(t *testing.T) {
  856. if c, ok := Chain().(nop); !ok {
  857. t.Errorf("empty chain: %v; want Nop", c)
  858. }
  859. // Test Chain for a single Transformer.
  860. for _, tc := range testCases {
  861. tc.t = Chain(tc.t)
  862. str, _, err := doTransform(tc)
  863. if str != tc.wantStr || err != tc.wantErr {
  864. t.Errorf("%s:\ngot %q, %v\nwant %q, %v", tc, str, err, tc.wantStr, tc.wantErr)
  865. }
  866. }
  867. tests := chainTests()
  868. sizes := []int{1, 2, 3, 4, 5, 7, 10, 100, 1000}
  869. addTest := func(tc testCase, t *chain) {
  870. if t.link[0].t != tc.t && tc.wantErr == ErrShortSrc {
  871. tc.wantErr = errShortInternal
  872. }
  873. if t.link[len(t.link)-2].t != tc.t && tc.wantErr == ErrShortDst {
  874. tc.wantErr = errShortInternal
  875. }
  876. tc.t = t
  877. tests = append(tests, tc)
  878. }
  879. for _, tc := range testCases {
  880. for _, sz := range sizes {
  881. tt := tc
  882. tt.dstSize = sz
  883. addTest(tt, mkChain(tc.t, tc.dstSize, Nop))
  884. addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 2, Nop))
  885. addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop))
  886. if sz >= tc.dstSize && (tc.wantErr != ErrShortDst || sz == tc.dstSize) {
  887. addTest(tt, mkChain(Nop, tc.srcSize, tc.t))
  888. addTest(tt, mkChain(Nop, 100, Nop, tc.srcSize, tc.t))
  889. }
  890. }
  891. }
  892. for _, tc := range testCases {
  893. tt := tc
  894. tt.dstSize = 1
  895. tt.wantStr = ""
  896. addTest(tt, mkChain(tc.t, tc.dstSize, Discard))
  897. addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Discard))
  898. addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, tc.dstSize, Discard))
  899. }
  900. for _, tc := range testCases {
  901. tt := tc
  902. tt.dstSize = 100
  903. tt.wantStr = strings.Replace(tc.src, "0f", "", -1)
  904. // Chain encoders and decoders.
  905. if _, ok := tc.t.(rleEncode); ok && tc.wantErr == nil {
  906. addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 1000, rleDecode{}))
  907. addTest(tt, mkChain(tc.t, tc.dstSize, Nop, tc.dstSize, rleDecode{}))
  908. addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleDecode{}))
  909. // decoding needs larger destinations
  910. addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, rleDecode{}, 100, Nop))
  911. addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleDecode{}, 100, Nop))
  912. } else if _, ok := tc.t.(rleDecode); ok && tc.wantErr == nil {
  913. // The internal buffer size may need to be the sum of the maximum segment
  914. // size of the two encoders!
  915. addTest(tt, mkChain(tc.t, 2*tc.dstSize, rleEncode{}))
  916. addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 101, rleEncode{}))
  917. addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleEncode{}))
  918. addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 200, rleEncode{}, 100, Nop))
  919. }
  920. }
  921. for _, tc := range tests {
  922. str, iter, err := doTransform(tc)
  923. mi := tc.wantIter != 0 && tc.wantIter != iter
  924. if str != tc.wantStr || err != tc.wantErr || mi {
  925. t.Errorf("%s:\ngot iter:%d, %q, %v\nwant iter:%d, %q, %v", tc, iter, str, err, tc.wantIter, tc.wantStr, tc.wantErr)
  926. }
  927. break
  928. }
  929. }
  930. func TestRemoveFunc(t *testing.T) {
  931. filter := RemoveFunc(func(r rune) bool {
  932. return strings.IndexRune("ab\u0300\u1234,", r) != -1
  933. })
  934. tests := []testCase{
  935. {
  936. src: ",",
  937. wantStr: "",
  938. },
  939. {
  940. src: "c",
  941. wantStr: "c",
  942. },
  943. {
  944. src: "\u2345",
  945. wantStr: "\u2345",
  946. },
  947. {
  948. src: "tschüß",
  949. wantStr: "tschüß",
  950. },
  951. {
  952. src: ",до,свидания,",
  953. wantStr: "досвидания",
  954. },
  955. {
  956. src: "a\xbd\xb2=\xbc ⌘",
  957. wantStr: "\uFFFD\uFFFD=\uFFFD ⌘",
  958. },
  959. {
  960. // If we didn't replace illegal bytes with RuneError, the result
  961. // would be \u0300 or the code would need to be more complex.
  962. src: "\xcc\u0300\x80",
  963. wantStr: "\uFFFD\uFFFD",
  964. },
  965. {
  966. src: "\xcc\u0300\x80",
  967. dstSize: 3,
  968. wantStr: "\uFFFD\uFFFD",
  969. wantIter: 2,
  970. },
  971. {
  972. // Test a long buffer greater than the internal buffer size
  973. src: "hello\xcc\xcc\xccworld",
  974. srcSize: 13,
  975. wantStr: "hello\uFFFD\uFFFD\uFFFDworld",
  976. wantIter: 1,
  977. },
  978. {
  979. src: "\u2345",
  980. dstSize: 2,
  981. wantStr: "",
  982. wantErr: ErrShortDst,
  983. },
  984. {
  985. src: "\xcc",
  986. dstSize: 2,
  987. wantStr: "",
  988. wantErr: ErrShortDst,
  989. },
  990. {
  991. src: "\u0300",
  992. dstSize: 2,
  993. srcSize: 1,
  994. wantStr: "",
  995. wantErr: ErrShortSrc,
  996. },
  997. {
  998. t: RemoveFunc(func(r rune) bool {
  999. return r == utf8.RuneError
  1000. }),
  1001. src: "\xcc\u0300\x80",
  1002. wantStr: "\u0300",
  1003. },
  1004. }
  1005. for _, tc := range tests {
  1006. tc.desc = tc.src
  1007. if tc.t == nil {
  1008. tc.t = filter
  1009. }
  1010. if tc.dstSize == 0 {
  1011. tc.dstSize = 100
  1012. }
  1013. if tc.srcSize == 0 {
  1014. tc.srcSize = 100
  1015. }
  1016. str, iter, err := doTransform(tc)
  1017. mi := tc.wantIter != 0 && tc.wantIter != iter
  1018. if str != tc.wantStr || err != tc.wantErr || mi {
  1019. t.Errorf("%+q:\ngot iter:%d, %+q, %v\nwant iter:%d, %+q, %v", tc.src, iter, str, err, tc.wantIter, tc.wantStr, tc.wantErr)
  1020. }
  1021. tc.src = str
  1022. idem, _, _ := doTransform(tc)
  1023. if str != idem {
  1024. t.Errorf("%+q: found %+q; want %+q", tc.src, idem, str)
  1025. }
  1026. }
  1027. }
  1028. func testString(t *testing.T, f func(Transformer, string) (string, int, error)) {
  1029. for _, tt := range append(testCases, chainTests()...) {
  1030. if tt.desc == "allowStutter = true" {
  1031. // We don't have control over the buffer size, so we eliminate tests
  1032. // that depend on a specific buffer size being set.
  1033. continue
  1034. }
  1035. if tt.wantErr == ErrShortDst || tt.wantErr == ErrShortSrc {
  1036. // The result string will be different.
  1037. continue
  1038. }
  1039. testtext.Run(t, tt.desc, func(t *testing.T) {
  1040. got, n, err := f(tt.t, tt.src)
  1041. if tt.wantErr != err {
  1042. t.Errorf("error: got %v; want %v", err, tt.wantErr)
  1043. }
  1044. // Check that err == nil implies that n == len(tt.src). Note that vice
  1045. // versa isn't necessarily true.
  1046. if err == nil && n != len(tt.src) {
  1047. t.Errorf("err == nil: got %d bytes, want %d", n, err)
  1048. }
  1049. if got != tt.wantStr {
  1050. t.Errorf("string: got %q; want %q", got, tt.wantStr)
  1051. }
  1052. })
  1053. }
  1054. }
  1055. func TestBytes(t *testing.T) {
  1056. testString(t, func(z Transformer, s string) (string, int, error) {
  1057. b, n, err := Bytes(z, []byte(s))
  1058. return string(b), n, err
  1059. })
  1060. }
  1061. func TestAppend(t *testing.T) {
  1062. // Create a bunch of subtests for different buffer sizes.
  1063. testCases := [][]byte{
  1064. nil,
  1065. make([]byte, 0, 0),
  1066. make([]byte, 0, 1),
  1067. make([]byte, 1, 1),
  1068. make([]byte, 1, 5),
  1069. make([]byte, 100, 100),
  1070. make([]byte, 100, 200),
  1071. }
  1072. for _, tc := range testCases {
  1073. testString(t, func(z Transformer, s string) (string, int, error) {
  1074. b, n, err := Append(z, tc, []byte(s))
  1075. return string(b[len(tc):]), n, err
  1076. })
  1077. }
  1078. }
  1079. func TestString(t *testing.T) {
  1080. testtext.Run(t, "transform", func(t *testing.T) { testString(t, String) })
  1081. // Overrun the internal destination buffer.
  1082. for i, s := range []string{
  1083. aaa[:1*initialBufSize-1],
  1084. aaa[:1*initialBufSize+0],
  1085. aaa[:1*initialBufSize+1],
  1086. AAA[:1*initialBufSize-1],
  1087. AAA[:1*initialBufSize+0],
  1088. AAA[:1*initialBufSize+1],
  1089. AAA[:2*initialBufSize-1],
  1090. AAA[:2*initialBufSize+0],
  1091. AAA[:2*initialBufSize+1],
  1092. aaa[:1*initialBufSize-2] + "A",
  1093. aaa[:1*initialBufSize-1] + "A",
  1094. aaa[:1*initialBufSize+0] + "A",
  1095. aaa[:1*initialBufSize+1] + "A",
  1096. } {
  1097. testtext.Run(t, fmt.Sprint("dst buffer test using lower/", i), func(t *testing.T) {
  1098. got, _, _ := String(lowerCaseASCII{}, s)
  1099. if want := strings.ToLower(s); got != want {
  1100. t.Errorf("got %s (%d); want %s (%d)", got, len(got), want, len(want))
  1101. }
  1102. })
  1103. }
  1104. // Overrun the internal source buffer.
  1105. for i, s := range []string{
  1106. aaa[:1*initialBufSize-1],
  1107. aaa[:1*initialBufSize+0],
  1108. aaa[:1*initialBufSize+1],
  1109. aaa[:2*initialBufSize+1],
  1110. aaa[:2*initialBufSize+0],
  1111. aaa[:2*initialBufSize+1],
  1112. } {
  1113. testtext.Run(t, fmt.Sprint("src buffer test using rleEncode/", i), func(t *testing.T) {
  1114. got, _, _ := String(rleEncode{}, s)
  1115. if want := fmt.Sprintf("%da", len(s)); got != want {
  1116. t.Errorf("got %s (%d); want %s (%d)", got, len(got), want, len(want))
  1117. }
  1118. })
  1119. }
  1120. // Test allocations for non-changing strings.
  1121. // Note we still need to allocate a single buffer.
  1122. for i, s := range []string{
  1123. "",
  1124. "123456789",
  1125. aaa[:initialBufSize-1],
  1126. aaa[:initialBufSize+0],
  1127. aaa[:initialBufSize+1],
  1128. aaa[:10*initialBufSize],
  1129. } {
  1130. testtext.Run(t, fmt.Sprint("alloc/", i), func(t *testing.T) {
  1131. if n := testtext.AllocsPerRun(5, func() { String(&lowerCaseASCIILookahead{}, s) }); n > 1 {
  1132. t.Errorf("#allocs was %f; want 1", n)
  1133. }
  1134. })
  1135. }
  1136. }
  1137. // TestBytesAllocation tests that buffer growth stays limited with the trickler
  1138. // transformer, which behaves oddly but within spec. In case buffer growth is
  1139. // not correctly handled, the test will either panic with a failed allocation or
  1140. // thrash. To ensure the tests terminate under the last condition, we time out
  1141. // after some sufficiently long period of time.
  1142. func TestBytesAllocation(t *testing.T) {
  1143. done := make(chan bool)
  1144. go func() {
  1145. in := bytes.Repeat([]byte{'a'}, 1000)
  1146. tr := trickler(make([]byte, 1))
  1147. Bytes(&tr, in)
  1148. done <- true
  1149. }()
  1150. select {
  1151. case <-done:
  1152. case <-time.After(3 * time.Second):
  1153. t.Error("time out, likely due to excessive allocation")
  1154. }
  1155. }
  1156. // TestStringAllocation tests that buffer growth stays limited with the trickler
  1157. // transformer, which behaves oddly but within spec. In case buffer growth is
  1158. // not correctly handled, the test will either panic with a failed allocation or
  1159. // thrash. To ensure the tests terminate under the last condition, we time out
  1160. // after some sufficiently long period of time.
  1161. func TestStringAllocation(t *testing.T) {
  1162. done := make(chan bool)
  1163. go func() {
  1164. tr := trickler(make([]byte, 1))
  1165. String(&tr, aaa[:1000])
  1166. done <- true
  1167. }()
  1168. select {
  1169. case <-done:
  1170. case <-time.After(3 * time.Second):
  1171. t.Error("time out, likely due to excessive allocation")
  1172. }
  1173. }
  1174. func BenchmarkStringLowerEmpty(b *testing.B) {
  1175. for i := 0; i < b.N; i++ {
  1176. String(&lowerCaseASCIILookahead{}, "")
  1177. }
  1178. }
  1179. func BenchmarkStringLowerIdentical(b *testing.B) {
  1180. for i := 0; i < b.N; i++ {
  1181. String(&lowerCaseASCIILookahead{}, aaa[:4096])
  1182. }
  1183. }
  1184. func BenchmarkStringLowerChanged(b *testing.B) {
  1185. for i := 0; i < b.N; i++ {
  1186. String(&lowerCaseASCIILookahead{}, AAA[:4096])
  1187. }
  1188. }
  1189. var (
  1190. aaa = strings.Repeat("a", 4096)
  1191. AAA = strings.Repeat("A", 4096)
  1192. )