You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

181 lines
5.3 KiB

  1. // Copyright 2017 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package enctest
  5. import (
  6. "bytes"
  7. "fmt"
  8. "io"
  9. "io/ioutil"
  10. "strings"
  11. "testing"
  12. "golang.org/x/text/encoding"
  13. "golang.org/x/text/encoding/internal/identifier"
  14. "golang.org/x/text/transform"
  15. )
  16. // Encoder or Decoder
  17. type Transcoder interface {
  18. transform.Transformer
  19. Bytes([]byte) ([]byte, error)
  20. String(string) (string, error)
  21. }
  22. func TestEncoding(t *testing.T, e encoding.Encoding, encoded, utf8, prefix, suffix string) {
  23. for _, direction := range []string{"Decode", "Encode"} {
  24. t.Run(fmt.Sprintf("%v/%s", e, direction), func(t *testing.T) {
  25. var coder Transcoder
  26. var want, src, wPrefix, sPrefix, wSuffix, sSuffix string
  27. if direction == "Decode" {
  28. coder, want, src = e.NewDecoder(), utf8, encoded
  29. wPrefix, sPrefix, wSuffix, sSuffix = "", prefix, "", suffix
  30. } else {
  31. coder, want, src = e.NewEncoder(), encoded, utf8
  32. wPrefix, sPrefix, wSuffix, sSuffix = prefix, "", suffix, ""
  33. }
  34. dst := make([]byte, len(wPrefix)+len(want)+len(wSuffix))
  35. nDst, nSrc, err := coder.Transform(dst, []byte(sPrefix+src+sSuffix), true)
  36. if err != nil {
  37. t.Fatal(err)
  38. }
  39. if nDst != len(wPrefix)+len(want)+len(wSuffix) {
  40. t.Fatalf("nDst got %d, want %d",
  41. nDst, len(wPrefix)+len(want)+len(wSuffix))
  42. }
  43. if nSrc != len(sPrefix)+len(src)+len(sSuffix) {
  44. t.Fatalf("nSrc got %d, want %d",
  45. nSrc, len(sPrefix)+len(src)+len(sSuffix))
  46. }
  47. if got := string(dst); got != wPrefix+want+wSuffix {
  48. t.Fatalf("\ngot %q\nwant %q", got, wPrefix+want+wSuffix)
  49. }
  50. for _, n := range []int{0, 1, 2, 10, 123, 4567} {
  51. input := sPrefix + strings.Repeat(src, n) + sSuffix
  52. g, err := coder.String(input)
  53. if err != nil {
  54. t.Fatalf("Bytes: n=%d: %v", n, err)
  55. }
  56. if len(g) == 0 && len(input) == 0 {
  57. // If the input is empty then the output can be empty,
  58. // regardless of whatever wPrefix is.
  59. continue
  60. }
  61. got1, want1 := string(g), wPrefix+strings.Repeat(want, n)+wSuffix
  62. if got1 != want1 {
  63. t.Fatalf("ReadAll: n=%d\ngot %q\nwant %q",
  64. n, trim(got1), trim(want1))
  65. }
  66. }
  67. })
  68. }
  69. }
  70. func TestFile(t *testing.T, e encoding.Encoding) {
  71. for _, dir := range []string{"Decode", "Encode"} {
  72. t.Run(fmt.Sprintf("%s/%s", e, dir), func(t *testing.T) {
  73. dst, src, transformer, err := load(dir, e)
  74. if err != nil {
  75. t.Fatalf("load: %v", err)
  76. }
  77. buf, err := transformer.Bytes(src)
  78. if err != nil {
  79. t.Fatalf("transform: %v", err)
  80. }
  81. if !bytes.Equal(buf, dst) {
  82. t.Error("transformed bytes did not match golden file")
  83. }
  84. })
  85. }
  86. }
  87. func Benchmark(b *testing.B, enc encoding.Encoding) {
  88. for _, direction := range []string{"Decode", "Encode"} {
  89. b.Run(fmt.Sprintf("%s/%s", enc, direction), func(b *testing.B) {
  90. _, src, transformer, err := load(direction, enc)
  91. if err != nil {
  92. b.Fatal(err)
  93. }
  94. b.SetBytes(int64(len(src)))
  95. b.ResetTimer()
  96. for i := 0; i < b.N; i++ {
  97. r := transform.NewReader(bytes.NewReader(src), transformer)
  98. io.Copy(ioutil.Discard, r)
  99. }
  100. })
  101. }
  102. }
  103. // testdataFiles are files in testdata/*.txt.
  104. var testdataFiles = []struct {
  105. mib identifier.MIB
  106. basename, ext string
  107. }{
  108. {identifier.Windows1252, "candide", "windows-1252"},
  109. {identifier.EUCPkdFmtJapanese, "rashomon", "euc-jp"},
  110. {identifier.ISO2022JP, "rashomon", "iso-2022-jp"},
  111. {identifier.ShiftJIS, "rashomon", "shift-jis"},
  112. {identifier.EUCKR, "unsu-joh-eun-nal", "euc-kr"},
  113. {identifier.GBK, "sunzi-bingfa-simplified", "gbk"},
  114. {identifier.HZGB2312, "sunzi-bingfa-gb-levels-1-and-2", "hz-gb2312"},
  115. {identifier.Big5, "sunzi-bingfa-traditional", "big5"},
  116. {identifier.UTF16LE, "candide", "utf-16le"},
  117. {identifier.UTF8, "candide", "utf-8"},
  118. {identifier.UTF32BE, "candide", "utf-32be"},
  119. // GB18030 is a superset of GBK and is nominally a Simplified Chinese
  120. // encoding, but it can also represent the entire Basic Multilingual
  121. // Plane, including codepoints like 'â' that aren't encodable by GBK.
  122. // GB18030 on Simplified Chinese should perform similarly to GBK on
  123. // Simplified Chinese. GB18030 on "candide" is more interesting.
  124. {identifier.GB18030, "candide", "gb18030"},
  125. }
  126. func load(direction string, enc encoding.Encoding) ([]byte, []byte, Transcoder, error) {
  127. basename, ext, count := "", "", 0
  128. for _, tf := range testdataFiles {
  129. if mib, _ := enc.(identifier.Interface).ID(); tf.mib == mib {
  130. basename, ext = tf.basename, tf.ext
  131. count++
  132. }
  133. }
  134. if count != 1 {
  135. if count == 0 {
  136. return nil, nil, nil, fmt.Errorf("no testdataFiles for %s", enc)
  137. }
  138. return nil, nil, nil, fmt.Errorf("too many testdataFiles for %s", enc)
  139. }
  140. dstFile := fmt.Sprintf("../testdata/%s-%s.txt", basename, ext)
  141. srcFile := fmt.Sprintf("../testdata/%s-utf-8.txt", basename)
  142. var coder Transcoder = encoding.ReplaceUnsupported(enc.NewEncoder())
  143. if direction == "Decode" {
  144. dstFile, srcFile = srcFile, dstFile
  145. coder = enc.NewDecoder()
  146. }
  147. dst, err := ioutil.ReadFile(dstFile)
  148. if err != nil {
  149. if dst, err = ioutil.ReadFile("../" + dstFile); err != nil {
  150. return nil, nil, nil, err
  151. }
  152. }
  153. src, err := ioutil.ReadFile(srcFile)
  154. if err != nil {
  155. if src, err = ioutil.ReadFile("../" + srcFile); err != nil {
  156. return nil, nil, nil, err
  157. }
  158. }
  159. return dst, src, coder, nil
  160. }
  161. func trim(s string) string {
  162. if len(s) < 120 {
  163. return s
  164. }
  165. return s[:50] + "..." + s[len(s)-50:]
  166. }