You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

95 lines
2.8 KiB

  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package korean
  5. import (
  6. "strings"
  7. "testing"
  8. "golang.org/x/text/encoding"
  9. "golang.org/x/text/encoding/internal"
  10. "golang.org/x/text/encoding/internal/enctest"
  11. "golang.org/x/text/transform"
  12. )
  13. func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
  14. return "Decode", e.NewDecoder(), nil
  15. }
  16. func enc(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
  17. return "Encode", e.NewEncoder(), internal.ErrASCIIReplacement
  18. }
  19. func TestNonRepertoire(t *testing.T) {
  20. // Pick n large enough to cause an overflow in the destination buffer of
  21. // transform.String.
  22. const n = 10000
  23. testCases := []struct {
  24. init func(e encoding.Encoding) (string, transform.Transformer, error)
  25. e encoding.Encoding
  26. src, want string
  27. }{
  28. {dec, EUCKR, "\xfe\xfe", "\ufffd"},
  29. // {dec, EUCKR, "א", "\ufffd"}, // TODO: why is this different?
  30. {enc, EUCKR, "א", ""},
  31. {enc, EUCKR, "aא", "a"},
  32. {enc, EUCKR, "\uac00א", "\xb0\xa1"},
  33. // TODO: should we also handle Jamo?
  34. {dec, EUCKR, "\x80", "\ufffd"},
  35. {dec, EUCKR, "\xff", "\ufffd"},
  36. {dec, EUCKR, "\x81", "\ufffd"},
  37. {dec, EUCKR, "\xb0\x40", "\ufffd@"},
  38. {dec, EUCKR, "\xb0\xff", "\ufffd"},
  39. {dec, EUCKR, "\xd0\x20", "\ufffd "},
  40. {dec, EUCKR, "\xd0\xff", "\ufffd"},
  41. {dec, EUCKR, strings.Repeat("\x81", n), strings.Repeat("걖", n/2)},
  42. }
  43. for _, tc := range testCases {
  44. dir, tr, wantErr := tc.init(tc.e)
  45. dst, _, err := transform.String(tr, tc.src)
  46. if err != wantErr {
  47. t.Errorf("%s %v(%q): got %v; want %v", dir, tc.e, tc.src, err, wantErr)
  48. }
  49. if got := string(dst); got != tc.want {
  50. t.Errorf("%s %v(%q):\ngot %q\nwant %q", dir, tc.e, tc.src, got, tc.want)
  51. }
  52. }
  53. }
  54. func TestBasics(t *testing.T) {
  55. // The encoded forms can be verified by the iconv program:
  56. // $ echo 月日は百代 | iconv -f UTF-8 -t SHIFT-JIS | xxd
  57. testCases := []struct {
  58. e encoding.Encoding
  59. encoded string
  60. utf8 string
  61. }{{
  62. // Korean tests.
  63. //
  64. // "A\uac02\uac35\uac56\ud401B\ud408\ud620\ud624C\u4f3d\u8a70D" is a
  65. // nonsense string that contains ASCII, Hangul and CJK ideographs.
  66. //
  67. // "세계야, 안녕" translates as "Hello, world".
  68. e: EUCKR,
  69. encoded: "A\x81\x41\x81\x61\x81\x81\xc6\xfeB\xc7\xa1\xc7\xfe\xc8\xa1C\xca\xa1\xfd\xfeD",
  70. utf8: "A\uac02\uac35\uac56\ud401B\ud408\ud620\ud624C\u4f3d\u8a70D",
  71. }, {
  72. e: EUCKR,
  73. encoded: "\xbc\xbc\xb0\xe8\xbe\xdf\x2c\x20\xbe\xc8\xb3\xe7",
  74. utf8: "세계야, 안녕",
  75. }}
  76. for _, tc := range testCases {
  77. enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, "", "")
  78. }
  79. }
  80. func TestFiles(t *testing.T) { enctest.TestFile(t, EUCKR) }
  81. func BenchmarkEncoding(b *testing.B) { enctest.Benchmark(b, EUCKR) }