|
- // Copyright 2015 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
-
- package runes
-
- import (
- "strings"
- "testing"
- "unicode"
-
- "golang.org/x/text/cases"
- "golang.org/x/text/language"
- "golang.org/x/text/transform"
- )
-
- var (
- toUpper = cases.Upper(language.Und)
- toLower = cases.Lower(language.Und)
- )
-
- type spanformer interface {
- transform.SpanningTransformer
- }
-
- func TestPredicate(t *testing.T) {
- testConditional(t, func(rt *unicode.RangeTable, t, f spanformer) spanformer {
- return If(Predicate(func(r rune) bool {
- return unicode.Is(rt, r)
- }), t, f)
- })
- }
-
- func TestIn(t *testing.T) {
- testConditional(t, func(rt *unicode.RangeTable, t, f spanformer) spanformer {
- return If(In(rt), t, f)
- })
- }
-
- func TestNotIn(t *testing.T) {
- testConditional(t, func(rt *unicode.RangeTable, t, f spanformer) spanformer {
- return If(NotIn(rt), f, t)
- })
- }
-
- func testConditional(t *testing.T, f func(rt *unicode.RangeTable, t, f spanformer) spanformer) {
- lower := f(unicode.Latin, toLower, toLower)
-
- for i, tt := range []transformTest{{
- desc: "empty",
- szDst: large,
- atEOF: true,
- in: "",
- out: "",
- outFull: "",
- t: lower,
- }, {
- desc: "small",
- szDst: 1,
- atEOF: true,
- in: "B",
- out: "b",
- outFull: "b",
- errSpan: transform.ErrEndOfSpan,
- t: lower,
- }, {
- desc: "short dst",
- szDst: 2,
- atEOF: true,
- in: "AAA",
- out: "aa",
- outFull: "aaa",
- err: transform.ErrShortDst,
- errSpan: transform.ErrEndOfSpan,
- t: lower,
- }, {
- desc: "short dst writing error",
- szDst: 1,
- atEOF: false,
- in: "A\x80",
- out: "a",
- outFull: "a\x80",
- err: transform.ErrShortDst,
- errSpan: transform.ErrEndOfSpan,
- t: lower,
- }, {
- desc: "short dst writing incomplete rune",
- szDst: 2,
- atEOF: true,
- in: "Σ\xc2",
- out: "Σ",
- outFull: "Σ\xc2",
- err: transform.ErrShortDst,
- t: f(unicode.Latin, toLower, nil),
- }, {
- desc: "short dst, longer",
- szDst: 5,
- atEOF: true,
- in: "Hellø",
- out: "Hell",
- outFull: "Hellø",
- err: transform.ErrShortDst,
- // idem is used to test short buffers by forcing processing of full-rune increments.
- t: f(unicode.Latin, Map(idem), nil),
- }, {
- desc: "short dst, longer, writing error",
- szDst: 6,
- atEOF: false,
- in: "\x80Hello\x80",
- out: "\x80Hello",
- outFull: "\x80Hello\x80",
- err: transform.ErrShortDst,
- t: f(unicode.Latin, Map(idem), nil),
- }, {
- desc: "short src",
- szDst: 2,
- atEOF: false,
- in: "A\xc2",
- out: "a",
- outFull: "a\xc2",
- err: transform.ErrShortSrc,
- errSpan: transform.ErrEndOfSpan,
- t: lower,
- }, {
- desc: "short src no change",
- szDst: 2,
- atEOF: false,
- in: "a\xc2",
- out: "a",
- outFull: "a\xc2",
- err: transform.ErrShortSrc,
- errSpan: transform.ErrShortSrc,
- nSpan: 1,
- t: lower,
- }, {
- desc: "invalid input, atEOF",
- szDst: large,
- atEOF: true,
- in: "\x80",
- out: "\x80",
- outFull: "\x80",
- t: lower,
- }, {
- desc: "invalid input, !atEOF",
- szDst: large,
- atEOF: false,
- in: "\x80",
- out: "\x80",
- outFull: "\x80",
- t: lower,
- }, {
- desc: "invalid input, incomplete rune atEOF",
- szDst: large,
- atEOF: true,
- in: "\xc2",
- out: "\xc2",
- outFull: "\xc2",
- t: lower,
- }, {
- desc: "nop",
- szDst: large,
- atEOF: true,
- in: "Hello World!",
- out: "Hello World!",
- outFull: "Hello World!",
- t: f(unicode.Latin, nil, nil),
- }, {
- desc: "nop in",
- szDst: large,
- atEOF: true,
- in: "THIS IS α ΤΕΣΤ",
- out: "this is α ΤΕΣΤ",
- outFull: "this is α ΤΕΣΤ",
- errSpan: transform.ErrEndOfSpan,
- t: f(unicode.Greek, nil, toLower),
- }, {
- desc: "nop in latin",
- szDst: large,
- atEOF: true,
- in: "THIS IS α ΤΕΣΤ",
- out: "THIS IS α τεστ",
- outFull: "THIS IS α τεστ",
- errSpan: transform.ErrEndOfSpan,
- t: f(unicode.Latin, nil, toLower),
- }, {
- desc: "nop not in",
- szDst: large,
- atEOF: true,
- in: "THIS IS α ΤΕΣΤ",
- out: "this is α ΤΕΣΤ",
- outFull: "this is α ΤΕΣΤ",
- errSpan: transform.ErrEndOfSpan,
- t: f(unicode.Latin, toLower, nil),
- }, {
- desc: "pass atEOF is true when at end",
- szDst: large,
- atEOF: true,
- in: "hello",
- out: "HELLO",
- outFull: "HELLO",
- errSpan: transform.ErrEndOfSpan,
- t: f(unicode.Latin, upperAtEOF{}, nil),
- }, {
- desc: "pass atEOF is true when at end of segment",
- szDst: large,
- atEOF: true,
- in: "hello ",
- out: "HELLO ",
- outFull: "HELLO ",
- errSpan: transform.ErrEndOfSpan,
- t: f(unicode.Latin, upperAtEOF{}, nil),
- }, {
- desc: "don't pass atEOF is true when atEOF is false",
- szDst: large,
- atEOF: false,
- in: "hello",
- out: "",
- outFull: "HELLO",
- err: transform.ErrShortSrc,
- errSpan: transform.ErrShortSrc,
- t: f(unicode.Latin, upperAtEOF{}, nil),
- }, {
- desc: "pass atEOF is true when at end, no change",
- szDst: large,
- atEOF: true,
- in: "HELLO",
- out: "HELLO",
- outFull: "HELLO",
- t: f(unicode.Latin, upperAtEOF{}, nil),
- }, {
- desc: "pass atEOF is true when at end of segment, no change",
- szDst: large,
- atEOF: true,
- in: "HELLO ",
- out: "HELLO ",
- outFull: "HELLO ",
- t: f(unicode.Latin, upperAtEOF{}, nil),
- }, {
- desc: "large input ASCII",
- szDst: 12000,
- atEOF: false,
- in: strings.Repeat("HELLO", 2000),
- out: strings.Repeat("hello", 2000),
- outFull: strings.Repeat("hello", 2000),
- errSpan: transform.ErrEndOfSpan,
- err: nil,
- t: lower,
- }, {
- desc: "large input non-ASCII",
- szDst: 12000,
- atEOF: false,
- in: strings.Repeat("\u3333", 2000),
- out: strings.Repeat("\u3333", 2000),
- outFull: strings.Repeat("\u3333", 2000),
- err: nil,
- t: lower,
- }} {
- tt.check(t, i)
- }
- }
-
- // upperAtEOF is a strange Transformer that converts text to uppercase, but only
- // if atEOF is true.
- type upperAtEOF struct{ transform.NopResetter }
-
- func (upperAtEOF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
- if !atEOF {
- return 0, 0, transform.ErrShortSrc
- }
- return toUpper.Transform(dst, src, atEOF)
- }
-
- func (upperAtEOF) Span(src []byte, atEOF bool) (n int, err error) {
- if !atEOF {
- return 0, transform.ErrShortSrc
- }
- return toUpper.Span(src, atEOF)
- }
-
- func BenchmarkConditional(b *testing.B) {
- doBench(b, If(In(unicode.Hangul), transform.Nop, transform.Nop))
- }
|