You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

106 lines
3.4 KiB

  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package colltab contains functionality related to collation tables.
  5. // It is only to be used by the collate and search packages.
  6. package colltab // import "golang.org/x/text/internal/colltab"
  7. import (
  8. "sort"
  9. "golang.org/x/text/language"
  10. )
  11. // MatchLang finds the index of t in tags, using a matching algorithm used for
  12. // collation and search. tags[0] must be language.Und, the remaining tags should
  13. // be sorted alphabetically.
  14. //
  15. // Language matching for collation and search is different from the matching
  16. // defined by language.Matcher: the (inferred) base language must be an exact
  17. // match for the relevant fields. For example, "gsw" should not match "de".
  18. // Also the parent relation is different, as a parent may have a different
  19. // script. So usually the parent of zh-Hant is und, whereas for MatchLang it is
  20. // zh.
  21. func MatchLang(t language.Tag, tags []language.Tag) int {
  22. // Canonicalize the values, including collapsing macro languages.
  23. t, _ = language.All.Canonicalize(t)
  24. base, conf := t.Base()
  25. // Estimate the base language, but only use high-confidence values.
  26. if conf < language.High {
  27. // The root locale supports "search" and "standard". We assume that any
  28. // implementation will only use one of both.
  29. return 0
  30. }
  31. // Maximize base and script and normalize the tag.
  32. if _, s, r := t.Raw(); (r != language.Region{}) {
  33. p, _ := language.Raw.Compose(base, s, r)
  34. // Taking the parent forces the script to be maximized.
  35. p = p.Parent()
  36. // Add back region and extensions.
  37. t, _ = language.Raw.Compose(p, r, t.Extensions())
  38. } else {
  39. // Set the maximized base language.
  40. t, _ = language.Raw.Compose(base, s, t.Extensions())
  41. }
  42. // Find start index of the language tag.
  43. start := 1 + sort.Search(len(tags)-1, func(i int) bool {
  44. b, _, _ := tags[i+1].Raw()
  45. return base.String() <= b.String()
  46. })
  47. if start < len(tags) {
  48. if b, _, _ := tags[start].Raw(); b != base {
  49. return 0
  50. }
  51. }
  52. // Besides the base language, script and region, only the collation type and
  53. // the custom variant defined in the 'u' extension are used to distinguish a
  54. // locale.
  55. // Strip all variants and extensions and add back the custom variant.
  56. tdef, _ := language.Raw.Compose(t.Raw())
  57. tdef, _ = tdef.SetTypeForKey("va", t.TypeForKey("va"))
  58. // First search for a specialized collation type, if present.
  59. try := []language.Tag{tdef}
  60. if co := t.TypeForKey("co"); co != "" {
  61. tco, _ := tdef.SetTypeForKey("co", co)
  62. try = []language.Tag{tco, tdef}
  63. }
  64. for _, tx := range try {
  65. for ; tx != language.Und; tx = parent(tx) {
  66. for i, t := range tags[start:] {
  67. if b, _, _ := t.Raw(); b != base {
  68. break
  69. }
  70. if tx == t {
  71. return start + i
  72. }
  73. }
  74. }
  75. }
  76. return 0
  77. }
  78. // parent computes the structural parent. This means inheritance may change
  79. // script. So, unlike the CLDR parent, parent(zh-Hant) == zh.
  80. func parent(t language.Tag) language.Tag {
  81. if t.TypeForKey("va") != "" {
  82. t, _ = t.SetTypeForKey("va", "")
  83. return t
  84. }
  85. result := language.Und
  86. if b, s, r := t.Raw(); (r != language.Region{}) {
  87. result, _ = language.Raw.Compose(b, s, t.Extensions())
  88. } else if (s != language.Script{}) {
  89. result, _ = language.Raw.Compose(b, t.Extensions())
  90. } else if (b != language.Base{}) {
  91. result, _ = language.Raw.Compose(t.Extensions())
  92. }
  93. return result
  94. }