You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

134 lines
3.0 KiB

  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // +build ignore
  5. package main
  6. import (
  7. "flag"
  8. "log"
  9. "golang.org/x/text/internal/gen"
  10. "golang.org/x/text/internal/triegen"
  11. "golang.org/x/text/internal/ucd"
  12. )
  13. var outputFile = flag.String("out", "tables.go", "output file")
  14. func main() {
  15. gen.Init()
  16. gen.Repackage("gen_trieval.go", "trieval.go", "bidi")
  17. gen.Repackage("gen_ranges.go", "ranges_test.go", "bidi")
  18. genTables()
  19. }
  20. // bidiClass names and codes taken from class "bc" in
  21. // https://www.unicode.org/Public/8.0.0/ucd/PropertyValueAliases.txt
  22. var bidiClass = map[string]Class{
  23. "AL": AL, // ArabicLetter
  24. "AN": AN, // ArabicNumber
  25. "B": B, // ParagraphSeparator
  26. "BN": BN, // BoundaryNeutral
  27. "CS": CS, // CommonSeparator
  28. "EN": EN, // EuropeanNumber
  29. "ES": ES, // EuropeanSeparator
  30. "ET": ET, // EuropeanTerminator
  31. "L": L, // LeftToRight
  32. "NSM": NSM, // NonspacingMark
  33. "ON": ON, // OtherNeutral
  34. "R": R, // RightToLeft
  35. "S": S, // SegmentSeparator
  36. "WS": WS, // WhiteSpace
  37. "FSI": Control,
  38. "PDF": Control,
  39. "PDI": Control,
  40. "LRE": Control,
  41. "LRI": Control,
  42. "LRO": Control,
  43. "RLE": Control,
  44. "RLI": Control,
  45. "RLO": Control,
  46. }
  47. func genTables() {
  48. if numClass > 0x0F {
  49. log.Fatalf("Too many Class constants (%#x > 0x0F).", numClass)
  50. }
  51. w := gen.NewCodeWriter()
  52. defer w.WriteVersionedGoFile(*outputFile, "bidi")
  53. gen.WriteUnicodeVersion(w)
  54. t := triegen.NewTrie("bidi")
  55. // Build data about bracket mapping. These bits need to be or-ed with
  56. // any other bits.
  57. orMask := map[rune]uint64{}
  58. xorMap := map[rune]int{}
  59. xorMasks := []rune{0} // First value is no-op.
  60. ucd.Parse(gen.OpenUCDFile("BidiBrackets.txt"), func(p *ucd.Parser) {
  61. r1 := p.Rune(0)
  62. r2 := p.Rune(1)
  63. xor := r1 ^ r2
  64. if _, ok := xorMap[xor]; !ok {
  65. xorMap[xor] = len(xorMasks)
  66. xorMasks = append(xorMasks, xor)
  67. }
  68. entry := uint64(xorMap[xor]) << xorMaskShift
  69. switch p.String(2) {
  70. case "o":
  71. entry |= openMask
  72. case "c", "n":
  73. default:
  74. log.Fatalf("Unknown bracket class %q.", p.String(2))
  75. }
  76. orMask[r1] = entry
  77. })
  78. w.WriteComment(`
  79. xorMasks contains masks to be xor-ed with brackets to get the reverse
  80. version.`)
  81. w.WriteVar("xorMasks", xorMasks)
  82. done := map[rune]bool{}
  83. insert := func(r rune, c Class) {
  84. if !done[r] {
  85. t.Insert(r, orMask[r]|uint64(c))
  86. done[r] = true
  87. }
  88. }
  89. // Insert the derived BiDi properties.
  90. ucd.Parse(gen.OpenUCDFile("extracted/DerivedBidiClass.txt"), func(p *ucd.Parser) {
  91. r := p.Rune(0)
  92. class, ok := bidiClass[p.String(1)]
  93. if !ok {
  94. log.Fatalf("%U: Unknown BiDi class %q", r, p.String(1))
  95. }
  96. insert(r, class)
  97. })
  98. visitDefaults(insert)
  99. // TODO: use sparse blocks. This would reduce table size considerably
  100. // from the looks of it.
  101. sz, err := t.Gen(w)
  102. if err != nil {
  103. log.Fatal(err)
  104. }
  105. w.Size += sz
  106. }
  107. // dummy values to make methods in gen_common compile. The real versions
  108. // will be generated by this file to tables.go.
  109. var (
  110. xorMasks []rune
  111. )