You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

124 lines
3.1 KiB

  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // +build ignore
  5. package main
  6. // This file contains definitions for interpreting the trie value of the idna
  7. // trie generated by "go run gen*.go". It is shared by both the generator
  8. // program and the resultant package. Sharing is achieved by the generator
  9. // copying gen_trieval.go to trieval.go and changing what's above this comment.
  // info is the value stored in the IDNA trie for a single rune; it is what a
  // trie lookup returns. It carries the rune's entry from the IDNA mapping table
  // and, in most cases, fits entirely in 16 bits. For mapped runes the value may
  // instead hold an index into a slice containing the mapped string. Such an
  // entry is either the mapped value itself or an XOR pattern that is applied to
  // the bytes of the UTF-8 encoding of the input rune. The XOR technique is also
  // used by the cases package and reduces the table size significantly.
  //
  // Bit layout of a per-rune value:
  //
  //	if mapped {
  //		if inlinedXOR {
  //			15..13 inline XOR marker
  //			12..11 unused
  //			10..3  inline XOR mask
  //		} else {
  //			15..3  index into xor or mapping table
  //		}
  //	} else {
  //		15..14 unused
  //		13     mayNeedNorm
  //		12..11 attributes
  //		10..8  joining type
  //		7..3   category type
  //	}
  //	2    use xor pattern
  //	1..0 mapped category
  //
  // The constants and methods declared in this file describe the individual
  // bits in more detail.
  type info uint16
  // Masks, shifts and flag bits used to take an info value apart. The values
  // are written as shifts where that makes the bit position explicit.
  const (
  	// Category fields: bits 1..0 hold the mapped category, bits 7..3 hold
  	// the category type of a rune that is not mapped.
  	catSmallMask = 0x3
  	catBigMask   = 0xF8

  	// Mapping fields: the table index occupies the bits from indexShift up.
  	indexShift = 3
  	xorBit     = 1 << 2    // interpret the index as an xor pattern
  	inlineXOR  = 0x7 << 13 // These bits are set if the XOR pattern is inlined.

  	// Joining type: a three-bit field starting at bit 8.
  	joinShift = 8
  	joinMask  = 0x07

  	// Attributes: a two-bit field in bits 12..11.
  	attributesMask = 0x3 << 11
  	viramaModifier = 0x3 << 11
  	modifier       = 0x2 << 11
  	rtl            = 0x1 << 11

  	// mayNeedNorm is the single flag in bit 13.
  	mayNeedNorm = 1 << 13
  )
  // A category corresponds to a category defined in the IDNA mapping table.
  type category uint16

  // Categories small enough to fit in the two lowest bits of an info value
  // (the bits selected by catSmallMask).
  const (
  	unknown category = iota // not currently defined in unicode.
  	mapped
  	disallowedSTD3Mapped
  	deviation
  )

  // Categories stored in the bits selected by catBigMask.
  const (
  	valid               category = 0x08
  	validNV8            category = 0x18
  	validXV8            category = 0x28
  	disallowed          category = 0x40
  	disallowedSTD3Valid category = 0x80
  	ignored             category = 0xC0
  )
  // Join types and additional rune information. Numbering starts at 1; the
  // value 0 is left unnamed (joinType reports 0 for mapped runes).
  const (
  	_ = iota
  	joiningL
  	joiningD
  	joiningT
  	joiningR

  	// The following types are derived during processing.
  	joinZWJ
  	joinZWNJ
  	joinVirama

  	// numJoinTypes is one more than the last join type above.
  	numJoinTypes
  )
  84. func (c info) isMapped() bool {
  85. return c&0x3 != 0
  86. }
  87. func (c info) category() category {
  88. small := c & catSmallMask
  89. if small != 0 {
  90. return category(small)
  91. }
  92. return category(c & catBigMask)
  93. }
  94. func (c info) joinType() info {
  95. if c.isMapped() {
  96. return 0
  97. }
  98. return (c >> joinShift) & joinMask
  99. }
  100. func (c info) isModifier() bool {
  101. return c&(modifier|catSmallMask) == modifier
  102. }
  103. func (c info) isViramaModifier() bool {
  104. return c&(attributesMask|catSmallMask) == viramaModifier
  105. }