You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

401 lines
9.8 KiB

  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // +build ignore
  5. // This tool generates types for the various XML formats of CLDR.
  6. package main
  7. import (
  8. "archive/zip"
  9. "bytes"
  10. "encoding/xml"
  11. "flag"
  12. "fmt"
  13. "io"
  14. "io/ioutil"
  15. "log"
  16. "os"
  17. "regexp"
  18. "strings"
  19. "golang.org/x/text/internal/gen"
  20. )
  21. var outputFile = flag.String("output", "xml.go", "output file name")
  22. func main() {
  23. flag.Parse()
  24. r := gen.OpenCLDRCoreZip()
  25. buffer, err := ioutil.ReadAll(r)
  26. if err != nil {
  27. log.Fatal("Could not read zip file")
  28. }
  29. r.Close()
  30. z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
  31. if err != nil {
  32. log.Fatalf("Could not read zip archive: %v", err)
  33. }
  34. var buf bytes.Buffer
  35. version := gen.CLDRVersion()
  36. for _, dtd := range files {
  37. for _, f := range z.File {
  38. if strings.HasSuffix(f.Name, dtd.file+".dtd") {
  39. r, err := f.Open()
  40. failOnError(err)
  41. b := makeBuilder(&buf, dtd)
  42. b.parseDTD(r)
  43. b.resolve(b.index[dtd.top[0]])
  44. b.write()
  45. if b.version != "" && version != b.version {
  46. println(f.Name)
  47. log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version)
  48. }
  49. break
  50. }
  51. }
  52. }
  53. fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.")
  54. fmt.Fprintf(&buf, "const Version = %q\n", version)
  55. gen.WriteGoFile(*outputFile, "cldr", buf.Bytes())
  56. }
  57. func failOnError(err error) {
  58. if err != nil {
  59. log.New(os.Stderr, "", log.Lshortfile).Output(2, err.Error())
  60. os.Exit(1)
  61. }
  62. }
// configuration data per DTD type
//
// One value describes how a single CLDR DTD file is located in the archive
// and how its elements are turned into Go types.
type dtd struct {
	file        string   // base file name; main selects the archive entry whose name ends in file+".dtd"
	root        string   // Go name of the root XML element; write uses it for the first entry of top
	top         []string // create a different type for this section; top[0] is the element main resolves first
	skipElem    []string // hard-coded or deprecated elements; resolve and writeElem leave them out
	skipAttr    []string // attributes to exclude; writeElem emits no field for them
	predefined  []string // hard-coded elements exist of the form <name>Elem; writeElem embeds that name instead of generating a type
	forceRepeat []string // elements to make slices despite DTD
}
// files lists the DTDs to process, in the order their generated types are
// written. For each entry main uses the first archive entry whose name ends
// in file+".dtd".
var files = []dtd{
	{
		file: "ldmlBCP47",
		root: "LDMLBCP47",
		top:  []string{"ldmlBCP47"},
		skipElem: []string{
			"cldrVersion", // deprecated, not used
		},
	},
	{
		file: "ldmlSupplemental",
		root: "SupplementalData",
		top:  []string{"supplementalData"},
		skipElem: []string{
			"cldrVersion", // deprecated, not used
		},
		forceRepeat: []string{
			"plurals", // data defined in plurals.xml and ordinals.xml
		},
	},
	{
		file: "ldml",
		root: "LDML",
		// Every element named here gets its own named Go type; the first one
		// is emitted under the name given by root.
		top: []string{
			"ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers",
		},
		skipElem: []string{
			"cp",       // not used anywhere
			"special",  // not used anywhere
			"fallback", // deprecated, not used
			"alias",    // in Common
			"default",  // in Common
		},
		skipAttr: []string{
			"hiraganaQuarternary", // typo in DTD, correct version included as well
		},
		predefined: []string{"rules"},
	},
}
// comments maps an XML element name to the Go doc comment that write emits
// immediately before the generated type for that element. Each value starts
// with a newline, so the comment is separated from preceding output by a
// blank line. Top-level elements without an entry get no comment.
var comments = map[string]string{
	"ldmlBCP47": `
// LDMLBCP47 holds information on allowable values for various variables in LDML.
`,
	"supplementalData": `
// SupplementalData holds information relevant for internationalization
// and proper use of CLDR, but that is not contained in the locale hierarchy.
`,
	"ldml": `
// LDML is the top-level type for locale-specific data.
`,
	"collation": `
// Collation contains rules that specify a certain sort-order,
// as a tailoring of the root order.
// The parsed rules are obtained by passing a RuleProcessor to Collation's
// Process method.
`,
	"calendar": `
// Calendar specifies the fields used for formatting and parsing dates and times.
// The month and quarter names are identified numerically, starting at 1.
// The day (of the week) names are identified with short strings, since there is
// no universally-accepted numeric designation.
`,
	// NOTE(review): no dtd.top list in this file names "dates", so this entry
	// is never looked up by write -- confirm whether it is still wanted.
	"dates": `
// Dates contains information regarding the format and parsing of dates and times.
`,
	"localeDisplayNames": `
// LocaleDisplayNames specifies localized display names for scripts, languages,
// countries, currencies, and variants.
`,
	"numbers": `
// Numbers supplies information for formatting and parsing numbers and currencies.
`,
}
// element describes one element declared in a DTD. parseDTD fills in name,
// category, attr and signature; resolve fills in sub and sets resolved.
type element struct {
	name      string       // XML element name
	category  string       // raw content model from the ELEMENT declaration (EMPTY, ANY or a parenthesized group)
	signature string       // "=name+type" appended by parseDTD for every recorded attribute
	attr      []*attribute // attributes supported by this element.
	sub       []struct {   // parsed and evaluated sub elements of this element.
		e      *element
		repeat bool // true if the element needs to be a slice
	}
	resolved bool // prevent multiple resolutions of this element.
}
// attribute describes one attribute recorded by parseDTD from an ATTLIST
// declaration.
type attribute struct {
	name string   // attribute name (first capture group of reAttr)
	key  string   // text of the ATTLIST declaration that follows the element name
	list []string // tokens that reToken finds in the enumerated value list, if any
	tag  string   // Go tag
}
  163. var (
  164. reHead = regexp.MustCompile(` *(\w+) +([\w\-]+)`)
  165. reAttr = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"([\.\d+])\")?)? *("[\w\-:]*")?`)
  166. reElem = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`)
  167. reToken = regexp.MustCompile(`\w\-`)
  168. )
// builder is used to read in the DTD files from CLDR and generate Go code
// to be used with the encoding/xml package.
type builder struct {
	w       io.Writer           // destination of the generated Go code
	index   map[string]*element // elements declared in the DTD, keyed by XML name
	elem    []*element          // elements in the order resolve first reached them
	info    dtd                 // configuration of the DTD being processed
	version string              // value parseDTD captured from a #FIXED attribute; empty if none
}
  178. func makeBuilder(w io.Writer, d dtd) builder {
  179. return builder{
  180. w: w,
  181. index: make(map[string]*element),
  182. elem: []*element{},
  183. info: d,
  184. }
  185. }
  186. // parseDTD parses a DTD file.
  187. func (b *builder) parseDTD(r io.Reader) {
  188. for d := xml.NewDecoder(r); ; {
  189. t, err := d.Token()
  190. if t == nil {
  191. break
  192. }
  193. failOnError(err)
  194. dir, ok := t.(xml.Directive)
  195. if !ok {
  196. continue
  197. }
  198. m := reHead.FindSubmatch(dir)
  199. dir = dir[len(m[0]):]
  200. ename := string(m[2])
  201. el, elementFound := b.index[ename]
  202. switch string(m[1]) {
  203. case "ELEMENT":
  204. if elementFound {
  205. log.Fatal("parseDTD: duplicate entry for element %q", ename)
  206. }
  207. m := reElem.FindSubmatch(dir)
  208. if m == nil {
  209. log.Fatalf("parseDTD: invalid element %q", string(dir))
  210. }
  211. if len(m[0]) != len(dir) {
  212. log.Fatal("parseDTD: invalid element %q", string(dir), len(dir), len(m[0]), string(m[0]))
  213. }
  214. s := string(m[1])
  215. el = &element{
  216. name: ename,
  217. category: s,
  218. }
  219. b.index[ename] = el
  220. case "ATTLIST":
  221. if !elementFound {
  222. log.Fatalf("parseDTD: unknown element %q", ename)
  223. }
  224. s := string(dir)
  225. m := reAttr.FindStringSubmatch(s)
  226. if m == nil {
  227. log.Fatal(fmt.Errorf("parseDTD: invalid attribute %q", string(dir)))
  228. }
  229. if m[4] == "FIXED" {
  230. b.version = m[5]
  231. } else {
  232. switch m[1] {
  233. case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ :
  234. case "type", "choice":
  235. default:
  236. el.attr = append(el.attr, &attribute{
  237. name: m[1],
  238. key: s,
  239. list: reToken.FindAllString(m[3], -1),
  240. })
  241. el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2])
  242. }
  243. }
  244. }
  245. }
  246. }
  247. var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`)
  248. // resolve takes a parsed element and converts it into structured data
  249. // that can be used to generate the XML code.
  250. func (b *builder) resolve(e *element) {
  251. if e.resolved {
  252. return
  253. }
  254. b.elem = append(b.elem, e)
  255. e.resolved = true
  256. s := e.category
  257. found := make(map[string]bool)
  258. sequenceStart := []int{}
  259. for len(s) > 0 {
  260. m := reCat.FindStringSubmatch(s)
  261. if m == nil {
  262. log.Fatalf("%s: invalid category string %q", e.name, s)
  263. }
  264. repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1])
  265. switch m[1] {
  266. case "":
  267. case "(":
  268. sequenceStart = append(sequenceStart, len(e.sub))
  269. case ")":
  270. if len(sequenceStart) == 0 {
  271. log.Fatalf("%s: unmatched closing parenthesis", e.name)
  272. }
  273. for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ {
  274. e.sub[i].repeat = e.sub[i].repeat || repeat
  275. }
  276. sequenceStart = sequenceStart[:len(sequenceStart)-1]
  277. default:
  278. if in(b.info.skipElem, m[1]) {
  279. } else if sub, ok := b.index[m[1]]; ok {
  280. if !found[sub.name] {
  281. e.sub = append(e.sub, struct {
  282. e *element
  283. repeat bool
  284. }{sub, repeat})
  285. found[sub.name] = true
  286. b.resolve(sub)
  287. }
  288. } else if m[1] == "#PCDATA" || m[1] == "ANY" {
  289. } else if m[1] != "EMPTY" {
  290. log.Fatalf("resolve:%s: element %q not found", e.name, m[1])
  291. }
  292. }
  293. s = s[len(m[0]):]
  294. }
  295. }
  296. // return true if s is contained in set.
  297. func in(set []string, s string) bool {
  298. for _, v := range set {
  299. if v == s {
  300. return true
  301. }
  302. }
  303. return false
  304. }
  305. var repl = strings.NewReplacer("-", " ", "_", " ")
  306. // title puts the first character or each character following '_' in title case and
  307. // removes all occurrences of '_'.
  308. func title(s string) string {
  309. return strings.Replace(strings.Title(repl.Replace(s)), " ", "", -1)
  310. }
  311. // writeElem generates Go code for a single element, recursively.
  312. func (b *builder) writeElem(tab int, e *element) {
  313. p := func(f string, x ...interface{}) {
  314. f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1)
  315. fmt.Fprintf(b.w, f, x...)
  316. }
  317. if len(e.sub) == 0 && len(e.attr) == 0 {
  318. p("Common")
  319. return
  320. }
  321. p("struct {")
  322. tab++
  323. p("\nCommon")
  324. for _, attr := range e.attr {
  325. if !in(b.info.skipAttr, attr.name) {
  326. p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name)
  327. }
  328. }
  329. for _, sub := range e.sub {
  330. if in(b.info.predefined, sub.e.name) {
  331. p("\n%sElem", sub.e.name)
  332. continue
  333. }
  334. if in(b.info.skipElem, sub.e.name) {
  335. continue
  336. }
  337. p("\n%s ", title(sub.e.name))
  338. if sub.repeat {
  339. p("[]")
  340. }
  341. p("*")
  342. if in(b.info.top, sub.e.name) {
  343. p(title(sub.e.name))
  344. } else {
  345. b.writeElem(tab, sub.e)
  346. }
  347. p(" `xml:\"%s\"`", sub.e.name)
  348. }
  349. tab--
  350. p("\n}")
  351. }
  352. // write generates the Go XML code.
  353. func (b *builder) write() {
  354. for i, name := range b.info.top {
  355. e := b.index[name]
  356. if e != nil {
  357. fmt.Fprintf(b.w, comments[name])
  358. name := title(e.name)
  359. if i == 0 {
  360. name = b.info.root
  361. }
  362. fmt.Fprintf(b.w, "type %s ", name)
  363. b.writeElem(0, e)
  364. fmt.Fprint(b.w, "\n")
  365. }
  366. }
  367. }