選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

gen.go 12 KiB

8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
8年前
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712
  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // +build ignore
  5. //go:generate go run gen.go
  6. //go:generate go run gen.go -test
  7. package main
  8. import (
  9. "bytes"
  10. "flag"
  11. "fmt"
  12. "go/format"
  13. "io/ioutil"
  14. "math/rand"
  15. "os"
  16. "sort"
  17. "strings"
  18. )
  19. // identifier converts s to a Go exported identifier.
  20. // It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
  21. func identifier(s string) string {
  22. b := make([]byte, 0, len(s))
  23. cap := true
  24. for _, c := range s {
  25. if c == '-' {
  26. cap = true
  27. continue
  28. }
  29. if cap && 'a' <= c && c <= 'z' {
  30. c -= 'a' - 'A'
  31. }
  32. cap = false
  33. b = append(b, byte(c))
  34. }
  35. return string(b)
  36. }
  37. var test = flag.Bool("test", false, "generate table_test.go")
  38. func genFile(name string, buf *bytes.Buffer) {
  39. b, err := format.Source(buf.Bytes())
  40. if err != nil {
  41. fmt.Fprintln(os.Stderr, err)
  42. os.Exit(1)
  43. }
  44. if err := ioutil.WriteFile(name, b, 0644); err != nil {
  45. fmt.Fprintln(os.Stderr, err)
  46. os.Exit(1)
  47. }
  48. }
  49. func main() {
  50. flag.Parse()
  51. var all []string
  52. all = append(all, elements...)
  53. all = append(all, attributes...)
  54. all = append(all, eventHandlers...)
  55. all = append(all, extra...)
  56. sort.Strings(all)
  57. // uniq - lists have dups
  58. w := 0
  59. for _, s := range all {
  60. if w == 0 || all[w-1] != s {
  61. all[w] = s
  62. w++
  63. }
  64. }
  65. all = all[:w]
  66. if *test {
  67. var buf bytes.Buffer
  68. fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
  69. fmt.Fprintln(&buf, "//go:generate go run gen.go -test\n")
  70. fmt.Fprintln(&buf, "package atom\n")
  71. fmt.Fprintln(&buf, "var testAtomList = []string{")
  72. for _, s := range all {
  73. fmt.Fprintf(&buf, "\t%q,\n", s)
  74. }
  75. fmt.Fprintln(&buf, "}")
  76. genFile("table_test.go", &buf)
  77. return
  78. }
  79. // Find hash that minimizes table size.
  80. var best *table
  81. for i := 0; i < 1000000; i++ {
  82. if best != nil && 1<<(best.k-1) < len(all) {
  83. break
  84. }
  85. h := rand.Uint32()
  86. for k := uint(0); k <= 16; k++ {
  87. if best != nil && k >= best.k {
  88. break
  89. }
  90. var t table
  91. if t.init(h, k, all) {
  92. best = &t
  93. break
  94. }
  95. }
  96. }
  97. if best == nil {
  98. fmt.Fprintf(os.Stderr, "failed to construct string table\n")
  99. os.Exit(1)
  100. }
  101. // Lay out strings, using overlaps when possible.
  102. layout := append([]string{}, all...)
  103. // Remove strings that are substrings of other strings
  104. for changed := true; changed; {
  105. changed = false
  106. for i, s := range layout {
  107. if s == "" {
  108. continue
  109. }
  110. for j, t := range layout {
  111. if i != j && t != "" && strings.Contains(s, t) {
  112. changed = true
  113. layout[j] = ""
  114. }
  115. }
  116. }
  117. }
  118. // Join strings where one suffix matches another prefix.
  119. for {
  120. // Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
  121. // maximizing overlap length k.
  122. besti := -1
  123. bestj := -1
  124. bestk := 0
  125. for i, s := range layout {
  126. if s == "" {
  127. continue
  128. }
  129. for j, t := range layout {
  130. if i == j {
  131. continue
  132. }
  133. for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
  134. if s[len(s)-k:] == t[:k] {
  135. besti = i
  136. bestj = j
  137. bestk = k
  138. }
  139. }
  140. }
  141. }
  142. if bestk > 0 {
  143. layout[besti] += layout[bestj][bestk:]
  144. layout[bestj] = ""
  145. continue
  146. }
  147. break
  148. }
  149. text := strings.Join(layout, "")
  150. atom := map[string]uint32{}
  151. for _, s := range all {
  152. off := strings.Index(text, s)
  153. if off < 0 {
  154. panic("lost string " + s)
  155. }
  156. atom[s] = uint32(off<<8 | len(s))
  157. }
  158. var buf bytes.Buffer
  159. // Generate the Go code.
  160. fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
  161. fmt.Fprintln(&buf, "//go:generate go run gen.go\n")
  162. fmt.Fprintln(&buf, "package atom\n\nconst (")
  163. // compute max len
  164. maxLen := 0
  165. for _, s := range all {
  166. if maxLen < len(s) {
  167. maxLen = len(s)
  168. }
  169. fmt.Fprintf(&buf, "\t%s Atom = %#x\n", identifier(s), atom[s])
  170. }
  171. fmt.Fprintln(&buf, ")\n")
  172. fmt.Fprintf(&buf, "const hash0 = %#x\n\n", best.h0)
  173. fmt.Fprintf(&buf, "const maxAtomLen = %d\n\n", maxLen)
  174. fmt.Fprintf(&buf, "var table = [1<<%d]Atom{\n", best.k)
  175. for i, s := range best.tab {
  176. if s == "" {
  177. continue
  178. }
  179. fmt.Fprintf(&buf, "\t%#x: %#x, // %s\n", i, atom[s], s)
  180. }
  181. fmt.Fprintf(&buf, "}\n")
  182. datasize := (1 << best.k) * 4
  183. fmt.Fprintln(&buf, "const atomText =")
  184. textsize := len(text)
  185. for len(text) > 60 {
  186. fmt.Fprintf(&buf, "\t%q +\n", text[:60])
  187. text = text[60:]
  188. }
  189. fmt.Fprintf(&buf, "\t%q\n\n", text)
  190. genFile("table.go", &buf)
  191. fmt.Fprintf(os.Stdout, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
  192. }
  193. type byLen []string
  194. func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) }
  195. func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
  196. func (x byLen) Len() int { return len(x) }
  197. // fnv computes the FNV hash with an arbitrary starting value h.
  198. func fnv(h uint32, s string) uint32 {
  199. for i := 0; i < len(s); i++ {
  200. h ^= uint32(s[i])
  201. h *= 16777619
  202. }
  203. return h
  204. }
  205. // A table represents an attempt at constructing the lookup table.
  206. // The lookup table uses cuckoo hashing, meaning that each string
  207. // can be found in one of two positions.
  208. type table struct {
  209. h0 uint32
  210. k uint
  211. mask uint32
  212. tab []string
  213. }
  214. // hash returns the two hashes for s.
  215. func (t *table) hash(s string) (h1, h2 uint32) {
  216. h := fnv(t.h0, s)
  217. h1 = h & t.mask
  218. h2 = (h >> 16) & t.mask
  219. return
  220. }
  221. // init initializes the table with the given parameters.
  222. // h0 is the initial hash value,
  223. // k is the number of bits of hash value to use, and
  224. // x is the list of strings to store in the table.
  225. // init returns false if the table cannot be constructed.
  226. func (t *table) init(h0 uint32, k uint, x []string) bool {
  227. t.h0 = h0
  228. t.k = k
  229. t.tab = make([]string, 1<<k)
  230. t.mask = 1<<k - 1
  231. for _, s := range x {
  232. if !t.insert(s) {
  233. return false
  234. }
  235. }
  236. return true
  237. }
  238. // insert inserts s in the table.
  239. func (t *table) insert(s string) bool {
  240. h1, h2 := t.hash(s)
  241. if t.tab[h1] == "" {
  242. t.tab[h1] = s
  243. return true
  244. }
  245. if t.tab[h2] == "" {
  246. t.tab[h2] = s
  247. return true
  248. }
  249. if t.push(h1, 0) {
  250. t.tab[h1] = s
  251. return true
  252. }
  253. if t.push(h2, 0) {
  254. t.tab[h2] = s
  255. return true
  256. }
  257. return false
  258. }
  259. // push attempts to push aside the entry in slot i.
  260. func (t *table) push(i uint32, depth int) bool {
  261. if depth > len(t.tab) {
  262. return false
  263. }
  264. s := t.tab[i]
  265. h1, h2 := t.hash(s)
  266. j := h1 + h2 - i
  267. if t.tab[j] != "" && !t.push(j, depth+1) {
  268. return false
  269. }
  270. t.tab[j] = s
  271. return true
  272. }
  273. // The lists of element names and attribute keys were taken from
  274. // https://html.spec.whatwg.org/multipage/indices.html#index
  275. // as of the "HTML Living Standard - Last Updated 16 April 2018" version.
  276. // "command", "keygen" and "menuitem" have been removed from the spec,
  277. // but are kept here for backwards compatibility.
  278. var elements = []string{
  279. "a",
  280. "abbr",
  281. "address",
  282. "area",
  283. "article",
  284. "aside",
  285. "audio",
  286. "b",
  287. "base",
  288. "bdi",
  289. "bdo",
  290. "blockquote",
  291. "body",
  292. "br",
  293. "button",
  294. "canvas",
  295. "caption",
  296. "cite",
  297. "code",
  298. "col",
  299. "colgroup",
  300. "command",
  301. "data",
  302. "datalist",
  303. "dd",
  304. "del",
  305. "details",
  306. "dfn",
  307. "dialog",
  308. "div",
  309. "dl",
  310. "dt",
  311. "em",
  312. "embed",
  313. "fieldset",
  314. "figcaption",
  315. "figure",
  316. "footer",
  317. "form",
  318. "h1",
  319. "h2",
  320. "h3",
  321. "h4",
  322. "h5",
  323. "h6",
  324. "head",
  325. "header",
  326. "hgroup",
  327. "hr",
  328. "html",
  329. "i",
  330. "iframe",
  331. "img",
  332. "input",
  333. "ins",
  334. "kbd",
  335. "keygen",
  336. "label",
  337. "legend",
  338. "li",
  339. "link",
  340. "main",
  341. "map",
  342. "mark",
  343. "menu",
  344. "menuitem",
  345. "meta",
  346. "meter",
  347. "nav",
  348. "noscript",
  349. "object",
  350. "ol",
  351. "optgroup",
  352. "option",
  353. "output",
  354. "p",
  355. "param",
  356. "picture",
  357. "pre",
  358. "progress",
  359. "q",
  360. "rp",
  361. "rt",
  362. "ruby",
  363. "s",
  364. "samp",
  365. "script",
  366. "section",
  367. "select",
  368. "slot",
  369. "small",
  370. "source",
  371. "span",
  372. "strong",
  373. "style",
  374. "sub",
  375. "summary",
  376. "sup",
  377. "table",
  378. "tbody",
  379. "td",
  380. "template",
  381. "textarea",
  382. "tfoot",
  383. "th",
  384. "thead",
  385. "time",
  386. "title",
  387. "tr",
  388. "track",
  389. "u",
  390. "ul",
  391. "var",
  392. "video",
  393. "wbr",
  394. }
  395. // https://html.spec.whatwg.org/multipage/indices.html#attributes-3
  396. //
  397. // "challenge", "command", "contextmenu", "dropzone", "icon", "keytype", "mediagroup",
  398. // "radiogroup", "spellcheck", "scoped", "seamless", "sortable" and "sorted" have been removed from the spec,
  399. // but are kept here for backwards compatibility.
  400. var attributes = []string{
  401. "abbr",
  402. "accept",
  403. "accept-charset",
  404. "accesskey",
  405. "action",
  406. "allowfullscreen",
  407. "allowpaymentrequest",
  408. "allowusermedia",
  409. "alt",
  410. "as",
  411. "async",
  412. "autocomplete",
  413. "autofocus",
  414. "autoplay",
  415. "challenge",
  416. "charset",
  417. "checked",
  418. "cite",
  419. "class",
  420. "color",
  421. "cols",
  422. "colspan",
  423. "command",
  424. "content",
  425. "contenteditable",
  426. "contextmenu",
  427. "controls",
  428. "coords",
  429. "crossorigin",
  430. "data",
  431. "datetime",
  432. "default",
  433. "defer",
  434. "dir",
  435. "dirname",
  436. "disabled",
  437. "download",
  438. "draggable",
  439. "dropzone",
  440. "enctype",
  441. "for",
  442. "form",
  443. "formaction",
  444. "formenctype",
  445. "formmethod",
  446. "formnovalidate",
  447. "formtarget",
  448. "headers",
  449. "height",
  450. "hidden",
  451. "high",
  452. "href",
  453. "hreflang",
  454. "http-equiv",
  455. "icon",
  456. "id",
  457. "inputmode",
  458. "integrity",
  459. "is",
  460. "ismap",
  461. "itemid",
  462. "itemprop",
  463. "itemref",
  464. "itemscope",
  465. "itemtype",
  466. "keytype",
  467. "kind",
  468. "label",
  469. "lang",
  470. "list",
  471. "loop",
  472. "low",
  473. "manifest",
  474. "max",
  475. "maxlength",
  476. "media",
  477. "mediagroup",
  478. "method",
  479. "min",
  480. "minlength",
  481. "multiple",
  482. "muted",
  483. "name",
  484. "nomodule",
  485. "nonce",
  486. "novalidate",
  487. "open",
  488. "optimum",
  489. "pattern",
  490. "ping",
  491. "placeholder",
  492. "playsinline",
  493. "poster",
  494. "preload",
  495. "radiogroup",
  496. "readonly",
  497. "referrerpolicy",
  498. "rel",
  499. "required",
  500. "reversed",
  501. "rows",
  502. "rowspan",
  503. "sandbox",
  504. "spellcheck",
  505. "scope",
  506. "scoped",
  507. "seamless",
  508. "selected",
  509. "shape",
  510. "size",
  511. "sizes",
  512. "sortable",
  513. "sorted",
  514. "slot",
  515. "span",
  516. "spellcheck",
  517. "src",
  518. "srcdoc",
  519. "srclang",
  520. "srcset",
  521. "start",
  522. "step",
  523. "style",
  524. "tabindex",
  525. "target",
  526. "title",
  527. "translate",
  528. "type",
  529. "typemustmatch",
  530. "updateviacache",
  531. "usemap",
  532. "value",
  533. "width",
  534. "workertype",
  535. "wrap",
  536. }
  537. // "onautocomplete", "onautocompleteerror", "onmousewheel",
  538. // "onshow" and "onsort" have been removed from the spec,
  539. // but are kept here for backwards compatibility.
  540. var eventHandlers = []string{
  541. "onabort",
  542. "onautocomplete",
  543. "onautocompleteerror",
  544. "onauxclick",
  545. "onafterprint",
  546. "onbeforeprint",
  547. "onbeforeunload",
  548. "onblur",
  549. "oncancel",
  550. "oncanplay",
  551. "oncanplaythrough",
  552. "onchange",
  553. "onclick",
  554. "onclose",
  555. "oncontextmenu",
  556. "oncopy",
  557. "oncuechange",
  558. "oncut",
  559. "ondblclick",
  560. "ondrag",
  561. "ondragend",
  562. "ondragenter",
  563. "ondragexit",
  564. "ondragleave",
  565. "ondragover",
  566. "ondragstart",
  567. "ondrop",
  568. "ondurationchange",
  569. "onemptied",
  570. "onended",
  571. "onerror",
  572. "onfocus",
  573. "onhashchange",
  574. "oninput",
  575. "oninvalid",
  576. "onkeydown",
  577. "onkeypress",
  578. "onkeyup",
  579. "onlanguagechange",
  580. "onload",
  581. "onloadeddata",
  582. "onloadedmetadata",
  583. "onloadend",
  584. "onloadstart",
  585. "onmessage",
  586. "onmessageerror",
  587. "onmousedown",
  588. "onmouseenter",
  589. "onmouseleave",
  590. "onmousemove",
  591. "onmouseout",
  592. "onmouseover",
  593. "onmouseup",
  594. "onmousewheel",
  595. "onwheel",
  596. "onoffline",
  597. "ononline",
  598. "onpagehide",
  599. "onpageshow",
  600. "onpaste",
  601. "onpause",
  602. "onplay",
  603. "onplaying",
  604. "onpopstate",
  605. "onprogress",
  606. "onratechange",
  607. "onreset",
  608. "onresize",
  609. "onrejectionhandled",
  610. "onscroll",
  611. "onsecuritypolicyviolation",
  612. "onseeked",
  613. "onseeking",
  614. "onselect",
  615. "onshow",
  616. "onsort",
  617. "onstalled",
  618. "onstorage",
  619. "onsubmit",
  620. "onsuspend",
  621. "ontimeupdate",
  622. "ontoggle",
  623. "onunhandledrejection",
  624. "onunload",
  625. "onvolumechange",
  626. "onwaiting",
  627. }
  628. // extra are ad-hoc values not covered by any of the lists above.
  629. var extra = []string{
  630. "acronym",
  631. "align",
  632. "annotation",
  633. "annotation-xml",
  634. "applet",
  635. "basefont",
  636. "bgsound",
  637. "big",
  638. "blink",
  639. "center",
  640. "color",
  641. "desc",
  642. "face",
  643. "font",
  644. "foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
  645. "foreignobject",
  646. "frame",
  647. "frameset",
  648. "image",
  649. "isindex",
  650. "listing",
  651. "malignmark",
  652. "marquee",
  653. "math",
  654. "mglyph",
  655. "mi",
  656. "mn",
  657. "mo",
  658. "ms",
  659. "mtext",
  660. "nobr",
  661. "noembed",
  662. "noframes",
  663. "plaintext",
  664. "prompt",
  665. "public",
  666. "rb",
  667. "rtc",
  668. "spacer",
  669. "strike",
  670. "svg",
  671. "system",
  672. "tt",
  673. "xmp",
  674. }