You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

82 lines
2.7 KiB

  1. // Copyright 2014 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package ucd_test
  5. import (
  6. "fmt"
  7. "strings"
  8. "golang.org/x/text/internal/ucd"
  9. )
  10. func Example() {
  11. // Read rune-by-rune from UnicodeData.
  12. var count int
  13. p := ucd.New(strings.NewReader(unicodeData))
  14. for p.Next() {
  15. count++
  16. if lower := p.Runes(ucd.SimpleLowercaseMapping); lower != nil {
  17. fmt.Printf("lower(%U) -> %U\n", p.Rune(0), lower[0])
  18. }
  19. }
  20. if err := p.Err(); err != nil {
  21. fmt.Println(err)
  22. }
  23. fmt.Println("Number of runes visited:", count)
  24. // Read raw ranges from Scripts.
  25. p = ucd.New(strings.NewReader(scripts), ucd.KeepRanges)
  26. for p.Next() {
  27. start, end := p.Range(0)
  28. fmt.Printf("%04X..%04X: %s\n", start, end, p.String(1))
  29. }
  30. if err := p.Err(); err != nil {
  31. fmt.Println(err)
  32. }
  33. // Output:
  34. // lower(U+00C0) -> U+00E0
  35. // lower(U+00C1) -> U+00E1
  36. // lower(U+00C2) -> U+00E2
  37. // lower(U+00C3) -> U+00E3
  38. // lower(U+00C4) -> U+00E4
  39. // Number of runes visited: 6594
  40. // 0000..001F: Common
  41. // 0020..0020: Common
  42. // 0021..0023: Common
  43. // 0024..0024: Common
  44. }
  45. // unicodeData is an excerpt from UnicodeData.txt: semicolon-separated records for U+00B9..U+00C4, followed by a First/Last pair (3400, 4DB5) marking a legacy rune range.
  46. const unicodeData = `
  47. 00B9;SUPERSCRIPT ONE;No;0;EN;<super> 0031;;1;1;N;SUPERSCRIPT DIGIT ONE;;;;
  48. 00BA;MASCULINE ORDINAL INDICATOR;Lo;0;L;<super> 006F;;;;N;;;;;
  49. 00BB;RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK;Pf;0;ON;;;;;Y;RIGHT POINTING GUILLEMET;;;;
  50. 00BC;VULGAR FRACTION ONE QUARTER;No;0;ON;<fraction> 0031 2044 0034;;;1/4;N;FRACTION ONE QUARTER;;;;
  51. 00BD;VULGAR FRACTION ONE HALF;No;0;ON;<fraction> 0031 2044 0032;;;1/2;N;FRACTION ONE HALF;;;;
  52. 00BE;VULGAR FRACTION THREE QUARTERS;No;0;ON;<fraction> 0033 2044 0034;;;3/4;N;FRACTION THREE QUARTERS;;;;
  53. 00BF;INVERTED QUESTION MARK;Po;0;ON;;;;;N;;;;;
  54. 00C0;LATIN CAPITAL LETTER A WITH GRAVE;Lu;0;L;0041 0300;;;;N;LATIN CAPITAL LETTER A GRAVE;;;00E0;
  55. 00C1;LATIN CAPITAL LETTER A WITH ACUTE;Lu;0;L;0041 0301;;;;N;LATIN CAPITAL LETTER A ACUTE;;;00E1;
  56. 00C2;LATIN CAPITAL LETTER A WITH CIRCUMFLEX;Lu;0;L;0041 0302;;;;N;LATIN CAPITAL LETTER A CIRCUMFLEX;;;00E2;
  57. 00C3;LATIN CAPITAL LETTER A WITH TILDE;Lu;0;L;0041 0303;;;;N;LATIN CAPITAL LETTER A TILDE;;;00E3;
  58. 00C4;LATIN CAPITAL LETTER A WITH DIAERESIS;Lu;0;L;0041 0308;;;;N;LATIN CAPITAL LETTER A DIAERESIS;;;00E4;
  59. # A legacy rune range.
  60. 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
  61. 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
  62. `
  63. // scripts is an excerpt from Scripts.txt: '#' header lines followed by entries that mix code point ranges (e.g. 0000..001F) with single code points (e.g. 0020), each tagged with a script name.
  64. const scripts = `
  65. # Property: Script
  66. # ================================================
  67. 0000..001F ; Common # Cc [32] <control-0000>..<control-001F>
  68. 0020 ; Common # Zs SPACE
  69. 0021..0023 ; Common # Po [3] EXCLAMATION MARK..NUMBER SIGN
  70. 0024 ; Common # Sc DOLLAR SIGN
  71. `