You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

1059 lines
29 KiB

  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package bidi
  5. import "log"
  6. // This implementation is a port based on the reference implementation found at:
  7. // https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
  8. //
  9. // described in Unicode Bidirectional Algorithm (UAX #9).
  10. //
  11. // Input:
  12. // There are two levels of input to the algorithm, since clients may prefer to
  13. // supply some information from out-of-band sources rather than relying on the
  14. // default behavior.
  15. //
  16. // - Bidi class array
  17. // - Bidi class array, with externally supplied base line direction
  18. //
  19. // Output:
  20. // Output is separated into several stages:
  21. //
  22. // - levels array over entire paragraph
  23. // - reordering array over entire paragraph
  24. // - levels array over line
  25. // - reordering array over line
  26. //
  27. // Note that for conformance to the Unicode Bidirectional Algorithm,
  28. // implementations are only required to generate correct reordering and
  29. // character directionality (odd or even levels) over a line. Generating
  30. // identical level arrays over a line is not required. Bidi explicit format
  31. // codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and
  32. // positions as long as the rest of the input is properly reordered.
  33. //
  34. // As the algorithm is defined to operate on a single paragraph at a time, this
  35. // implementation is written to handle single paragraphs. Thus rule P1 is
  36. // presumed by this implementation-- the data provided to the implementation is
  37. // assumed to be a single paragraph, and either contains no 'B' codes, or a
  38. // single 'B' code at the end of the input. 'B' is allowed as input to
  39. // illustrate how the algorithm assigns it a level.
  40. //
  41. // Also note that rules L3 and L4 depend on the rendering engine that uses the
  42. // result of the bidi algorithm. This implementation assumes that the rendering
  43. // engine expects combining marks in visual order (e.g. to the left of their
  44. // base character in RTL runs) and that it adjusts the glyphs used to render
  45. // mirrored characters that are in RTL runs so that they render appropriately.
  46. // level is the embedding level of a character. Even embedding levels indicate
  47. // left-to-right order and odd levels indicate right-to-left order. The special
  48. // level of -1 is reserved for undefined order.
  49. type level int8
  50. const implicitLevel level = -1
  51. // in returns if x is equal to any of the values in set.
  52. func (c Class) in(set ...Class) bool {
  53. for _, s := range set {
  54. if c == s {
  55. return true
  56. }
  57. }
  58. return false
  59. }
  60. // A paragraph contains the state of a paragraph.
  61. type paragraph struct {
  62. initialTypes []Class
  63. // Arrays of properties needed for paired bracket evaluation in N0
  64. pairTypes []bracketType // paired Bracket types for paragraph
  65. pairValues []rune // rune for opening bracket or pbOpen and pbClose; 0 for pbNone
  66. embeddingLevel level // default: = implicitLevel;
  67. // at the paragraph levels
  68. resultTypes []Class
  69. resultLevels []level
  70. // Index of matching PDI for isolate initiator characters. For other
  71. // characters, the value of matchingPDI will be set to -1. For isolate
  72. // initiators with no matching PDI, matchingPDI will be set to the length of
  73. // the input string.
  74. matchingPDI []int
  75. // Index of matching isolate initiator for PDI characters. For other
  76. // characters, and for PDIs with no matching isolate initiator, the value of
  77. // matchingIsolateInitiator will be set to -1.
  78. matchingIsolateInitiator []int
  79. }
  80. // newParagraph initializes a paragraph. The user needs to supply a few arrays
  81. // corresponding to the preprocessed text input. The types correspond to the
  82. // Unicode BiDi classes for each rune. pairTypes indicates the bracket type for
  83. // each rune. pairValues provides a unique bracket class identifier for each
  84. // rune (suggested is the rune of the open bracket for opening and matching
  85. // close brackets, after normalization). The embedding levels are optional, but
  86. // may be supplied to encode embedding levels of styled text.
  87. //
  88. // TODO: return an error.
  89. func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) *paragraph {
  90. validateTypes(types)
  91. validatePbTypes(pairTypes)
  92. validatePbValues(pairValues, pairTypes)
  93. validateParagraphEmbeddingLevel(levels)
  94. p := &paragraph{
  95. initialTypes: append([]Class(nil), types...),
  96. embeddingLevel: levels,
  97. pairTypes: pairTypes,
  98. pairValues: pairValues,
  99. resultTypes: append([]Class(nil), types...),
  100. }
  101. p.run()
  102. return p
  103. }
  104. func (p *paragraph) Len() int { return len(p.initialTypes) }
  105. // The algorithm. Does not include line-based processing (Rules L1, L2).
  106. // These are applied later in the line-based phase of the algorithm.
  107. func (p *paragraph) run() {
  108. p.determineMatchingIsolates()
  109. // 1) determining the paragraph level
  110. // Rule P1 is the requirement for entering this algorithm.
  111. // Rules P2, P3.
  112. // If no externally supplied paragraph embedding level, use default.
  113. if p.embeddingLevel == implicitLevel {
  114. p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len())
  115. }
  116. // Initialize result levels to paragraph embedding level.
  117. p.resultLevels = make([]level, p.Len())
  118. setLevels(p.resultLevels, p.embeddingLevel)
  119. // 2) Explicit levels and directions
  120. // Rules X1-X8.
  121. p.determineExplicitEmbeddingLevels()
  122. // Rule X9.
  123. // We do not remove the embeddings, the overrides, the PDFs, and the BNs
  124. // from the string explicitly. But they are not copied into isolating run
  125. // sequences when they are created, so they are removed for all
  126. // practical purposes.
  127. // Rule X10.
  128. // Run remainder of algorithm one isolating run sequence at a time
  129. for _, seq := range p.determineIsolatingRunSequences() {
  130. // 3) resolving weak types
  131. // Rules W1-W7.
  132. seq.resolveWeakTypes()
  133. // 4a) resolving paired brackets
  134. // Rule N0
  135. resolvePairedBrackets(seq)
  136. // 4b) resolving neutral types
  137. // Rules N1-N3.
  138. seq.resolveNeutralTypes()
  139. // 5) resolving implicit embedding levels
  140. // Rules I1, I2.
  141. seq.resolveImplicitLevels()
  142. // Apply the computed levels and types
  143. seq.applyLevelsAndTypes()
  144. }
  145. // Assign appropriate levels to 'hide' LREs, RLEs, LROs, RLOs, PDFs, and
  146. // BNs. This is for convenience, so the resulting level array will have
  147. // a value for every character.
  148. p.assignLevelsToCharactersRemovedByX9()
  149. }
  150. // determineMatchingIsolates determines the matching PDI for each isolate
  151. // initiator and vice versa.
  152. //
  153. // Definition BD9.
  154. //
  155. // At the end of this function:
  156. //
  157. // - The member variable matchingPDI is set to point to the index of the
  158. // matching PDI character for each isolate initiator character. If there is
  159. // no matching PDI, it is set to the length of the input text. For other
  160. // characters, it is set to -1.
  161. // - The member variable matchingIsolateInitiator is set to point to the
  162. // index of the matching isolate initiator character for each PDI character.
  163. // If there is no matching isolate initiator, or the character is not a PDI,
  164. // it is set to -1.
  165. func (p *paragraph) determineMatchingIsolates() {
  166. p.matchingPDI = make([]int, p.Len())
  167. p.matchingIsolateInitiator = make([]int, p.Len())
  168. for i := range p.matchingIsolateInitiator {
  169. p.matchingIsolateInitiator[i] = -1
  170. }
  171. for i := range p.matchingPDI {
  172. p.matchingPDI[i] = -1
  173. if t := p.resultTypes[i]; t.in(LRI, RLI, FSI) {
  174. depthCounter := 1
  175. for j := i + 1; j < p.Len(); j++ {
  176. if u := p.resultTypes[j]; u.in(LRI, RLI, FSI) {
  177. depthCounter++
  178. } else if u == PDI {
  179. if depthCounter--; depthCounter == 0 {
  180. p.matchingPDI[i] = j
  181. p.matchingIsolateInitiator[j] = i
  182. break
  183. }
  184. }
  185. }
  186. if p.matchingPDI[i] == -1 {
  187. p.matchingPDI[i] = p.Len()
  188. }
  189. }
  190. }
  191. }
  192. // determineParagraphEmbeddingLevel reports the resolved paragraph direction of
  193. // the substring limited by the given range [start, end).
  194. //
  195. // Determines the paragraph level based on rules P2, P3. This is also used
  196. // in rule X5c to find if an FSI should resolve to LRI or RLI.
  197. func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level {
  198. var strongType Class = unknownClass
  199. // Rule P2.
  200. for i := start; i < end; i++ {
  201. if t := p.resultTypes[i]; t.in(L, AL, R) {
  202. strongType = t
  203. break
  204. } else if t.in(FSI, LRI, RLI) {
  205. i = p.matchingPDI[i] // skip over to the matching PDI
  206. if i > end {
  207. log.Panic("assert (i <= end)")
  208. }
  209. }
  210. }
  211. // Rule P3.
  212. switch strongType {
  213. case unknownClass: // none found
  214. // default embedding level when no strong types found is 0.
  215. return 0
  216. case L:
  217. return 0
  218. default: // AL, R
  219. return 1
  220. }
  221. }
  222. const maxDepth = 125
  223. // This stack will store the embedding levels and override and isolated
  224. // statuses
  225. type directionalStatusStack struct {
  226. stackCounter int
  227. embeddingLevelStack [maxDepth + 1]level
  228. overrideStatusStack [maxDepth + 1]Class
  229. isolateStatusStack [maxDepth + 1]bool
  230. }
  231. func (s *directionalStatusStack) empty() { s.stackCounter = 0 }
  232. func (s *directionalStatusStack) pop() { s.stackCounter-- }
  233. func (s *directionalStatusStack) depth() int { return s.stackCounter }
  234. func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) {
  235. s.embeddingLevelStack[s.stackCounter] = level
  236. s.overrideStatusStack[s.stackCounter] = overrideStatus
  237. s.isolateStatusStack[s.stackCounter] = isolateStatus
  238. s.stackCounter++
  239. }
  240. func (s *directionalStatusStack) lastEmbeddingLevel() level {
  241. return s.embeddingLevelStack[s.stackCounter-1]
  242. }
  243. func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class {
  244. return s.overrideStatusStack[s.stackCounter-1]
  245. }
  246. func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool {
  247. return s.isolateStatusStack[s.stackCounter-1]
  248. }
  249. // Determine explicit levels using rules X1 - X8
  250. func (p *paragraph) determineExplicitEmbeddingLevels() {
  251. var stack directionalStatusStack
  252. var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int
  253. // Rule X1.
  254. stack.push(p.embeddingLevel, ON, false)
  255. for i, t := range p.resultTypes {
  256. // Rules X2, X3, X4, X5, X5a, X5b, X5c
  257. switch t {
  258. case RLE, LRE, RLO, LRO, RLI, LRI, FSI:
  259. isIsolate := t.in(RLI, LRI, FSI)
  260. isRTL := t.in(RLE, RLO, RLI)
  261. // override if this is an FSI that resolves to RLI
  262. if t == FSI {
  263. isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1)
  264. }
  265. if isIsolate {
  266. p.resultLevels[i] = stack.lastEmbeddingLevel()
  267. if stack.lastDirectionalOverrideStatus() != ON {
  268. p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
  269. }
  270. }
  271. var newLevel level
  272. if isRTL {
  273. // least greater odd
  274. newLevel = (stack.lastEmbeddingLevel() + 1) | 1
  275. } else {
  276. // least greater even
  277. newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1
  278. }
  279. if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 {
  280. if isIsolate {
  281. validIsolateCount++
  282. }
  283. // Push new embedding level, override status, and isolated
  284. // status.
  285. // No check for valid stack counter, since the level check
  286. // suffices.
  287. switch t {
  288. case LRO:
  289. stack.push(newLevel, L, isIsolate)
  290. case RLO:
  291. stack.push(newLevel, R, isIsolate)
  292. default:
  293. stack.push(newLevel, ON, isIsolate)
  294. }
  295. // Not really part of the spec
  296. if !isIsolate {
  297. p.resultLevels[i] = newLevel
  298. }
  299. } else {
  300. // This is an invalid explicit formatting character,
  301. // so apply the "Otherwise" part of rules X2-X5b.
  302. if isIsolate {
  303. overflowIsolateCount++
  304. } else { // !isIsolate
  305. if overflowIsolateCount == 0 {
  306. overflowEmbeddingCount++
  307. }
  308. }
  309. }
  310. // Rule X6a
  311. case PDI:
  312. if overflowIsolateCount > 0 {
  313. overflowIsolateCount--
  314. } else if validIsolateCount == 0 {
  315. // do nothing
  316. } else {
  317. overflowEmbeddingCount = 0
  318. for !stack.lastDirectionalIsolateStatus() {
  319. stack.pop()
  320. }
  321. stack.pop()
  322. validIsolateCount--
  323. }
  324. p.resultLevels[i] = stack.lastEmbeddingLevel()
  325. // Rule X7
  326. case PDF:
  327. // Not really part of the spec
  328. p.resultLevels[i] = stack.lastEmbeddingLevel()
  329. if overflowIsolateCount > 0 {
  330. // do nothing
  331. } else if overflowEmbeddingCount > 0 {
  332. overflowEmbeddingCount--
  333. } else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 {
  334. stack.pop()
  335. }
  336. case B: // paragraph separator.
  337. // Rule X8.
  338. // These values are reset for clarity, in this implementation B
  339. // can only occur as the last code in the array.
  340. stack.empty()
  341. overflowIsolateCount = 0
  342. overflowEmbeddingCount = 0
  343. validIsolateCount = 0
  344. p.resultLevels[i] = p.embeddingLevel
  345. default:
  346. p.resultLevels[i] = stack.lastEmbeddingLevel()
  347. if stack.lastDirectionalOverrideStatus() != ON {
  348. p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
  349. }
  350. }
  351. }
  352. }
  353. type isolatingRunSequence struct {
  354. p *paragraph
  355. indexes []int // indexes to the original string
  356. types []Class // type of each character using the index
  357. resolvedLevels []level // resolved levels after application of rules
  358. level level
  359. sos, eos Class
  360. }
  361. func (i *isolatingRunSequence) Len() int { return len(i.indexes) }
  362. func maxLevel(a, b level) level {
  363. if a > b {
  364. return a
  365. }
  366. return b
  367. }
  368. // Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types,
  369. // either L or R, for each isolating run sequence.
  370. func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence {
  371. length := len(indexes)
  372. types := make([]Class, length)
  373. for i, x := range indexes {
  374. types[i] = p.resultTypes[x]
  375. }
  376. // assign level, sos and eos
  377. prevChar := indexes[0] - 1
  378. for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) {
  379. prevChar--
  380. }
  381. prevLevel := p.embeddingLevel
  382. if prevChar >= 0 {
  383. prevLevel = p.resultLevels[prevChar]
  384. }
  385. var succLevel level
  386. lastType := types[length-1]
  387. if lastType.in(LRI, RLI, FSI) {
  388. succLevel = p.embeddingLevel
  389. } else {
  390. // the first character after the end of run sequence
  391. limit := indexes[length-1] + 1
  392. for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ {
  393. }
  394. succLevel = p.embeddingLevel
  395. if limit < p.Len() {
  396. succLevel = p.resultLevels[limit]
  397. }
  398. }
  399. level := p.resultLevels[indexes[0]]
  400. return &isolatingRunSequence{
  401. p: p,
  402. indexes: indexes,
  403. types: types,
  404. level: level,
  405. sos: typeForLevel(maxLevel(prevLevel, level)),
  406. eos: typeForLevel(maxLevel(succLevel, level)),
  407. }
  408. }
  409. // Resolving weak types Rules W1-W7.
  410. //
  411. // Note that some weak types (EN, AN) remain after this processing is
  412. // complete.
  413. func (s *isolatingRunSequence) resolveWeakTypes() {
  414. // on entry, only these types remain
  415. s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI)
  416. // Rule W1.
  417. // Changes all NSMs.
  418. preceedingCharacterType := s.sos
  419. for i, t := range s.types {
  420. if t == NSM {
  421. s.types[i] = preceedingCharacterType
  422. } else {
  423. if t.in(LRI, RLI, FSI, PDI) {
  424. preceedingCharacterType = ON
  425. }
  426. preceedingCharacterType = t
  427. }
  428. }
  429. // Rule W2.
  430. // EN does not change at the start of the run, because sos != AL.
  431. for i, t := range s.types {
  432. if t == EN {
  433. for j := i - 1; j >= 0; j-- {
  434. if t := s.types[j]; t.in(L, R, AL) {
  435. if t == AL {
  436. s.types[i] = AN
  437. }
  438. break
  439. }
  440. }
  441. }
  442. }
  443. // Rule W3.
  444. for i, t := range s.types {
  445. if t == AL {
  446. s.types[i] = R
  447. }
  448. }
  449. // Rule W4.
  450. // Since there must be values on both sides for this rule to have an
  451. // effect, the scan skips the first and last value.
  452. //
  453. // Although the scan proceeds left to right, and changes the type
  454. // values in a way that would appear to affect the computations
  455. // later in the scan, there is actually no problem. A change in the
  456. // current value can only affect the value to its immediate right,
  457. // and only affect it if it is ES or CS. But the current value can
  458. // only change if the value to its right is not ES or CS. Thus
  459. // either the current value will not change, or its change will have
  460. // no effect on the remainder of the analysis.
  461. for i := 1; i < s.Len()-1; i++ {
  462. t := s.types[i]
  463. if t == ES || t == CS {
  464. prevSepType := s.types[i-1]
  465. succSepType := s.types[i+1]
  466. if prevSepType == EN && succSepType == EN {
  467. s.types[i] = EN
  468. } else if s.types[i] == CS && prevSepType == AN && succSepType == AN {
  469. s.types[i] = AN
  470. }
  471. }
  472. }
  473. // Rule W5.
  474. for i, t := range s.types {
  475. if t == ET {
  476. // locate end of sequence
  477. runStart := i
  478. runEnd := s.findRunLimit(runStart, ET)
  479. // check values at ends of sequence
  480. t := s.sos
  481. if runStart > 0 {
  482. t = s.types[runStart-1]
  483. }
  484. if t != EN {
  485. t = s.eos
  486. if runEnd < len(s.types) {
  487. t = s.types[runEnd]
  488. }
  489. }
  490. if t == EN {
  491. setTypes(s.types[runStart:runEnd], EN)
  492. }
  493. // continue at end of sequence
  494. i = runEnd
  495. }
  496. }
  497. // Rule W6.
  498. for i, t := range s.types {
  499. if t.in(ES, ET, CS) {
  500. s.types[i] = ON
  501. }
  502. }
  503. // Rule W7.
  504. for i, t := range s.types {
  505. if t == EN {
  506. // set default if we reach start of run
  507. prevStrongType := s.sos
  508. for j := i - 1; j >= 0; j-- {
  509. t = s.types[j]
  510. if t == L || t == R { // AL's have been changed to R
  511. prevStrongType = t
  512. break
  513. }
  514. }
  515. if prevStrongType == L {
  516. s.types[i] = L
  517. }
  518. }
  519. }
  520. }
  521. // 6) resolving neutral types Rules N1-N2.
  522. func (s *isolatingRunSequence) resolveNeutralTypes() {
  523. // on entry, only these types can be in resultTypes
  524. s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI)
  525. for i, t := range s.types {
  526. switch t {
  527. case WS, ON, B, S, RLI, LRI, FSI, PDI:
  528. // find bounds of run of neutrals
  529. runStart := i
  530. runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI)
  531. // determine effective types at ends of run
  532. var leadType, trailType Class
  533. // Note that the character found can only be L, R, AN, or
  534. // EN.
  535. if runStart == 0 {
  536. leadType = s.sos
  537. } else {
  538. leadType = s.types[runStart-1]
  539. if leadType.in(AN, EN) {
  540. leadType = R
  541. }
  542. }
  543. if runEnd == len(s.types) {
  544. trailType = s.eos
  545. } else {
  546. trailType = s.types[runEnd]
  547. if trailType.in(AN, EN) {
  548. trailType = R
  549. }
  550. }
  551. var resolvedType Class
  552. if leadType == trailType {
  553. // Rule N1.
  554. resolvedType = leadType
  555. } else {
  556. // Rule N2.
  557. // Notice the embedding level of the run is used, not
  558. // the paragraph embedding level.
  559. resolvedType = typeForLevel(s.level)
  560. }
  561. setTypes(s.types[runStart:runEnd], resolvedType)
  562. // skip over run of (former) neutrals
  563. i = runEnd
  564. }
  565. }
  566. }
  567. func setLevels(levels []level, newLevel level) {
  568. for i := range levels {
  569. levels[i] = newLevel
  570. }
  571. }
  572. func setTypes(types []Class, newType Class) {
  573. for i := range types {
  574. types[i] = newType
  575. }
  576. }
  577. // 7) resolving implicit embedding levels Rules I1, I2.
  578. func (s *isolatingRunSequence) resolveImplicitLevels() {
  579. // on entry, only these types can be in resultTypes
  580. s.assertOnly(L, R, EN, AN)
  581. s.resolvedLevels = make([]level, len(s.types))
  582. setLevels(s.resolvedLevels, s.level)
  583. if (s.level & 1) == 0 { // even level
  584. for i, t := range s.types {
  585. // Rule I1.
  586. if t == L {
  587. // no change
  588. } else if t == R {
  589. s.resolvedLevels[i] += 1
  590. } else { // t == AN || t == EN
  591. s.resolvedLevels[i] += 2
  592. }
  593. }
  594. } else { // odd level
  595. for i, t := range s.types {
  596. // Rule I2.
  597. if t == R {
  598. // no change
  599. } else { // t == L || t == AN || t == EN
  600. s.resolvedLevels[i] += 1
  601. }
  602. }
  603. }
  604. }
  605. // Applies the levels and types resolved in rules W1-I2 to the
  606. // resultLevels array.
  607. func (s *isolatingRunSequence) applyLevelsAndTypes() {
  608. for i, x := range s.indexes {
  609. s.p.resultTypes[x] = s.types[i]
  610. s.p.resultLevels[x] = s.resolvedLevels[i]
  611. }
  612. }
  613. // Return the limit of the run consisting only of the types in validSet
  614. // starting at index. This checks the value at index, and will return
  615. // index if that value is not in validSet.
  616. func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int {
  617. loop:
  618. for ; index < len(s.types); index++ {
  619. t := s.types[index]
  620. for _, valid := range validSet {
  621. if t == valid {
  622. continue loop
  623. }
  624. }
  625. return index // didn't find a match in validSet
  626. }
  627. return len(s.types)
  628. }
  629. // Algorithm validation. Assert that all values in types are in the
  630. // provided set.
  631. func (s *isolatingRunSequence) assertOnly(codes ...Class) {
  632. loop:
  633. for i, t := range s.types {
  634. for _, c := range codes {
  635. if t == c {
  636. continue loop
  637. }
  638. }
  639. log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i])
  640. }
  641. }
  642. // determineLevelRuns returns an array of level runs. Each level run is
  643. // described as an array of indexes into the input string.
  644. //
  645. // Determines the level runs. Rule X9 will be applied in determining the
  646. // runs, in the way that makes sure the characters that are supposed to be
  647. // removed are not included in the runs.
  648. func (p *paragraph) determineLevelRuns() [][]int {
  649. run := []int{}
  650. allRuns := [][]int{}
  651. currentLevel := implicitLevel
  652. for i := range p.initialTypes {
  653. if !isRemovedByX9(p.initialTypes[i]) {
  654. if p.resultLevels[i] != currentLevel {
  655. // we just encountered a new run; wrap up last run
  656. if currentLevel >= 0 { // only wrap it up if there was a run
  657. allRuns = append(allRuns, run)
  658. run = nil
  659. }
  660. // Start new run
  661. currentLevel = p.resultLevels[i]
  662. }
  663. run = append(run, i)
  664. }
  665. }
  666. // Wrap up the final run, if any
  667. if len(run) > 0 {
  668. allRuns = append(allRuns, run)
  669. }
  670. return allRuns
  671. }
  672. // Definition BD13. Determine isolating run sequences.
  673. func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence {
  674. levelRuns := p.determineLevelRuns()
  675. // Compute the run that each character belongs to
  676. runForCharacter := make([]int, p.Len())
  677. for i, run := range levelRuns {
  678. for _, index := range run {
  679. runForCharacter[index] = i
  680. }
  681. }
  682. sequences := []*isolatingRunSequence{}
  683. var currentRunSequence []int
  684. for _, run := range levelRuns {
  685. first := run[0]
  686. if p.initialTypes[first] != PDI || p.matchingIsolateInitiator[first] == -1 {
  687. currentRunSequence = nil
  688. // int run = i;
  689. for {
  690. // Copy this level run into currentRunSequence
  691. currentRunSequence = append(currentRunSequence, run...)
  692. last := currentRunSequence[len(currentRunSequence)-1]
  693. lastT := p.initialTypes[last]
  694. if lastT.in(LRI, RLI, FSI) && p.matchingPDI[last] != p.Len() {
  695. run = levelRuns[runForCharacter[p.matchingPDI[last]]]
  696. } else {
  697. break
  698. }
  699. }
  700. sequences = append(sequences, p.isolatingRunSequence(currentRunSequence))
  701. }
  702. }
  703. return sequences
  704. }
  705. // Assign level information to characters removed by rule X9. This is for
  706. // ease of relating the level information to the original input data. Note
  707. // that the levels assigned to these codes are arbitrary, they're chosen so
  708. // as to avoid breaking level runs.
  709. func (p *paragraph) assignLevelsToCharactersRemovedByX9() {
  710. for i, t := range p.initialTypes {
  711. if t.in(LRE, RLE, LRO, RLO, PDF, BN) {
  712. p.resultTypes[i] = t
  713. p.resultLevels[i] = -1
  714. }
  715. }
  716. // now propagate forward the levels information (could have
  717. // propagated backward, the main thing is not to introduce a level
  718. // break where one doesn't already exist).
  719. if p.resultLevels[0] == -1 {
  720. p.resultLevels[0] = p.embeddingLevel
  721. }
  722. for i := 1; i < len(p.initialTypes); i++ {
  723. if p.resultLevels[i] == -1 {
  724. p.resultLevels[i] = p.resultLevels[i-1]
  725. }
  726. }
  727. // Embedding information is for informational purposes only so need not be
  728. // adjusted.
  729. }
  730. //
  731. // Output
  732. //
  733. // getLevels computes levels array breaking lines at offsets in linebreaks.
  734. // Rule L1.
  735. //
  736. // The linebreaks array must include at least one value. The values must be
  737. // in strictly increasing order (no duplicates) between 1 and the length of
  738. // the text, inclusive. The last value must be the length of the text.
  739. func (p *paragraph) getLevels(linebreaks []int) []level {
  740. // Note that since the previous processing has removed all
  741. // P, S, and WS values from resultTypes, the values referred to
  742. // in these rules are the initial types, before any processing
  743. // has been applied (including processing of overrides).
  744. //
  745. // This example implementation has reinserted explicit format codes
  746. // and BN, in order that the levels array correspond to the
  747. // initial text. Their final placement is not normative.
  748. // These codes are treated like WS in this implementation,
  749. // so they don't interrupt sequences of WS.
  750. validateLineBreaks(linebreaks, p.Len())
  751. result := append([]level(nil), p.resultLevels...)
  752. // don't worry about linebreaks since if there is a break within
  753. // a series of WS values preceding S, the linebreak itself
  754. // causes the reset.
  755. for i, t := range p.initialTypes {
  756. if t.in(B, S) {
  757. // Rule L1, clauses one and two.
  758. result[i] = p.embeddingLevel
  759. // Rule L1, clause three.
  760. for j := i - 1; j >= 0; j-- {
  761. if isWhitespace(p.initialTypes[j]) { // including format codes
  762. result[j] = p.embeddingLevel
  763. } else {
  764. break
  765. }
  766. }
  767. }
  768. }
  769. // Rule L1, clause four.
  770. start := 0
  771. for _, limit := range linebreaks {
  772. for j := limit - 1; j >= start; j-- {
  773. if isWhitespace(p.initialTypes[j]) { // including format codes
  774. result[j] = p.embeddingLevel
  775. } else {
  776. break
  777. }
  778. }
  779. start = limit
  780. }
  781. return result
  782. }
  783. // getReordering returns the reordering of lines from a visual index to a
  784. // logical index for line breaks at the given offsets.
  785. //
  786. // Lines are concatenated from left to right. So for example, the fifth
  787. // character from the left on the third line is
  788. //
  789. // getReordering(linebreaks)[linebreaks[1] + 4]
  790. //
  791. // (linebreaks[1] is the position after the last character of the second
  792. // line, which is also the index of the first character on the third line,
  793. // and adding four gets the fifth character from the left).
  794. //
  795. // The linebreaks array must include at least one value. The values must be
  796. // in strictly increasing order (no duplicates) between 1 and the length of
  797. // the text, inclusive. The last value must be the length of the text.
  798. func (p *paragraph) getReordering(linebreaks []int) []int {
  799. validateLineBreaks(linebreaks, p.Len())
  800. return computeMultilineReordering(p.getLevels(linebreaks), linebreaks)
  801. }
  802. // Return multiline reordering array for a given level array. Reordering
  803. // does not occur across a line break.
  804. func computeMultilineReordering(levels []level, linebreaks []int) []int {
  805. result := make([]int, len(levels))
  806. start := 0
  807. for _, limit := range linebreaks {
  808. tempLevels := make([]level, limit-start)
  809. copy(tempLevels, levels[start:])
  810. for j, order := range computeReordering(tempLevels) {
  811. result[start+j] = order + start
  812. }
  813. start = limit
  814. }
  815. return result
  816. }
  817. // Return reordering array for a given level array. This reorders a single
  818. // line. The reordering is a visual to logical map. For example, the
  819. // leftmost char is string.charAt(order[0]). Rule L2.
  820. func computeReordering(levels []level) []int {
  821. result := make([]int, len(levels))
  822. // initialize order
  823. for i := range result {
  824. result[i] = i
  825. }
  826. // locate highest level found on line.
  827. // Note the rules say text, but no reordering across line bounds is
  828. // performed, so this is sufficient.
  829. highestLevel := level(0)
  830. lowestOddLevel := level(maxDepth + 2)
  831. for _, level := range levels {
  832. if level > highestLevel {
  833. highestLevel = level
  834. }
  835. if level&1 != 0 && level < lowestOddLevel {
  836. lowestOddLevel = level
  837. }
  838. }
  839. for level := highestLevel; level >= lowestOddLevel; level-- {
  840. for i := 0; i < len(levels); i++ {
  841. if levels[i] >= level {
  842. // find range of text at or above this level
  843. start := i
  844. limit := i + 1
  845. for limit < len(levels) && levels[limit] >= level {
  846. limit++
  847. }
  848. for j, k := start, limit-1; j < k; j, k = j+1, k-1 {
  849. result[j], result[k] = result[k], result[j]
  850. }
  851. // skip to end of level run
  852. i = limit
  853. }
  854. }
  855. }
  856. return result
  857. }
  858. // isWhitespace reports whether the type is considered a whitespace type for the
  859. // line break rules.
  860. func isWhitespace(c Class) bool {
  861. switch c {
  862. case LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, WS:
  863. return true
  864. }
  865. return false
  866. }
  867. // isRemovedByX9 reports whether the type is one of the types removed in X9.
  868. func isRemovedByX9(c Class) bool {
  869. switch c {
  870. case LRE, RLE, LRO, RLO, PDF, BN:
  871. return true
  872. }
  873. return false
  874. }
  875. // typeForLevel reports the strong type (L or R) corresponding to the level.
  876. func typeForLevel(level level) Class {
  877. if (level & 0x1) == 0 {
  878. return L
  879. }
  880. return R
  881. }
  882. // TODO: change validation to not panic
  883. func validateTypes(types []Class) {
  884. if len(types) == 0 {
  885. log.Panic("types is null")
  886. }
  887. for i, t := range types[:len(types)-1] {
  888. if t == B {
  889. log.Panicf("B type before end of paragraph at index: %d", i)
  890. }
  891. }
  892. }
  893. func validateParagraphEmbeddingLevel(embeddingLevel level) {
  894. if embeddingLevel != implicitLevel &&
  895. embeddingLevel != 0 &&
  896. embeddingLevel != 1 {
  897. log.Panicf("illegal paragraph embedding level: %d", embeddingLevel)
  898. }
  899. }
  900. func validateLineBreaks(linebreaks []int, textLength int) {
  901. prev := 0
  902. for i, next := range linebreaks {
  903. if next <= prev {
  904. log.Panicf("bad linebreak: %d at index: %d", next, i)
  905. }
  906. prev = next
  907. }
  908. if prev != textLength {
  909. log.Panicf("last linebreak was %d, want %d", prev, textLength)
  910. }
  911. }
  912. func validatePbTypes(pairTypes []bracketType) {
  913. if len(pairTypes) == 0 {
  914. log.Panic("pairTypes is null")
  915. }
  916. for i, pt := range pairTypes {
  917. switch pt {
  918. case bpNone, bpOpen, bpClose:
  919. default:
  920. log.Panicf("illegal pairType value at %d: %v", i, pairTypes[i])
  921. }
  922. }
  923. }
  924. func validatePbValues(pairValues []rune, pairTypes []bracketType) {
  925. if pairValues == nil {
  926. log.Panic("pairValues is null")
  927. }
  928. if len(pairTypes) != len(pairValues) {
  929. log.Panic("pairTypes is different length from pairValues")
  930. }
  931. }