|
| 1 | +// Package bidi implements the Unicode Bidirectional Algorithm. |
| 2 | +// |
| 3 | +// The implementation is inspired by golang.org/x/text/unicode/bidi. |
| 4 | +package bidi |
| 5 | + |
| 6 | +import ( |
| 7 | + "fmt" |
| 8 | + |
| 9 | + "github.com/go-text/typesetting/internal/unicodedata" |
| 10 | + ucd "github.com/go-text/typesetting/internal/unicodedata" |
| 11 | +) |
| 12 | + |
| 13 | +// Paragraph is the main entry point of the package. |
| 14 | +// |
| 15 | +// It holds a single text for Bidi processing, |
| 16 | +// stores internal data required to segment a string, |
| 17 | +// and should be reused to reduce allocations. |
| 18 | +type Paragraph struct { |
| 19 | + text []rune // input values |
| 20 | + |
| 21 | + // o Ordering |
| 22 | + initialTypes []ucd.BidiClass |
| 23 | + pairTypes []bracketType |
| 24 | + pairValues []rune |
| 25 | + |
| 26 | + embeddingLevel level // default: = implicitLevel; |
| 27 | + |
| 28 | + // at the paragraph levels |
| 29 | + resultTypes []ucd.BidiClass |
| 30 | + resultLevels []level |
| 31 | + |
| 32 | + // TODO: holds enough for run computation |
| 33 | + |
| 34 | + // Index of matching PDI for isolate initiator characters. For other |
| 35 | + // characters, the value of matchingPDI will be set to -1. For isolate |
| 36 | + // initiators with no matching PDI, matchingPDI will be set to the length of |
| 37 | + // the input string. |
| 38 | + matchingPDI []int |
| 39 | + |
| 40 | + // Index of matching isolate initiator for PDI characters. For other |
| 41 | + // characters, and for PDIs with no matching isolate initiator, the value of |
| 42 | + // matchingIsolateInitiator will be set to -1. |
| 43 | + matchingIsolateInitiator []int |
| 44 | +} |
| 45 | + |
| 46 | +// Run is a slice of text with a constant direction. |
| 47 | +type Run struct { |
| 48 | + // Start and End indicate the subslice of the input text. |
| 49 | + Start, End int |
| 50 | + |
| 51 | + IsRTL bool |
| 52 | + |
| 53 | + level level |
| 54 | +} |
| 55 | + |
| 56 | +type Runs struct { |
| 57 | + levels []level |
| 58 | +} |
| 59 | + |
| 60 | +// NumRuns returns the number of runs. |
| 61 | +func (o *Runs) NumRuns() int { |
| 62 | + return 0 // FIXME |
| 63 | +} |
| 64 | + |
| 65 | +// Run returns the ith run within the ordering. |
| 66 | +func (o *Runs) Run(i int) Run { |
| 67 | + return Run{} // FIXME |
| 68 | +} |
| 69 | + |
| 70 | +// Segment applies the Bidi algorithm. |
| 71 | +// The returned iterator is only valid until the next call to [Segment]. |
| 72 | +// |
| 73 | +// [defaultDirection] sets the default direction for a Paragraph. The direction is |
| 74 | +// overridden if the text contains directional characters. |
| 75 | +func (b *Paragraph) Segment(text []rune, defaultDirection DefaultDirection) Runs { |
| 76 | + b.text = append(b.text[:0], text...) |
| 77 | + return b.segment(defaultDirection) |
| 78 | +} |
| 79 | + |
| 80 | +func (b *Paragraph) SegmentString(text string, defaultDirection DefaultDirection) Runs { |
| 81 | + b.text = b.text[:0] |
| 82 | + for _, r := range text { |
| 83 | + b.text = append(b.text, r) |
| 84 | + } |
| 85 | + return b.segment(defaultDirection) |
| 86 | +} |
| 87 | + |
| 88 | +func (b *Paragraph) SegmentBytes(text string, defaultDirection DefaultDirection) Runs { |
| 89 | + b.text = b.text[:0] |
| 90 | + // The Go compiler should optimize this without allocating a string. |
| 91 | + for _, r := range string(text) { |
| 92 | + b.text = append(b.text, r) |
| 93 | + } |
| 94 | + return b.segment(defaultDirection) |
| 95 | +} |
| 96 | + |
| 97 | +func (b *Paragraph) segment(defaultDirection DefaultDirection) Runs { |
| 98 | + b.prepareInput() |
| 99 | + levels := b.Order(defaultDirection) |
| 100 | + // TODO : runs from levels |
| 101 | + return Runs{levels: levels} |
| 102 | +} |
| 103 | + |
| 104 | +type charType = unicodedata.BidiClass |
| 105 | + |
| 106 | +type Level = level |
| 107 | + |
| 108 | +type ParType = charType |
| 109 | + |
| 110 | +func max(a, b level) level { |
| 111 | + if a < b { |
| 112 | + return b |
| 113 | + } |
| 114 | + return a |
| 115 | +} |
| 116 | + |
// DefaultDirection indicates the default flow of text requested when
// segmenting a Paragraph.
type DefaultDirection uint8

const (
	// Neutral requests no particular direction: Order then uses -1 as the
	// paragraph level (presumably meaning that the level is deduced from the
	// text).
	Neutral DefaultDirection = iota
	// LeftToRight makes Order use 0 as the paragraph level.
	LeftToRight
	// RightToLeft makes Order use 1 as the paragraph level.
	RightToLeft
)
| 125 | + |
| 126 | +// Initialize the p.pairTypes, p.pairValues and p.types from the input previously |
| 127 | +// set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph |
| 128 | +// separator (bidi class B). |
| 129 | +// |
| 130 | +// The function p.Order() needs these values to be set, so this preparation could be postponed. |
| 131 | +// But since the SetBytes and SetStrings functions return the length of the input up to the paragraph |
| 132 | +// separator, the whole input needs to be processed anyway and should not be done twice. |
| 133 | +// |
| 134 | +// The function has the same return values as SetBytes() / SetString() |
| 135 | +func (p *Paragraph) prepareInput() { |
| 136 | + // clear slices from previous SetString or SetBytes |
| 137 | + if L := len(p.text); cap(p.pairTypes) < L { |
| 138 | + p.pairTypes = make([]bracketType, L) |
| 139 | + p.pairValues = make([]rune, L) |
| 140 | + p.initialTypes = make([]ucd.BidiClass, L) |
| 141 | + p.resultTypes = make([]ucd.BidiClass, L) |
| 142 | + } else { |
| 143 | + p.pairTypes = p.pairTypes[:L] |
| 144 | + p.pairValues = p.pairValues[:L] |
| 145 | + p.initialTypes = p.initialTypes[:L] |
| 146 | + p.resultTypes = p.resultTypes[:L] |
| 147 | + } |
| 148 | + |
| 149 | + for i, r := range p.text { |
| 150 | + cls, bracket := ucd.LookupBidiClass(r) |
| 151 | + if cls == ucd.BD_B { |
| 152 | + // Unlikely, but trim the arrays and exit |
| 153 | + p.text = p.text[:i] |
| 154 | + p.pairTypes = p.pairTypes[:i] |
| 155 | + p.pairValues = p.pairValues[:i] |
| 156 | + p.initialTypes = p.initialTypes[:i] |
| 157 | + p.resultTypes = p.resultTypes[:i] |
| 158 | + return |
| 159 | + } |
| 160 | + p.initialTypes[i] = cls |
| 161 | + p.resultTypes[i] = cls |
| 162 | + if bracket.IsOpening() { |
| 163 | + p.pairTypes[i] = bpOpen |
| 164 | + p.pairValues[i] = r |
| 165 | + } else if bracket.IsBracket() { |
| 166 | + // this must be a closing bracket, |
| 167 | + // since IsOpeningBracket is not true |
| 168 | + p.pairTypes[i] = bpClose |
| 169 | + p.pairValues[i] = bracket.Reverse(r) |
| 170 | + } else { |
| 171 | + p.pairTypes[i] = bpNone |
| 172 | + p.pairValues[i] = 0 |
| 173 | + } |
| 174 | + } |
| 175 | +} |
| 176 | + |
| 177 | +// // IsLeftToRight reports whether the principle direction of rendering for this |
| 178 | +// // paragraphs is left-to-right. If this returns false, the principle direction |
| 179 | +// // of rendering is right-to-left. |
| 180 | +// func (p *Paragraph) IsLeftToRight() bool { |
| 181 | +// return p.Direction() == LeftToRight |
| 182 | +// } |
| 183 | + |
| 184 | +// // Direction returns the direction of the text of this paragraph. |
| 185 | +// // |
| 186 | +// // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral. |
| 187 | +// func (p *Paragraph) Direction() Direction { |
| 188 | +// return p.o.Direction() |
| 189 | +// } |
| 190 | + |
| 191 | +// // TODO: what happens if the position is > len(input)? This should return an error. |
| 192 | + |
| 193 | +// // RunAt reports the Run at the given position of the input text. |
| 194 | +// // |
| 195 | +// // This method can be used for computing line breaks on paragraphs. |
| 196 | +// func (p *Paragraph) RunAt(pos int) Run { |
| 197 | +// c := 0 |
| 198 | +// runNumber := 0 |
| 199 | +// for i, r := range p.o.runes { |
| 200 | +// c += len(r) |
| 201 | +// if pos < c { |
| 202 | +// runNumber = i |
| 203 | +// } |
| 204 | +// } |
| 205 | +// return p.o.Run(runNumber) |
| 206 | +// } |
| 207 | + |
| 208 | +// func calculateOrdering(levels []level, runes []rune) Ordering { |
| 209 | +// var curDir Direction |
| 210 | + |
| 211 | +// prevDir := Neutral |
| 212 | +// prevI := 0 |
| 213 | + |
| 214 | +// o := Ordering{} |
| 215 | +// // lvl = 0,2,4,...: left to right |
| 216 | +// // lvl = 1,3,5,...: right to left |
| 217 | +// for i, lvl := range levels { |
| 218 | +// if lvl%2 == 0 { |
| 219 | +// curDir = LeftToRight |
| 220 | +// } else { |
| 221 | +// curDir = RightToLeft |
| 222 | +// } |
| 223 | +// if curDir != prevDir { |
| 224 | +// if i > 0 { |
| 225 | +// o.runes = append(o.runes, runes[prevI:i]) |
| 226 | +// o.directions = append(o.directions, prevDir) |
| 227 | +// o.startpos = append(o.startpos, prevI) |
| 228 | +// } |
| 229 | +// prevI = i |
| 230 | +// prevDir = curDir |
| 231 | +// } |
| 232 | +// } |
| 233 | +// o.runes = append(o.runes, runes[prevI:]) |
| 234 | +// o.directions = append(o.directions, prevDir) |
| 235 | +// o.startpos = append(o.startpos, prevI) |
| 236 | +// return o |
| 237 | +// } |
| 238 | + |
| 239 | +// Order computes the visual ordering of all the runs in a Paragraph. |
| 240 | +func (p *Paragraph) Order(defaultDirection DefaultDirection) []level { |
| 241 | + if len(p.initialTypes) == 0 { |
| 242 | + return nil |
| 243 | + } |
| 244 | + |
| 245 | + fmt.Println(p.initialTypes) |
| 246 | + fmt.Println(p.pairTypes) |
| 247 | + |
| 248 | + lvl := level(-1) |
| 249 | + if defaultDirection == LeftToRight { |
| 250 | + lvl = 0 |
| 251 | + } else if defaultDirection == RightToLeft { |
| 252 | + lvl = 1 |
| 253 | + } |
| 254 | + |
| 255 | + // if err := validateTypes(p.initialTypes); err != nil { |
| 256 | + // return Ordering{}, err |
| 257 | + // } |
| 258 | + // if err := validatePbTypes(p.pairTypes); err != nil { |
| 259 | + // return Ordering{}, err |
| 260 | + // } |
| 261 | + // if err := validatePbValues(p.pairValues, p.pairTypes); err != nil { |
| 262 | + // return Ordering{}, err |
| 263 | + // } |
| 264 | + // if err := validateParagraphEmbeddingLevel(lvl); err != nil { |
| 265 | + // return Ordering{}, err |
| 266 | + // } |
| 267 | + |
| 268 | + p.embeddingLevel = lvl |
| 269 | + |
| 270 | + p.run() |
| 271 | + |
| 272 | + return p.getLevels() |
| 273 | + |
| 274 | + // p.o = calculateOrdering(levels, p.runes) |
| 275 | + // return p.o, nil |
| 276 | +} |
| 277 | + |
| 278 | +// // Line computes the visual ordering of runs for a single line starting and |
| 279 | +// // ending at the given positions in the original text. |
| 280 | +// func (p *Paragraph) Line(start, end int) (Ordering, error) { |
| 281 | +// lineTypes := p.types[start:end] |
| 282 | +// para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1) |
| 283 | +// if err != nil { |
| 284 | +// return Ordering{}, err |
| 285 | +// } |
| 286 | +// levels := para.getLevels([]int{len(lineTypes)}) |
| 287 | +// o := calculateOrdering(levels, p.runes[start:end]) |
| 288 | +// return o, nil |
| 289 | +// } |
0 commit comments