Skip to content

Commit d6db161

Browse files
committed
initial port of x/text/bidi; bidi lookup table; bracket lookup
1 parent 48df487 commit d6db161

10 files changed

Lines changed: 596720 additions & 0 deletions

File tree

internal/bidi/bidi.go

Lines changed: 289 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,289 @@
1+
// Package bidi implements the Unicode Bidi algorithm.
2+
//
3+
// The implementation is inspired by x/text/unicode/bidi.
4+
package bidi
5+
6+
import (
7+
"fmt"
8+
9+
"github.com/go-text/typesetting/internal/unicodedata"
10+
ucd "github.com/go-text/typesetting/internal/unicodedata"
11+
)
12+
13+
// Paragraph is the main entry point of the package.
14+
//
15+
// It holds a single text for Bidi processing,
16+
// stores internal data required to segment a string,
17+
// and should be reused to reduce allocations.
18+
type Paragraph struct {
19+
text []rune // input values
20+
21+
// o Ordering
22+
initialTypes []ucd.BidiClass
23+
pairTypes []bracketType
24+
pairValues []rune
25+
26+
embeddingLevel level // default: = implicitLevel;
27+
28+
// at the paragraph levels
29+
resultTypes []ucd.BidiClass
30+
resultLevels []level
31+
32+
// TODO: holds enough for run computation
33+
34+
// Index of matching PDI for isolate initiator characters. For other
35+
// characters, the value of matchingPDI will be set to -1. For isolate
36+
// initiators with no matching PDI, matchingPDI will be set to the length of
37+
// the input string.
38+
matchingPDI []int
39+
40+
// Index of matching isolate initiator for PDI characters. For other
41+
// characters, and for PDIs with no matching isolate initiator, the value of
42+
// matchingIsolateInitiator will be set to -1.
43+
matchingIsolateInitiator []int
44+
}
45+
46+
// Run is a slice of text with a constant direction.
47+
type Run struct {
48+
// Start and End indicate the subslice of the input text.
49+
Start, End int
50+
51+
IsRTL bool
52+
53+
level level
54+
}
55+
56+
type Runs struct {
57+
levels []level
58+
}
59+
60+
// NumRuns returns the number of runs.
61+
func (o *Runs) NumRuns() int {
62+
return 0 // FIXME
63+
}
64+
65+
// Run returns the ith run within the ordering.
66+
func (o *Runs) Run(i int) Run {
67+
return Run{} // FIXME
68+
}
69+
70+
// Segment applies the Bidi algorithm.
71+
// The returned iterator is only valid until the next call to [Segment].
72+
//
73+
// [defaultDirection] sets the default direction for a Paragraph. The direction is
74+
// overridden if the text contains directional characters.
75+
func (b *Paragraph) Segment(text []rune, defaultDirection DefaultDirection) Runs {
76+
b.text = append(b.text[:0], text...)
77+
return b.segment(defaultDirection)
78+
}
79+
80+
func (b *Paragraph) SegmentString(text string, defaultDirection DefaultDirection) Runs {
81+
b.text = b.text[:0]
82+
for _, r := range text {
83+
b.text = append(b.text, r)
84+
}
85+
return b.segment(defaultDirection)
86+
}
87+
88+
func (b *Paragraph) SegmentBytes(text string, defaultDirection DefaultDirection) Runs {
89+
b.text = b.text[:0]
90+
// The Go compiler should optimize this without allocating a string.
91+
for _, r := range string(text) {
92+
b.text = append(b.text, r)
93+
}
94+
return b.segment(defaultDirection)
95+
}
96+
97+
func (b *Paragraph) segment(defaultDirection DefaultDirection) Runs {
98+
b.prepareInput()
99+
levels := b.Order(defaultDirection)
100+
// TODO : runs from levels
101+
return Runs{levels: levels}
102+
}
103+
104+
type charType = unicodedata.BidiClass
105+
106+
type Level = level
107+
108+
type ParType = charType
109+
110+
func max(a, b level) level {
111+
if a < b {
112+
return b
113+
}
114+
return a
115+
}
116+
117+
// DefaultDirection indicates the overall flow of text requested by the
// caller of the Segment methods.
type DefaultDirection uint8

const (
	// Neutral requests no particular direction: Order then uses -1 as the
	// paragraph embedding level.
	Neutral DefaultDirection = iota
	// LeftToRight makes Order use 0 as the paragraph embedding level.
	LeftToRight
	// RightToLeft makes Order use 1 as the paragraph embedding level.
	RightToLeft
)
125+
126+
// Initialize the p.pairTypes, p.pairValues and p.types from the input previously
127+
// set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph
128+
// separator (bidi class B).
129+
//
130+
// The function p.Order() needs these values to be set, so this preparation could be postponed.
131+
// But since the SetBytes and SetStrings functions return the length of the input up to the paragraph
132+
// separator, the whole input needs to be processed anyway and should not be done twice.
133+
//
134+
// The function has the same return values as SetBytes() / SetString()
135+
func (p *Paragraph) prepareInput() {
136+
// clear slices from previous SetString or SetBytes
137+
if L := len(p.text); cap(p.pairTypes) < L {
138+
p.pairTypes = make([]bracketType, L)
139+
p.pairValues = make([]rune, L)
140+
p.initialTypes = make([]ucd.BidiClass, L)
141+
p.resultTypes = make([]ucd.BidiClass, L)
142+
} else {
143+
p.pairTypes = p.pairTypes[:L]
144+
p.pairValues = p.pairValues[:L]
145+
p.initialTypes = p.initialTypes[:L]
146+
p.resultTypes = p.resultTypes[:L]
147+
}
148+
149+
for i, r := range p.text {
150+
cls, bracket := ucd.LookupBidiClass(r)
151+
if cls == ucd.BD_B {
152+
// Unlikely, but trim the arrays and exit
153+
p.text = p.text[:i]
154+
p.pairTypes = p.pairTypes[:i]
155+
p.pairValues = p.pairValues[:i]
156+
p.initialTypes = p.initialTypes[:i]
157+
p.resultTypes = p.resultTypes[:i]
158+
return
159+
}
160+
p.initialTypes[i] = cls
161+
p.resultTypes[i] = cls
162+
if bracket.IsOpening() {
163+
p.pairTypes[i] = bpOpen
164+
p.pairValues[i] = r
165+
} else if bracket.IsBracket() {
166+
// this must be a closing bracket,
167+
// since IsOpeningBracket is not true
168+
p.pairTypes[i] = bpClose
169+
p.pairValues[i] = bracket.Reverse(r)
170+
} else {
171+
p.pairTypes[i] = bpNone
172+
p.pairValues[i] = 0
173+
}
174+
}
175+
}
176+
177+
// // IsLeftToRight reports whether the principle direction of rendering for this
178+
// // paragraphs is left-to-right. If this returns false, the principle direction
179+
// // of rendering is right-to-left.
180+
// func (p *Paragraph) IsLeftToRight() bool {
181+
// return p.Direction() == LeftToRight
182+
// }
183+
184+
// // Direction returns the direction of the text of this paragraph.
185+
// //
186+
// // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
187+
// func (p *Paragraph) Direction() Direction {
188+
// return p.o.Direction()
189+
// }
190+
191+
// // TODO: what happens if the position is > len(input)? This should return an error.
192+
193+
// // RunAt reports the Run at the given position of the input text.
194+
// //
195+
// // This method can be used for computing line breaks on paragraphs.
196+
// func (p *Paragraph) RunAt(pos int) Run {
197+
// c := 0
198+
// runNumber := 0
199+
// for i, r := range p.o.runes {
200+
// c += len(r)
201+
// if pos < c {
202+
// runNumber = i
203+
// }
204+
// }
205+
// return p.o.Run(runNumber)
206+
// }
207+
208+
// func calculateOrdering(levels []level, runes []rune) Ordering {
209+
// var curDir Direction
210+
211+
// prevDir := Neutral
212+
// prevI := 0
213+
214+
// o := Ordering{}
215+
// // lvl = 0,2,4,...: left to right
216+
// // lvl = 1,3,5,...: right to left
217+
// for i, lvl := range levels {
218+
// if lvl%2 == 0 {
219+
// curDir = LeftToRight
220+
// } else {
221+
// curDir = RightToLeft
222+
// }
223+
// if curDir != prevDir {
224+
// if i > 0 {
225+
// o.runes = append(o.runes, runes[prevI:i])
226+
// o.directions = append(o.directions, prevDir)
227+
// o.startpos = append(o.startpos, prevI)
228+
// }
229+
// prevI = i
230+
// prevDir = curDir
231+
// }
232+
// }
233+
// o.runes = append(o.runes, runes[prevI:])
234+
// o.directions = append(o.directions, prevDir)
235+
// o.startpos = append(o.startpos, prevI)
236+
// return o
237+
// }
238+
239+
// Order computes the visual ordering of all the runs in a Paragraph.
240+
func (p *Paragraph) Order(defaultDirection DefaultDirection) []level {
241+
if len(p.initialTypes) == 0 {
242+
return nil
243+
}
244+
245+
fmt.Println(p.initialTypes)
246+
fmt.Println(p.pairTypes)
247+
248+
lvl := level(-1)
249+
if defaultDirection == LeftToRight {
250+
lvl = 0
251+
} else if defaultDirection == RightToLeft {
252+
lvl = 1
253+
}
254+
255+
// if err := validateTypes(p.initialTypes); err != nil {
256+
// return Ordering{}, err
257+
// }
258+
// if err := validatePbTypes(p.pairTypes); err != nil {
259+
// return Ordering{}, err
260+
// }
261+
// if err := validatePbValues(p.pairValues, p.pairTypes); err != nil {
262+
// return Ordering{}, err
263+
// }
264+
// if err := validateParagraphEmbeddingLevel(lvl); err != nil {
265+
// return Ordering{}, err
266+
// }
267+
268+
p.embeddingLevel = lvl
269+
270+
p.run()
271+
272+
return p.getLevels()
273+
274+
// p.o = calculateOrdering(levels, p.runes)
275+
// return p.o, nil
276+
}
277+
278+
// // Line computes the visual ordering of runs for a single line starting and
279+
// // ending at the given positions in the original text.
280+
// func (p *Paragraph) Line(start, end int) (Ordering, error) {
281+
// lineTypes := p.types[start:end]
282+
// para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1)
283+
// if err != nil {
284+
// return Ordering{}, err
285+
// }
286+
// levels := para.getLevels([]int{len(lineTypes)})
287+
// o := calculateOrdering(levels, p.runes[start:end])
288+
// return o, nil
289+
// }

0 commit comments

Comments
 (0)