@@ -86,7 +86,8 @@ const word = choiceOnlyOne(latinWord, singleUcsurWord);
const properWords = allAtLeastOnce(
  // One capitalized ASCII word; `spaces` is skipped after every match.
  match(/[A-Z][a-zA-Z]*/, "proper word").skip(spaces),
)
  // Adjacent proper words are merged into one space-separated name ...
  .map((array) => array.join(" "))
  // ... and wrapped as a latin "proper word" token right here, so callers do
  // not have to map the result themselves.
  .map<Token>((words) => ({ type: "proper word", words, kind: "latin" }));
9091/** Parses a specific word, either UCSUR or latin. */
9192function specificWord ( thatWord : string ) : Parser < string > {
9293 return word . filter ( ( thisWord ) => {
@@ -98,13 +99,16 @@ function specificWord(thatWord: string): Parser<string> {
9899 } ) ;
99100}
/**
 * Parses multiple a.
 *
 * Requires one "a" followed by at least one more "a" and produces a
 * "multiple a" token.
 */
const multipleA = sequence(
  specificWord("a"),
  count(allAtLeastOnce(specificWord("a"))),
)
  // `repeats` covers only the "a"s after the first one, hence the `+ 1`.
  // It is deliberately not named `count`, which would shadow the `count`
  // combinator used above.
  .map<Token>(([, repeats]) => ({ type: "multiple a", count: repeats + 1 }));
103107/** Parses lengthened words. */
104108const longWord = choiceOnlyOne ( matchString ( "a" ) , matchString ( "n" ) )
105109 . then ( ( word ) =>
106110 count ( allAtLeastOnce ( matchString ( word ) ) )
107- . map < Token & { type : "long word" } > ( ( count ) => ( {
111+ . map < Token > ( ( count ) => ( {
108112 type : "long word" ,
109113 word,
110114 length : count + 1 ,
@@ -124,7 +128,8 @@ const xAlaX = lazy(() => {
124128 sequence ( specificWord ( "ala" ) , specificWord ( word ) ) . map ( ( ) => word )
125129 ) ;
126130 }
127- } ) ;
131+ } )
132+ . map < Token > ( ( word ) => ( { type : "x ala x" , word } ) ) ;
128133
129134Parser . endCache ( ) ;
130135
@@ -139,7 +144,8 @@ const punctuation = choiceOnlyOne(
139144 )
140145 . skip ( spaces ) ,
141146 newline . map ( ( ) => "." ) ,
142- ) ;
147+ )
148+ . map < Token > ( ( punctuation ) => ( { type : "punctuation" , punctuation } ) ) ;
143149/**
144150 * Parses cartouche element and returns the phonemes or letters it represents.
145151 */
@@ -181,7 +187,13 @@ const cartouche = sequence(
181187 return `${ word [ 0 ] . toUpperCase ( ) } ${ word . slice ( 1 ) } ` ;
182188 } ) ;
/**
 * Parses multiple cartouches.
 *
 * The words of one or more consecutive cartouches are joined with a single
 * space and wrapped as a "proper word" token of kind "cartouche".
 */
const cartouches = allAtLeastOnce(cartouche)
  .map((words) => words.join(" "))
  .map<Token>((words) => ({
    type: "proper word",
    words,
    kind: "cartouche",
  }));
185197/**
186198 * Parses long glyph container.
187199 *
@@ -243,32 +255,31 @@ const insideLongGlyph = specificSpecialUcsur(END_OF_REVERSE_LONG_GLYPH)
243255 . skip ( specificSpecialUcsur ( START_OF_LONG_GLYPH ) )
244256 . skip ( spaces )
245257 . map < Token > ( ( words ) => ( { type : "inside long glyph" , words } ) ) ;
/**
 * Parses combined glyphs, skips `spaces` after them, and wraps the result as
 * a "combined glyphs" token.
 */
const combinedGlyphsToken = combinedGlyphs
  .skip(spaces)
  .map<Token>((words) => ({ type: "combined glyphs", words }));
/** Wraps the result of the `word` parser as a "word" token. */
const wordToken = word.map<Token>((parsedWord) => ({
  type: "word",
  // Named `parsedWord` so the callback parameter does not shadow the outer
  // `word` parser.
  word: parsedWord,
}));
246262
Parser.startCache(cache);

/**
 * Parses a token.
 *
 * Every alternative below already produces a `Token`, so the alternatives are
 * handed to `choiceOnlyOne` as they are. The explicit `<Token>` type argument
 * keeps the type of this exported parser pinned to `Token`.
 */
export const token = choiceOnlyOne<Token>(
  longWord,
  xAlaX,
  multipleA,
  wordToken,
  properWords,
  // UCSUR only
  spaceLongGlyph,
  headedLongGlyphStart,
  combinedGlyphsToken,
  // starting with non-words:
  punctuation,
  headlessLongGlyphEnd,
  headedLongGlyphEnd,
  headlessLongGlyphStart,
  insideLongGlyph,
  cartouches,
);

Parser.endCache();
0 commit comments