1+ import { memoize } from "@std/cache/memoize" ;
12import { escape as escapeHtml } from "@std/html/entities" ;
23import { escape as escapeRegex } from "@std/regexp/escape" ;
34import nlp from "compromise/three" ;
@@ -35,12 +36,30 @@ import {
3536const RESERVED_SYMBOLS = "#()*+/:;<=>@[\\]^`{|}~" ;
3637const WORDS = new RegExp ( `[^${ escapeRegex ( RESERVED_SYMBOLS ) } ]` ) ;
3738
38- const comment = match ( / # [ ^ \n \r ] * / , "comment" ) ;
39- const spaces = sourceOnly ( all ( choiceOnlyOne ( match ( / \s / , "space" ) , comment ) ) ) ;
4039function lex < T > ( parser : Parser < T > ) : Parser < T > {
4140 return parser . skip ( spaces ) ;
4241}
42+ const comment = match ( / # [ ^ \n \r ] * / , "comment" ) ;
43+ const spaces = sourceOnly ( all ( choiceOnlyOne ( match ( / \s / , "space" ) , comment ) ) ) ;
4344const backtick = matchString ( "`" , "backtick" ) ;
45+
46+ const tokiPonaWord = lex ( match ( / [ a - z ] [ a - z A - Z ] * / , "word" ) ) ;
47+ const openParenthesis = lex ( matchString ( "(" , "open parenthesis" ) ) ;
48+ const closeParenthesis = lex ( matchString ( ")" , "close parenthesis" ) ) ;
49+ const openBracket = lex ( matchString ( "[" , "open bracket" ) ) ;
50+ const closeBracket = lex ( matchString ( "]" , "close bracket" ) ) ;
51+ const comma = lex ( matchString ( "," , "comma" ) ) ;
52+ const colon = lex ( matchString ( ":" , "colon" ) ) ;
53+ const semicolon = lex ( matchString ( ";" , "semicolon" ) ) ;
54+ const slash = lex ( matchString ( "/" , "slash" ) ) ;
55+
56+ const keyword = memoize ( < T extends string > ( keyword : T ) : Parser < T > =>
57+ lex ( match ( / [ a - z \- ] + / , keyword ) )
58+ . filter ( ( that ) =>
59+ keyword === that ||
60+ throwError ( new UnexpectedError ( `"${ that } "` , `"${ keyword } "` ) )
61+ ) as Parser < T >
62+ ) ;
4463const unescapedWord = allAtLeastOnce (
4564 choiceOnlyOne (
4665 match ( WORDS , "word" ) ,
@@ -55,16 +74,8 @@ const unescapedWord = allAtLeastOnce(
5574 word !== "" || throwError ( new ArrayResultError ( "missing word" ) )
5675 ) ;
5776const word = unescapedWord . map ( escapeHtml ) ;
58- const slash = lex ( matchString ( "/" , "slash" ) ) ;
5977const forms = sequence ( word , all ( slash . with ( word ) ) )
6078 . map ( ( [ first , rest ] ) => [ first , ...rest ] ) ;
61- function keyword < T extends string > ( keyword : T ) : Parser < T > {
62- return lex ( match ( / [ a - z \- ] + / , keyword ) )
63- . filter ( ( that ) =>
64- keyword === that ||
65- throwError ( new UnexpectedError ( `"${ that } "` , `"${ keyword } "` ) )
66- ) as Parser < T > ;
67- }
6879const number = choiceOnlyOne ( keyword ( "singular" ) , keyword ( "plural" ) ) ;
6980const optionalNumber = optionalAll ( number ) ;
7081const perspective = choiceOnlyOne (
@@ -73,18 +84,12 @@ const perspective = choiceOnlyOne(
7384 keyword ( "third" ) ,
7485) ;
7586function tag < T > ( parser : Parser < T > ) : Parser < T > {
76- return lex ( matchString ( "(" , "open parenthesis" ) )
77- . with ( parser )
78- . skip ( lex ( matchString ( ")" , "close parenthesis" ) ) ) ;
87+ return openParenthesis . with ( parser ) . skip ( closeParenthesis ) ;
7988}
8089function template < T > ( parser : Parser < T > ) : Parser < T > {
81- return lex ( matchString ( "[" , "open square bracket" ) )
82- . with ( parser )
83- . skip ( lex ( matchString ( "]" , "close square bracket" ) ) ) ;
84- }
85- function simpleUnit ( kind : string ) : Parser < string > {
86- return word . skip ( tag ( keyword ( kind ) ) ) ;
90+ return openBracket . with ( parser ) . skip ( closeBracket ) ;
8791}
92+ const simpleUnit = memoize ( ( kind : string ) => word . skip ( tag ( keyword ( kind ) ) ) ) ;
8893function detectRepetition (
8994 source : ReadonlyArray < string > ,
9095) : { before : string ; repeat : string ; after : string } {
@@ -324,7 +329,6 @@ function verbOnly(tagInside: Parser<unknown>): Parser<VerbForms> {
324329 } ) ,
325330 ) ;
326331}
327- const semicolon = lex ( matchString ( ";" , "semicolon" ) ) ;
328332const definition = choiceOnlyOne < Definition > (
329333 adjective
330334 . skip ( semicolon )
@@ -492,12 +496,8 @@ const definition = choiceOnlyOne<Definition>(
492496 type : "filler" ,
493497 } ) ) ,
494498) ;
495- const singleWord = lex ( match ( / [ a - z ] [ a - z A - Z ] * / , "word" ) ) ;
496- const head = sequence (
497- all ( singleWord . skip ( lex ( matchString ( "," , "comma" ) ) ) ) ,
498- singleWord ,
499- )
500- . skip ( matchString ( ":" , "colon" ) )
499+ const head = sequence ( all ( tokiPonaWord . skip ( comma ) ) , tokiPonaWord )
500+ . skip ( colon )
501501 . map ( ( [ init , last ] ) => [ ...init , last ] ) ;
502502const entry = withSource ( spaces . with ( all ( definition ) ) )
503503 . map ( ( [ definitions , src ] ) => ( { definitions, src : src . trimEnd ( ) } ) ) ;
@@ -512,6 +512,7 @@ const dictionaryParser = spaces
512512 )
513513 )
514514 . parser ( ) ;
515+
515516const definitionExtractor = spaces
516517 . with ( all ( optionalAll ( lex ( head ) ) . with ( lex ( match ( / [ ^ ; ] * ; / , "definition" ) ) ) ) )
517518 . skip ( end )
0 commit comments