diff --git a/NEWS.md b/NEWS.md
index 5a828805..dd96bd74 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,6 +2,25 @@
 
 ## Lrama 0.8.0 (2026-xx-xx)
 
+### [EXPERIMENTAL] Support generation of the PSLR(1) parser
+
+Support generation of the PSLR(1) parser described in the following dissertation:
+https://open.clemson.edu/all_dissertations/519/
+
+To generate a PSLR(1) parser, enable it with `%define lr.type pslr` and declare scanner token patterns and lexical precedences in your grammar file:
+
+```yacc
+%token-pattern RSHIFT />>/ "right shift"
+%token-pattern RANGLE />/ "right angle"
+
+%lex-prec RANGLE -s RSHIFT
+```
+
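+`%token-pattern` declares a terminal symbol together with the regular expression the generated scanner uses to match it. `%lex-prec` declares the lexical precedence between two such tokens: `-` gives the left token priority over the right one when both match the same length, `-s` gives the left token priority even though it matches a shorter input, and `,` marks the two tokens as a lex-tie (same priority).
+In the example above, the shorter `>` is preferred over `>>` in parser states where `RANGLE` is acceptable, which is aimed at cases like nested template arguments (`A<B<int>>`).
+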
+Note that the PSLR(1) parser is currently an experimental feature. If you find any bugs, please report them to us.
+
 ## Lrama 0.7.1 (2025-12-24)
 
 ### Optimize IELR
diff --git a/lib/lrama.rb b/lib/lrama.rb
index 56ba0044..c676b32d 100644
--- a/lib/lrama.rb
+++ b/lib/lrama.rb
@@ -15,8 +15,10 @@
 require_relative "lrama/output"
 require_relative "lrama/parser"
 require_relative "lrama/reporter"
+require_relative "lrama/scanner_fsa"
 require_relative "lrama/state"
 require_relative "lrama/states"
+require_relative "lrama/length_precedences"
 require_relative "lrama/tracer"
 require_relative "lrama/version"
 require_relative "lrama/warnings"
diff --git a/lib/lrama/grammar.rb b/lib/lrama/grammar.rb
index 95a80bb0..0cbff5a6 100644
--- a/lib/lrama/grammar.rb
+++ b/lib/lrama/grammar.rb
@@ -20,6 +20,8 @@
 require_relative "grammar/symbols"
 require_relative "grammar/type"
 require_relative "grammar/union"
+require_relative "grammar/token_pattern"
+require_relative "grammar/lex_prec"
 require_relative "lexer"
 
 module Lrama
@@ -68,6 +70,8 @@ class Grammar
   # @union: Union
   # @precedences: Array[Precedence]
   # @start_nterm: Lrama::Lexer::Token::Base?
+  # @token_patterns: Array[Grammar::TokenPattern]
+  # @lex_prec: Grammar::LexPrec
 
   extend Forwardable
 
@@ -100,6 +104,8 @@ class Grammar
   attr_accessor :locations #: bool
   attr_accessor :define #: Hash[String, String]
   attr_accessor :required #: bool
+  attr_reader :token_patterns #: Array[Grammar::TokenPattern]
+  attr_reader :lex_prec #: Grammar::LexPrec
 
   def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term,
                  :find_term_by_s_value, :find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol,
@@ -133,6 +139,9 @@ def initialize(rule_counter, locations, define = {})
     @required = false
     @precedences = []
     @start_nterm = nil
+    @token_patterns = []
+    @lex_prec = Grammar::LexPrec.new
+    @token_pattern_counter = 0
 
     append_special_symbols
   end
@@ -304,6 +313,48 @@ def ielr_defined?
     @define.key?('lr.type') && @define['lr.type'] == 'ielr'
   end
 
+  # @rbs () -> bool
+  def pslr_defined?
+    @define.key?('lr.type') && @define['lr.type'] == 'pslr'
+  end
+
+  # Add a token pattern from %token-pattern directive
+  # @rbs (id: Lexer::Token::Ident, pattern: Lexer::Token::Regex, ?alias_name: String?, ?tag: Lexer::Token::Tag?, lineno: Integer) -> Grammar::TokenPattern
+  def add_token_pattern(id:, pattern:, alias_name: nil, tag: nil, lineno:)
+    token_pattern = Grammar::TokenPattern.new(
+      id: id,
+      pattern: pattern,
+      alias_name: alias_name,
+      tag: tag,
+      lineno: lineno,
+      definition_order: @token_pattern_counter
+    )
+    @token_pattern_counter += 1
+    @token_patterns << token_pattern
+
+    # Also register as a terminal symbol
+    add_term(id: id, alias_name: alias_name, tag: tag)
+
+    token_pattern
+  end
+
+  # Add a lex-prec rule from %lex-prec directive
+  # @rbs (left_token: Lexer::Token::Ident, operator: Symbol, right_token: Lexer::Token::Ident, lineno: Integer) -> Grammar::LexPrec::Rule
+  def add_lex_prec_rule(left_token:, operator:, right_token:, lineno:)
+    @lex_prec.add_rule(
+      left_token: left_token,
+      operator: operator,
+      right_token: right_token,
+      lineno: lineno
+    )
+  end
+
+  # Find a token pattern by its name
+  # @rbs (String name) -> Grammar::TokenPattern?
+  def find_token_pattern(name)
+    @token_patterns.find { |tp| tp.name == name }
+  end
+
   private
 
   # @rbs () -> void
diff --git a/lib/lrama/grammar/lex_prec.rb b/lib/lrama/grammar/lex_prec.rb
new file mode 100644
index 00000000..c5d30da9
--- /dev/null
+++ b/lib/lrama/grammar/lex_prec.rb
@@ -0,0 +1,98 @@
+# rbs_inline: enabled
+# frozen_string_literal: true
+
+module Lrama
+  class Grammar
+    # Represents lexical precedence rules defined by the %lex-prec directive
+    # Based on Definition 3.2.3, 3.2.4, 3.2.10 from the PSLR dissertation
+    #
+    # Example: %lex-prec RANGLE -s RSHIFT  # RANGLE is shorter than RSHIFT
+    #          %lex-prec IF - ID           # IF has higher priority than ID (same length)
+    class LexPrec
+      # Precedence relation types
+      # ","  : Same priority (lex-tie)
+      # "-"  : Left has higher priority than right
+      # "-s" : Left has shorter-match priority over right
+      SAME_PRIORITY = :same #: Symbol
+      HIGHER = :higher #: Symbol
+      SHORTER = :shorter #: Symbol
+
+      # Represents a single precedence rule
+      class Rule
+        attr_reader :left_token #: Lexer::Token::Ident
+        attr_reader :operator #: Symbol
+        attr_reader :right_token #: Lexer::Token::Ident
+        attr_reader :lineno #: Integer
+
+        # @rbs (left_token: Lexer::Token::Ident, operator: Symbol, right_token: Lexer::Token::Ident, lineno: Integer) -> void
+        def initialize(left_token:, operator:, right_token:, lineno:)
+          @left_token = left_token
+          @operator = operator
+          @right_token = right_token
+          @lineno = lineno
+        end
+
+        # @rbs () -> String
+        def left_name
+          @left_token.s_value
+        end
+
+        # @rbs () -> String
+        def right_name
+          @right_token.s_value
+        end
+      end
+
+      attr_reader :rules #: Array[Rule]
+
+      # @rbs () -> void
+      def initialize
+        @rules = []
+      end
+
+      # @rbs (left_token: Lexer::Token::Ident, operator: Symbol, right_token: Lexer::Token::Ident, lineno: Integer) -> Rule
+      def add_rule(left_token:, operator:, right_token:, lineno:)
+        rule = Rule.new(
+          left_token: left_token,
+          operator: operator,
+          right_token: right_token,
+          lineno: lineno
+        )
+        @rules << rule
+        rule
+      end
+
+      # Check if token t1 has higher priority than t2
+      # Based on Definition 3.2.4
+      # @rbs (String t1, String t2) -> bool
+      def higher_priority?(t1, t2)
+        @rules.any? do |rule|
+          rule.operator == HIGHER &&
+            rule.left_name == t1 &&
+            rule.right_name == t2
+        end
+      end
+
+      # Check if token t1 has shorter-match priority over t2
+      # Based on Definition 3.2.15
+      # @rbs (String t1, String t2) -> bool
+      def shorter_priority?(t1, t2)
+        @rules.any? do |rule|
+          rule.operator == SHORTER &&
+            rule.left_name == t1 &&
+            rule.right_name == t2
+        end
+      end
+
+      # Check if tokens t1 and t2 are in a lex-tie relationship
+      # @rbs (String t1, String t2) -> bool
+      def same_priority?(t1, t2)
+        @rules.any? do |rule|
+          rule.operator == SAME_PRIORITY &&
+            ((rule.left_name == t1 && rule.right_name == t2) ||
+             (rule.left_name == t2 && rule.right_name == t1))
+        end
+      end
+    end
+  end
+end
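A minimal sketch of how these relations answer queries once the directives have been parsed; the `grammar` object and the declared rules below are assumed, mirroring the NEWS example:

```ruby
# Assuming the grammar declared: %lex-prec RANGLE -s RSHIFT
grammar.lex_prec.shorter_priority?("RANGLE", "RSHIFT") # => true  (RANGLE wins as the shorter match)
grammar.lex_prec.shorter_priority?("RSHIFT", "RANGLE") # => false (the relation is not symmetric)

# "-" compares tokens whose matches have the same length,
# e.g. %lex-prec IF - ID would give:
grammar.lex_prec.higher_priority?("IF", "ID")          # => true

# "," (lex-tie) is symmetric, so argument order does not matter:
grammar.lex_prec.same_priority?("A", "B")              # same result as same_priority?("B", "A")
```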
diff --git a/lib/lrama/grammar/token_pattern.rb b/lib/lrama/grammar/token_pattern.rb
new file mode 100644
index 00000000..92e8374e
--- /dev/null
+++ b/lib/lrama/grammar/token_pattern.rb
@@ -0,0 +1,38 @@
+# rbs_inline: enabled
+# frozen_string_literal: true
+
+module Lrama
+  class Grammar
+    # Represents a token pattern defined by the %token-pattern directive
+    # Example: %token-pattern RSHIFT />>/ "right shift"
+    class TokenPattern
+      attr_reader :id #: Lexer::Token::Ident
+      attr_reader :pattern #: Lexer::Token::Regex
+      attr_reader :alias_name #: String?
+      attr_reader :tag #: Lexer::Token::Tag?
+      attr_reader :lineno #: Integer
+      attr_reader :definition_order #: Integer
+
+      # @rbs (id: Lexer::Token::Ident, pattern: Lexer::Token::Regex, ?alias_name: String?, ?tag: Lexer::Token::Tag?, lineno: Integer, definition_order: Integer) -> void
+      def initialize(id:, pattern:, alias_name: nil, tag: nil, lineno:, definition_order:)
+        @id = id
+        @pattern = pattern
+        @alias_name = alias_name
+        @tag = tag
+        @lineno = lineno
+        @definition_order = definition_order
+      end
+
+      # @rbs () -> String
+      def name
+        @id.s_value
+      end
+
+      # Returns the regex pattern string (without slashes)
+      # @rbs () -> String
+      def regex_pattern
+        @pattern.pattern
+      end
+    end
+  end
+end
diff --git a/lib/lrama/length_precedences.rb b/lib/lrama/length_precedences.rb
new file mode 100644
index 00000000..15ba218c
--- /dev/null
+++ b/lib/lrama/length_precedences.rb
@@ -0,0 +1,57 @@
+# rbs_inline: enabled
+# frozen_string_literal: true
+
+module Lrama
+  # Length precedences table for PSLR(1)
+  # Based on Definition 3.2.15 from the PSLR dissertation
+  #
+  # Determines which token should be preferred when there's a length conflict:
+  # - :left - the shorter token (t1) should be preferred
+  # - :right - the longer token (t2) should be preferred
+  # - :undefined - no preference defined, use default (longest match)
+  class LengthPrecedences
+    # Result of length precedence lookup
+    LEFT = :left #: Symbol
+    RIGHT = :right #: Symbol
+    UNDEFINED = :undefined #: Symbol
+
+    attr_reader :table #: Hash[[String, String], Symbol]
+
+    # @rbs (Grammar::LexPrec lex_prec) -> void
+    def initialize(lex_prec)
+      @table = build_table(lex_prec)
+    end
+
+    # Get the length precedence between two tokens
+    # @rbs (String t1, String t2) -> Symbol
+    def precedence(t1, t2)
+      @table[[t1, t2]] || UNDEFINED
+    end
+
+    # Check if t1 (shorter) should be preferred over t2 (longer)
+    # @rbs (String t1, String t2) -> bool
+    def prefer_shorter?(t1, t2)
+      precedence(t1, t2) == LEFT
+    end
+
+    private
+
+    # Build the length precedence table from lex-prec rules
+    # @rbs (Grammar::LexPrec lex_prec) -> Hash[[String, String], Symbol]
+    def build_table(lex_prec)
+      table = {}
+
+      lex_prec.rules.each do |rule|
+        case rule.operator
+        when Grammar::LexPrec::SHORTER
+          # t1 -s t2: t1 (shorter) should be preferred over t2 (longer)
+          table[[rule.left_name, rule.right_name]] = LEFT
+          # Inverse: t2 (longer) should not be preferred over t1 (shorter)
+          table[[rule.right_name, rule.left_name]] = RIGHT
+        end
+      end
+
+      table
+    end
+  end
+end
diff --git a/lib/lrama/lexer.rb b/lib/lrama/lexer.rb
index ce98b505..4c4eabc6 100644
--- a/lib/lrama/lexer.rb
+++ b/lib/lrama/lexer.rb
@@ -18,7 +18,8 @@ class Lexer
   #          [::Symbol, Token::Char] |
   #          [::Symbol, Token::Str] |
   #          [::Symbol, Token::Int] |
-  #          [::Symbol, Token::Ident]
+  #          [::Symbol, Token::Ident] |
+  #          [::Symbol, Token::Regex]
   #
   # type c_token = [:C_DECLARATION, Token::UserCode]
 
@@ -32,6 +33,7 @@ class Lexer
     PERCENT_TOKENS = %w(
       %union
       %token
+      %token-pattern
      %type
      %nterm
      %left
@@ -43,6 +45,7 @@ class Lexer
      %printer
      %destructor
      %lex-param
+      %lex-prec
      %parse-param
      %initial-action
      %precedence
@@ -121,7 +124,7 @@ def lex_token
       return
     when @scanner.scan(/#{SYMBOLS.join('|')}/)
       return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
-    when @scanner.scan(/#{PERCENT_TOKENS.join('|')}/)
+    when @scanner.scan(/#{PERCENT_TOKENS.sort_by { |s| -s.length }.join('|')}/)
       return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
     when @scanner.scan(/[\?\+\*]/)
       return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
@@ -133,6 +136,12 @@ def lex_token
       return [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)]
     when @scanner.scan(/".*?"/)
       return [:STRING, Lrama::Lexer::Token::Str.new(s_value: %Q(#{@scanner.matched}), location: location)]
+    when @scanner.scan(%r{/[^/]+/})
+      return [:REGEX, Lrama::Lexer::Token::Regex.new(s_value: @scanner.matched, location: location)]
+    when @scanner.scan(/-s(?=\s)/)
+      return ['-s', Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
+    when @scanner.scan(/-(?=\s)/)
+      return ['-', Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
     when @scanner.scan(/\d+/)
       return [:INTEGER, Lrama::Lexer::Token::Int.new(s_value: Integer(@scanner.matched), location: location)]
     when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
diff --git a/lib/lrama/lexer/token.rb b/lib/lrama/lexer/token.rb
index 37f77aa0..3932486e 100644
--- a/lib/lrama/lexer/token.rb
+++ b/lib/lrama/lexer/token.rb
@@ -7,6 +7,7 @@
 require_relative 'token/ident'
 require_relative 'token/instantiate_rule'
 require_relative 'token/int'
+require_relative 'token/regex'
 require_relative 'token/str'
 require_relative 'token/tag'
 require_relative 'token/token'
diff --git a/lib/lrama/lexer/token/regex.rb b/lib/lrama/lexer/token/regex.rb
new file mode 100644
index 00000000..c4295f40
--- /dev/null
+++ b/lib/lrama/lexer/token/regex.rb
@@ -0,0 +1,19 @@
+# rbs_inline: enabled
+# frozen_string_literal: true
+
+module Lrama
+  class Lexer
+    module Token
+      # Token class for regex patterns used in %token-pattern directive
+      # Example: /[a-zA-Z_][a-zA-Z0-9_]*/
+      class Regex < Base
+        # Returns the regex pattern without the surrounding slashes
+        # @rbs () -> String
+        def pattern
+          # Remove leading and trailing slashes
+          s_value[1..-2].to_s
+        end
+      end
+    end
+  end
+end
diff --git a/lib/lrama/output.rb b/lib/lrama/output.rb
index d527be8b..2b716eb9 100644
--- a/lib/lrama/output.rb
+++ b/lib/lrama/output.rb
@@ -401,6 +401,243 @@ def percent_code(name)
       end.join
     end
 
+    # PSLR Output Helper Methods
+    # Based on PSLR::OutputHelper - generates PSLR-specific C code
+
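+    # The generated artifacts fit together as follows:
+    #
+    #   yy_scanner_transition[fsa_state][byte]          -> next FSA state, or -1
+    #   yy_state_to_accepting[fsa_state]                -> accepting-state index, or -1
+    #   yy_scanner_accepts[parser_state][accepting_idx] -> token id, or YYEMPTY
+    #   yy_length_precedences[t1][t2]                   -> winner of a length conflict
+    #
+    # yy_pseudo_scan drives the FSA over the input and consults these tables
+    # to select the token that is acceptable in the current parser state.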
+    # Check if PSLR output is needed
+    def pslr_enabled?
+      scanner_fsa = @context.states.scanner_fsa
+      !scanner_fsa.nil? && !scanner_fsa.states.empty?
+    end
+
+    # Generate Scanner FSA transition table as C code
+    def scanner_transition_table
+      return "" unless pslr_enabled?
+      scanner_fsa = @context.states.scanner_fsa
+
+      lines = []
+      lines << "/* Scanner FSA transition table */"
+      lines << "#define YY_SCANNER_NUM_STATES #{scanner_fsa.states.size}"
+      lines << "#define YY_SCANNER_INVALID_STATE (-1)"
+      lines << ""
+      lines << "static const int yy_scanner_transition[YY_SCANNER_NUM_STATES][256] = {"
+
+      scanner_fsa.states.each_with_index do |state, idx|
+        transitions = Array.new(256, -1)
+        state.transitions.each do |char, target_id|
+          transitions[char.ord] = target_id
+        end
+        lines << " /* state #{idx} */ {#{transitions.join(', ')}}#{idx < scanner_fsa.states.size - 1 ? ',' : ''}"
+      end
+
+      lines << "};"
+      lines.join("\n")
+    end
+
+    # Generate state_to_accepting table as C code
+    def state_to_accepting_table
+      return "" unless pslr_enabled?
+      scanner_fsa = @context.states.scanner_fsa
+
+      lines = []
+      lines << ""
+      lines << "/* FSA state -> accepting-state index mapping */"
+      lines << "#define YY_ACCEPTING_NONE (-1)"
+      lines << ""
+      lines << "static const int yy_state_to_accepting[YY_SCANNER_NUM_STATES] = {"
+
+      # Number the accepting states densely, in scanner_fsa.states order, so
+      # the value can be used directly as the column index of yy_scanner_accepts
+      # (which enumerates accepting states in the same order).
+      accepting_index = 0
+      accepting_ids = scanner_fsa.states.map do |state|
+        if state.accepting?
+          index = accepting_index
+          accepting_index += 1
+          index
+        else
+          -1
+        end
+      end
+
+      lines << " #{accepting_ids.join(', ')}"
+      lines << "};"
+      lines.join("\n")
+    end
+
+    # Generate token IDs for accepting states as C code
+    def accepting_tokens_table
+      return "" unless pslr_enabled?
+      scanner_fsa = @context.states.scanner_fsa
+
+      lines = []
+      lines << ""
+      lines << "/* Accepting state token IDs */"
+      lines << "/* For each accepting state, list of (token_id, definition_order) pairs */"
+      lines << ""
+
+      # Collect all unique tokens
+      all_tokens = @context.states.token_patterns.map(&:name)
+      lines << "/* Token pattern names: #{all_tokens.join(', ')} */"
+      lines << ""
+
+      # Generate accepting tokens for each FSA state
+      scanner_fsa.states.each do |state|
+        next unless state.accepting?
+
+        token_names = state.accepting_tokens.map(&:name)
+        lines << "/* State #{state.id} accepts: #{token_names.join(', ')} */"
+      end
+
+      lines.join("\n")
+    end
+
+    # Generate scanner_accepts table as C code
+    def scanner_accepts_table_code
+      return "" unless pslr_enabled?
+      scanner_fsa = @context.states.scanner_fsa
+      scanner_accepts = @context.states.scanner_accepts_table
+      return "" unless scanner_accepts
+
+      lines = []
+      lines << ""
+      lines << "/* scanner_accepts[parser_state][accepting_state] -> token_id */"
+      lines << "/* YYEMPTY = -2, means no token accepted */"
+      lines << ""
+
+      num_parser_states = @context.states.states.size
+      num_accepting_states = scanner_fsa.states.count(&:accepting?)
+
+      lines << "#define YY_NUM_PARSER_STATES #{num_parser_states}"
+      lines << "#define YY_NUM_ACCEPTING_STATES #{num_accepting_states}"
+      lines << ""
+
+      if num_accepting_states > 0
+        lines << "static const int yy_scanner_accepts[YY_NUM_PARSER_STATES][YY_NUM_ACCEPTING_STATES] = {"
+
+        @context.states.states.each_with_index do |parser_state, ps_idx|
+          row = []
+          scanner_fsa.states.each do |fsa_state|
+            next unless fsa_state.accepting?
+
+            token = scanner_accepts[parser_state.id, fsa_state.id]
+            if token
+              # Use definition order as token ID for now
+              row << token.definition_order
+            else
+              row << -2 # YYEMPTY
+            end
+          end
+
+          lines << " /* parser state #{ps_idx} */ {#{row.join(', ')}}#{ps_idx < num_parser_states - 1 ? 
',' : ''}" + end + + lines << "};" + end + + lines.join("\n") + end + + # Generate length_precedences table as C code + def length_precedences_table_code + return "" unless pslr_enabled? + length_precedences = @context.states.length_precedences + return "" unless length_precedences + + lines = [] + lines << "" + lines << "/* length_precedences[token1][token2] -> precedence */" + lines << "#define YY_LENGTH_PREC_UNDEFINED 0" + lines << "#define YY_LENGTH_PREC_LEFT 1 /* shorter token wins */" + lines << "#define YY_LENGTH_PREC_RIGHT 2 /* longer token wins */" + lines << "" + + num_tokens = @context.states.token_patterns.size + if num_tokens > 0 + lines << "static const int yy_length_precedences[#{num_tokens}][#{num_tokens}] = {" + + @context.states.token_patterns.each_with_index do |t1, i| + row = @context.states.token_patterns.map do |t2| + case length_precedences.precedence(t1.name, t2.name) + when :left then 1 + when :right then 2 + else 0 + end + end + lines << " /* #{t1.name} */ {#{row.join(', ')}}#{i < num_tokens - 1 ? ',' : ''}" + end + + lines << "};" + end + + lines.join("\n") + end + + # Generate pseudo_scan function as C code + def pseudo_scan_function + return "" unless pslr_enabled? + + <<~C_CODE + + /* + * pseudo_scan: PSLR(1) scanning function + * Based on Definition 3.2.16 from the PSLR dissertation + * + * Input: + * parser_state: Current parser state + * input: Input buffer pointer + * match_length: Output parameter for matched length + * + * Returns: Selected token ID, or YYEMPTY if no match + */ + static int + yy_pseudo_scan(int parser_state, const char *input, int *match_length) + { + int ss = 0; /* FSA initial state */ + int ibest = 0; + int tbest = YYEMPTY; + int i = 0; + + while (input[i] != '\\0') { + int c = (unsigned char)input[i]; + int next_ss = yy_scanner_transition[ss][c]; + + if (next_ss == YY_SCANNER_INVALID_STATE) { + break; + } + + ss = next_ss; + i++; + + /* Check if this is an accepting state */ + int sa = yy_state_to_accepting[ss]; + if (sa != YY_ACCEPTING_NONE) { + int t = yy_scanner_accepts[parser_state][sa]; + if (t != YYEMPTY) { + /* Check length precedences */ + if (tbest == YYEMPTY || + (i > ibest && yy_length_precedences[tbest][t] != YY_LENGTH_PREC_LEFT) || + (i == ibest && yy_length_precedences[t][tbest] == YY_LENGTH_PREC_LEFT)) { + tbest = t; + ibest = i; + } + } + } + } + + *match_length = ibest; + return tbest; + } + C_CODE + end + + # Generate all PSLR C code + def pslr_tables_and_functions + return "" unless pslr_enabled? 
+ + [ + "/* PSLR(1) Scanner Tables and Functions */", + "/* Generated by Lrama PSLR implementation */", + "", + scanner_transition_table, + state_to_accepting_table, + accepting_tokens_table, + scanner_accepts_table_code, + length_precedences_table_code, + pseudo_scan_function + ].join("\n") + end + private def eval_template(file, path) diff --git a/lib/lrama/parser.rb b/lib/lrama/parser.rb index 20c3ad34..cdc931b6 100644 --- a/lib/lrama/parser.rb +++ b/lib/lrama/parser.rb @@ -655,7 +655,7 @@ def token_to_str(t) module Lrama class Parser < Racc::Parser -module_eval(<<'...end parser.y/module_eval...', 'parser.y', 504) +module_eval(<<'...end parser.y/module_eval...', 'parser.y', 576) include Lrama::Tracer::Duration @@ -745,322 +745,351 @@ def raise_parse_error(error_message, location) ##### State transition tables begin ### racc_action_table = [ - 98, 98, 99, 99, 87, 53, 53, 52, 178, 110, - 110, 97, 53, 53, 184, 178, 110, 110, 53, 181, - 184, 162, 110, 6, 163, 181, 181, 53, 53, 52, - 52, 181, 79, 79, 53, 53, 52, 52, 43, 79, - 79, 53, 4, 52, 5, 110, 88, 94, 182, 125, - 126, 163, 100, 100, 180, 193, 194, 195, 137, 185, - 188, 180, 4, 44, 5, 185, 188, 94, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 46, 33, 34, - 35, 36, 37, 38, 39, 40, 41, 47, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 47, 33, 34, - 35, 36, 37, 38, 39, 40, 41, 12, 13, 50, - 57, 14, 15, 16, 17, 18, 19, 20, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 57, 33, 34, - 35, 36, 37, 38, 39, 40, 41, 12, 13, 57, - 60, 14, 15, 16, 17, 18, 19, 20, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 57, 33, 34, - 35, 36, 37, 38, 39, 40, 41, 53, 53, 52, - 52, 110, 105, 53, 53, 52, 52, 110, 105, 53, - 53, 52, 52, 110, 105, 53, 53, 52, 52, 110, - 105, 53, 53, 52, 52, 110, 110, 53, 53, 52, - 209, 110, 110, 53, 53, 209, 52, 110, 110, 53, - 53, 209, 52, 110, 193, 194, 195, 137, 216, 222, - 229, 217, 217, 217, 53, 53, 52, 52, 193, 194, - 195, 57, 57, 57, 57, 66, 67, 68, 69, 70, - 72, 72, 72, 86, 89, 47, 57, 57, 113, 117, - 117, 79, 123, 124, 131, 47, 133, 137, 139, 143, - 149, 150, 151, 152, 133, 155, 156, 157, 110, 166, - 149, 169, 172, 173, 72, 175, 176, 183, 189, 166, - 196, 137, 200, 202, 137, 166, 211, 166, 137, 72, - 176, 218, 176, 72, 72, 227, 137, 72 ] + 105, 105, 106, 106, 94, 4, 55, 5, 200, 55, + 117, 206, 104, 117, 55, 6, 200, 55, 117, 206, + 203, 117, 55, 203, 54, 141, 142, 86, 203, 45, + 55, 203, 54, 46, 55, 86, 54, 48, 55, 86, + 54, 49, 55, 86, 54, 185, 117, 112, 49, 101, + 184, 55, 95, 54, 52, 117, 112, 107, 107, 202, + 185, 207, 210, 239, 4, 204, 5, 202, 238, 207, + 210, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 101, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 42, 55, 239, 54, 43, 117, 112, 244, 215, 216, + 217, 153, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 59, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 12, 13, 59, 43, 14, 15, 16, 17, + 18, 19, 20, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 59, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 12, 13, 62, 43, 14, 15, 16, + 17, 18, 19, 20, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 59, 33, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 55, 239, 54, 43, 117, 112, + 251, 55, 55, 54, 54, 117, 117, 55, 55, 54, + 54, 117, 117, 55, 55, 54, 231, 117, 117, 55, + 55, 231, 54, 117, 117, 55, 55, 231, 54, 117, + 215, 216, 217, 153, 55, 59, 54, 129, 130, 131, + 129, 130, 131, 55, 55, 54, 54, 55, 55, 54, + 54, 55, 59, 54, 215, 216, 217, 59, 59, 68, + 69, 70, 71, 72, 74, 74, 80, 74, 74, 93, + 96, 49, 59, 59, 120, 124, 127, 133, 133, 86, + 139, 140, 147, 49, 149, 153, 155, 159, 127, 127, + 163, 164, 165, 170, 171, 172, 173, 149, 176, 177, + 178, 117, 117, 188, 170, 191, 
194, 195, 74, 197, + 198, 205, 211, 188, 218, 153, 222, 224, 153, 188, + 233, 188, 153, 74, 198, 240, 198, 74, 74, 249, + 153, 74 ] racc_action_check = [ - 51, 97, 51, 97, 41, 75, 165, 75, 165, 75, - 165, 51, 171, 190, 171, 190, 171, 190, 201, 165, - 201, 148, 201, 1, 148, 171, 190, 36, 37, 36, - 37, 201, 36, 37, 38, 39, 38, 39, 5, 38, - 39, 117, 0, 117, 0, 117, 41, 46, 168, 88, - 88, 168, 51, 97, 165, 177, 177, 177, 177, 171, - 171, 190, 2, 6, 2, 201, 201, 90, 46, 46, - 46, 46, 46, 46, 46, 46, 46, 9, 46, 46, - 46, 46, 46, 46, 46, 46, 46, 10, 90, 90, - 90, 90, 90, 90, 90, 90, 90, 11, 90, 90, - 90, 90, 90, 90, 90, 90, 90, 3, 3, 12, - 14, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 15, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 8, 8, 16, - 17, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 18, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 63, 13, 63, - 13, 63, 63, 64, 73, 64, 73, 64, 64, 65, - 78, 65, 78, 65, 65, 106, 79, 106, 79, 106, - 106, 118, 180, 118, 180, 118, 180, 188, 196, 188, - 196, 188, 196, 202, 217, 202, 217, 202, 217, 218, - 113, 218, 113, 218, 186, 186, 186, 186, 208, 213, - 226, 208, 213, 226, 114, 123, 114, 123, 210, 210, - 210, 24, 25, 26, 27, 28, 29, 30, 31, 32, - 33, 34, 35, 40, 42, 47, 55, 60, 71, 74, - 76, 80, 81, 87, 91, 92, 93, 94, 102, 116, - 124, 125, 126, 127, 133, 136, 137, 138, 144, 150, - 151, 153, 156, 158, 162, 163, 164, 170, 174, 176, - 178, 179, 182, 184, 187, 189, 199, 200, 204, 205, - 207, 209, 212, 214, 216, 221, 222, 228 ] + 53, 104, 53, 104, 43, 0, 187, 0, 187, 193, + 187, 193, 53, 193, 212, 1, 212, 223, 212, 223, + 187, 223, 38, 193, 38, 95, 95, 38, 212, 5, + 39, 223, 39, 6, 40, 39, 40, 9, 41, 40, + 41, 10, 65, 41, 65, 169, 65, 65, 11, 48, + 169, 66, 43, 66, 12, 66, 66, 53, 104, 187, + 190, 193, 193, 230, 2, 190, 2, 212, 230, 223, + 223, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 97, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 67, 235, 67, 48, 67, 67, 235, 199, 199, + 199, 199, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 14, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 3, 3, 15, 97, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 16, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 8, 8, 17, 3, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 18, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 113, 248, 113, 8, 113, 113, + 248, 82, 133, 82, 133, 82, 133, 134, 202, 134, + 202, 134, 202, 210, 218, 210, 218, 210, 218, 224, + 239, 224, 239, 224, 239, 240, 13, 240, 13, 240, + 208, 208, 208, 208, 75, 24, 75, 79, 79, 79, + 80, 80, 80, 85, 86, 85, 86, 120, 121, 120, + 121, 139, 25, 139, 232, 232, 232, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, 36, 37, 42, + 44, 49, 57, 62, 73, 76, 77, 81, 83, 87, + 88, 94, 98, 99, 100, 101, 109, 123, 124, 125, + 127, 128, 132, 140, 141, 142, 143, 149, 152, 153, + 154, 160, 163, 171, 172, 174, 177, 179, 184, 185, + 186, 192, 196, 198, 200, 201, 204, 206, 209, 211, + 221, 222, 226, 227, 229, 231, 234, 236, 238, 243, + 244, 250 ] racc_action_pointer = [ - 32, 23, 52, 93, nil, 31, 63, nil, 123, 68, - 74, 84, 103, 165, 94, 111, 123, 135, 141, nil, - nil, nil, nil, nil, 215, 216, 217, 218, 230, 231, - 232, 233, 234, 232, 233, 234, 24, 25, 31, 32, - 238, -1, 242, nil, nil, nil, 43, 232, nil, nil, - nil, -5, nil, nil, nil, 230, nil, nil, nil, nil, - 231, nil, nil, 164, 170, 176, nil, nil, nil, nil, - nil, 240, nil, 171, 241, 2, 242, nil, 177, 183, - 243, 244, nil, nil, nil, nil, nil, 209, 45, nil, - 63, 245, 242, 243, 202, nil, nil, -4, nil, nil, - nil, nil, 256, nil, nil, nil, 182, nil, nil, nil, - nil, nil, nil, 207, 221, nil, 253, 38, 188, nil, - nil, nil, nil, 222, 255, 215, 218, 252, nil, 
nil, - nil, nil, nil, 251, nil, nil, 219, 261, 250, nil, - nil, nil, nil, nil, 261, nil, nil, nil, -24, nil, - 219, 265, nil, 269, nil, nil, 216, nil, 256, nil, - nil, nil, 266, 270, 227, 3, nil, nil, 3, nil, - 228, 9, nil, nil, 232, nil, 229, 3, 236, 226, - 189, nil, 236, nil, 239, nil, 162, 229, 194, 235, - 10, nil, nil, nil, nil, nil, 195, nil, nil, 284, - 237, 15, 200, nil, 233, 281, nil, 241, 173, 247, - 176, nil, 243, 174, 285, nil, 286, 201, 206, nil, - nil, 278, 241, nil, nil, nil, 175, nil, 289, nil, - nil ] + -6, 15, 53, 107, nil, 22, 33, nil, 138, 27, + 27, 34, 48, 213, 94, 107, 125, 150, 156, nil, + nil, nil, nil, nil, 208, 225, 230, 231, 244, 245, + 246, 247, 248, 246, 247, 251, 249, 250, 19, 27, + 31, 35, 254, -1, 258, nil, nil, nil, 45, 247, + nil, nil, nil, -5, nil, nil, nil, 245, nil, nil, + nil, nil, 246, nil, nil, 39, 48, 88, nil, nil, + nil, nil, nil, 256, nil, 221, 257, 261, nil, 181, + 184, 259, 188, 260, nil, 230, 231, 261, 262, nil, + nil, nil, nil, nil, 221, 21, nil, 76, 262, 259, + 260, 215, nil, nil, -4, nil, nil, nil, nil, 274, + nil, nil, nil, 181, nil, nil, nil, nil, nil, nil, + 234, 235, nil, 271, 273, 274, nil, 271, 276, nil, + nil, nil, 277, 189, 194, nil, nil, nil, nil, 238, + 278, 232, 235, 274, nil, nil, nil, nil, nil, 273, + nil, nil, 236, 284, 272, nil, nil, nil, nil, nil, + 284, nil, nil, 285, nil, nil, nil, nil, nil, -1, + nil, 238, 289, nil, 293, nil, nil, 235, nil, 279, + nil, nil, nil, nil, 290, 294, 246, 3, nil, nil, + 14, nil, 247, 6, nil, nil, 250, nil, 248, 41, + 254, 245, 195, nil, 254, nil, 257, nil, 163, 248, + 200, 254, 11, nil, nil, nil, nil, nil, 201, nil, + nil, 308, 256, 14, 206, nil, 252, 305, nil, 260, + 17, 265, 187, nil, 262, 46, 309, nil, 310, 207, + 212, nil, nil, 301, 260, nil, nil, nil, 139, nil, + 313, nil, nil ] racc_action_default = [ - -1, -136, -1, -3, -10, -136, -136, -2, -3, -136, - -14, -14, -136, -136, -136, -136, -136, -136, -136, -28, - -29, -34, -35, -36, -136, -136, -136, -136, -136, -136, - -136, -136, -136, -54, -54, -54, -136, -136, -136, -136, - -136, -136, -136, -13, 231, -4, -136, -14, -16, -17, - -20, -131, -100, -101, -130, -18, -23, -89, -24, -25, - -136, -27, -37, -136, -136, -136, -41, -42, -43, -44, - -45, -46, -55, -136, -47, -136, -48, -49, -92, -136, - -95, -97, -98, -50, -51, -52, -53, -136, -136, -11, - -5, -7, -14, -136, -72, -15, -21, -131, -132, -133, - -134, -19, -136, -26, -30, -31, -32, -38, -87, -88, - -135, -39, -40, -136, -56, -58, -60, -136, -83, -85, - -93, -94, -96, -136, -136, -136, -136, -136, -6, -8, - -9, -128, -104, -102, -105, -73, -136, -136, -136, -90, - -33, -59, -57, -61, -80, -86, -84, -99, -136, -66, - -70, -136, -12, -136, -103, -109, -136, -22, -136, -62, - -81, -82, -54, -136, -64, -68, -71, -74, -136, -129, - -106, -107, -127, -91, -136, -67, -70, -72, -100, -72, - -136, -124, -136, -109, -100, -110, -72, -72, -136, -70, - -69, -75, -76, -116, -117, -118, -136, -78, -79, -136, - -70, -108, -136, -111, -72, -54, -115, -63, -136, -100, - -119, -125, -65, -136, -54, -114, -54, -136, -136, -120, - -121, -136, -72, -112, -77, -122, -136, -126, -54, -123, - -113 ] + -1, -149, -1, -3, -10, -149, -149, -2, -3, -149, + -14, -14, -149, -149, -149, -149, -149, -149, -149, -28, + -29, -34, -35, -36, -149, -149, -149, -149, -149, -149, + -149, -149, -149, -56, -56, -149, -56, -56, -149, -149, + -149, -149, -149, -149, -149, -13, 253, -4, -149, -14, + -16, -17, -20, -144, -113, -114, -143, -18, -23, -102, + -24, -25, -149, -27, -37, -149, -149, -149, -41, -42, + -43, -44, -45, -46, 
-57, -149, -47, -149, -48, -70, + -149, -49, -149, -50, -51, -105, -149, -108, -110, -111, + -52, -53, -54, -55, -149, -149, -11, -5, -7, -14, + -149, -85, -15, -21, -144, -145, -146, -147, -19, -149, + -26, -30, -31, -32, -38, -100, -101, -148, -39, -40, + -149, -58, -60, -62, -149, -65, -67, -149, -149, -73, + -74, -75, -149, -149, -96, -98, -106, -107, -109, -149, + -149, -149, -149, -149, -6, -8, -9, -141, -117, -115, + -118, -86, -149, -149, -149, -103, -33, -61, -59, -63, + -93, -68, -66, -93, -72, -71, -99, -97, -112, -149, + -79, -83, -149, -12, -149, -116, -122, -149, -22, -149, + -64, -94, -95, -69, -56, -149, -77, -81, -84, -87, + -149, -142, -119, -120, -140, -104, -149, -80, -83, -85, + -113, -85, -149, -137, -149, -122, -113, -123, -85, -85, + -149, -83, -82, -88, -89, -129, -130, -131, -149, -91, + -92, -149, -83, -121, -149, -124, -85, -56, -128, -76, + -149, -113, -132, -138, -78, -149, -56, -127, -56, -149, + -149, -133, -134, -149, -85, -125, -90, -135, -149, -139, + -56, -136, -126 ] racc_goto_table = [ - 73, 118, 136, 54, 48, 49, 164, 96, 91, 120, - 121, 93, 187, 148, 107, 111, 112, 119, 134, 171, - 56, 58, 59, 3, 61, 7, 78, 78, 78, 78, - 62, 63, 64, 65, 115, 74, 76, 192, 1, 129, - 168, 95, 187, 118, 118, 207, 204, 201, 77, 83, - 84, 85, 128, 138, 147, 93, 212, 140, 154, 145, - 146, 101, 130, 116, 42, 127, 103, 208, 78, 78, - 219, 9, 51, 213, 141, 142, 45, 71, 159, 144, - 190, 160, 161, 102, 158, 191, 132, 197, 122, 226, - 170, 177, 220, 199, 203, 205, 221, 186, 153, nil, - nil, nil, nil, 116, 116, nil, 198, nil, nil, nil, - nil, nil, 214, 78, 206, nil, 177, nil, nil, nil, - nil, nil, 210, nil, nil, nil, nil, 186, 210, 174, - 228, nil, nil, nil, nil, nil, nil, nil, nil, nil, - nil, nil, nil, 225, 210, nil, nil, nil, nil, nil, + 75, 77, 134, 152, 50, 51, 135, 56, 136, 137, + 103, 98, 186, 169, 209, 100, 150, 214, 114, 118, + 119, 1, 122, 126, 9, 230, 226, 193, 145, 47, + 63, 235, 85, 85, 85, 85, 64, 65, 66, 67, + 58, 60, 61, 102, 209, 190, 180, 248, 181, 183, + 241, 181, 229, 134, 134, 146, 223, 166, 167, 44, + 144, 154, 168, 234, 100, 175, 156, 157, 158, 123, + 161, 162, 81, 83, 110, 84, 90, 91, 92, 85, + 85, 143, 3, 108, 7, 128, 132, 53, 73, 76, + 78, 160, 79, 148, 212, 109, 179, 138, 192, 242, + 221, 213, 243, 219, 174, nil, nil, 199, nil, nil, + 225, 227, nil, 208, 123, 123, nil, nil, nil, nil, + nil, nil, 220, nil, nil, nil, nil, nil, 236, nil, + 228, nil, 199, 85, nil, nil, nil, nil, 232, nil, + nil, nil, nil, 208, 232, nil, 250, nil, nil, nil, + nil, 196, nil, nil, nil, nil, nil, nil, nil, 247, + 232, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, - nil, nil, 215, nil, nil, nil, nil, nil, nil, nil, - nil, 223, nil, 224, nil, nil, nil, nil, nil, nil, - nil, nil, nil, nil, nil, 230 ] + nil, nil, nil, nil, 237, nil, nil, nil, nil, nil, + nil, nil, nil, 245, nil, 246, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, 252 ] racc_goto_check = [ - 29, 22, 42, 31, 14, 14, 35, 16, 8, 48, - 48, 13, 40, 34, 24, 24, 24, 45, 52, 54, - 18, 18, 18, 6, 17, 6, 31, 31, 31, 31, - 17, 17, 17, 17, 30, 26, 26, 38, 1, 5, - 34, 14, 40, 22, 22, 35, 38, 54, 27, 27, - 27, 27, 8, 16, 48, 13, 35, 24, 52, 45, - 45, 18, 9, 31, 10, 11, 17, 39, 31, 31, - 38, 7, 15, 39, 30, 30, 7, 25, 32, 33, - 36, 43, 44, 46, 47, 42, 14, 42, 50, 39, - 53, 22, 55, 56, 42, 42, 57, 22, 58, nil, - nil, nil, nil, 31, 31, nil, 22, nil, nil, nil, - nil, nil, 42, 31, 22, nil, 22, nil, nil, nil, - nil, nil, 22, nil, 
nil, nil, nil, 22, 22, 29, - 42, nil, nil, nil, nil, nil, nil, nil, nil, nil, - nil, nil, nil, 22, 22, nil, nil, nil, nil, nil, + 31, 31, 22, 48, 14, 14, 51, 33, 54, 54, + 16, 8, 41, 40, 46, 13, 58, 44, 24, 24, + 24, 1, 32, 37, 7, 45, 44, 60, 5, 7, + 17, 45, 33, 33, 33, 33, 17, 17, 17, 17, + 18, 18, 18, 14, 46, 40, 34, 45, 49, 34, + 44, 49, 41, 22, 22, 9, 60, 51, 51, 10, + 8, 16, 54, 41, 13, 58, 24, 32, 32, 33, + 37, 37, 28, 28, 17, 29, 29, 29, 29, 33, + 33, 11, 6, 18, 6, 39, 39, 15, 25, 26, + 27, 35, 38, 14, 42, 52, 53, 56, 59, 61, + 62, 48, 63, 48, 64, nil, nil, 22, nil, nil, + 48, 48, nil, 22, 33, 33, nil, nil, nil, nil, + nil, nil, 22, nil, nil, nil, nil, nil, 48, nil, + 22, nil, 22, 33, nil, nil, nil, nil, 22, nil, + nil, nil, nil, 22, 22, nil, 48, nil, nil, nil, + nil, 31, nil, nil, nil, nil, nil, nil, nil, 22, + 22, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, - nil, nil, 29, nil, nil, nil, nil, nil, nil, nil, - nil, 29, nil, 29, nil, nil, nil, nil, nil, nil, - nil, nil, nil, nil, nil, 29 ] + nil, nil, nil, nil, 31, nil, nil, nil, nil, nil, + nil, nil, nil, 31, nil, 31, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, 31 ] racc_goto_pointer = [ - nil, 38, nil, nil, nil, -52, 23, 68, -38, -29, - 60, -24, nil, -35, -6, 59, -44, 6, 6, nil, - nil, nil, -74, nil, -49, 44, 1, 12, nil, -33, - -39, -10, -66, -37, -111, -144, -96, nil, -140, -129, - -159, nil, -92, -63, -62, -58, 26, -55, -69, nil, - 8, nil, -75, -65, -136, -118, -88, -115, -33 ] + nil, 21, nil, nil, nil, -70, 82, 21, -37, -43, + 55, -15, nil, -33, -6, 74, -43, 12, 26, nil, + nil, nil, -80, nil, -47, 55, 55, 55, 36, 37, + nil, -33, -53, -6, -114, -32, nil, -54, 57, 6, + -127, -159, -104, nil, -182, -193, -179, nil, -98, -112, + nil, -76, 36, -59, -77, nil, 10, nil, -84, -78, + -149, -133, -103, -131, -43 ] racc_goto_default = [ - nil, nil, 2, 8, 90, nil, nil, nil, nil, nil, - nil, nil, 10, 11, nil, nil, nil, 55, nil, 21, - 22, 23, 104, 106, nil, nil, nil, nil, 114, 75, - nil, 108, nil, nil, nil, nil, 165, 135, nil, nil, - 179, 167, nil, 109, nil, nil, nil, nil, 81, 80, - 82, 92, nil, nil, nil, nil, nil, nil, nil ] + nil, nil, 2, 8, 97, nil, nil, nil, nil, nil, + nil, nil, 10, 11, nil, nil, nil, 57, nil, 21, + 22, 23, 111, 113, nil, nil, nil, nil, nil, nil, + 121, 82, nil, 115, nil, nil, 125, nil, nil, nil, + nil, nil, 187, 151, nil, nil, 201, 189, nil, 116, + 182, nil, nil, nil, 88, 87, 89, 99, nil, nil, + nil, nil, nil, nil, nil ] racc_reduce_table = [ 0, 0, :racc_error, - 0, 64, :_reduce_1, - 2, 64, :_reduce_2, - 0, 65, :_reduce_3, - 2, 65, :_reduce_4, - 1, 66, :_reduce_5, - 2, 66, :_reduce_6, - 0, 67, :_reduce_none, - 1, 67, :_reduce_none, - 5, 59, :_reduce_none, - 0, 68, :_reduce_10, - 0, 69, :_reduce_11, - 5, 60, :_reduce_12, - 2, 60, :_reduce_13, - 0, 72, :_reduce_14, - 2, 72, :_reduce_15, - 2, 61, :_reduce_none, - 2, 61, :_reduce_none, - 1, 76, :_reduce_18, - 2, 76, :_reduce_19, - 2, 70, :_reduce_20, - 3, 70, :_reduce_21, - 5, 70, :_reduce_22, - 2, 70, :_reduce_none, - 2, 70, :_reduce_24, - 2, 70, :_reduce_25, - 3, 70, :_reduce_26, - 2, 70, :_reduce_27, - 1, 70, :_reduce_28, - 1, 70, :_reduce_29, - 1, 81, :_reduce_30, - 1, 81, :_reduce_31, - 1, 82, :_reduce_32, - 2, 82, :_reduce_33, - 1, 71, :_reduce_none, - 1, 71, :_reduce_none, - 1, 71, :_reduce_none, - 2, 71, :_reduce_37, - 3, 71, :_reduce_38, - 3, 71, :_reduce_39, - 3, 71, :_reduce_40, - 2, 71, :_reduce_41, - 2, 71, :_reduce_42, - 2, 71, :_reduce_43, - 2, 
71, :_reduce_44, - 2, 71, :_reduce_45, - 2, 77, :_reduce_none, - 2, 77, :_reduce_47, - 2, 77, :_reduce_48, - 2, 77, :_reduce_49, - 2, 77, :_reduce_50, - 2, 77, :_reduce_51, - 2, 77, :_reduce_52, - 2, 77, :_reduce_53, - 0, 87, :_reduce_none, - 1, 87, :_reduce_none, - 1, 88, :_reduce_56, - 2, 88, :_reduce_57, - 2, 83, :_reduce_58, - 3, 83, :_reduce_59, - 0, 91, :_reduce_none, - 1, 91, :_reduce_none, - 3, 86, :_reduce_62, - 8, 78, :_reduce_63, - 5, 79, :_reduce_64, - 8, 79, :_reduce_65, - 1, 92, :_reduce_66, - 3, 92, :_reduce_67, - 1, 93, :_reduce_68, - 3, 93, :_reduce_69, - 0, 99, :_reduce_none, - 1, 99, :_reduce_none, - 0, 100, :_reduce_none, - 1, 100, :_reduce_none, - 1, 94, :_reduce_74, - 3, 94, :_reduce_75, - 3, 94, :_reduce_76, - 6, 94, :_reduce_77, - 3, 94, :_reduce_78, - 3, 94, :_reduce_79, - 0, 102, :_reduce_none, - 1, 102, :_reduce_none, - 1, 90, :_reduce_82, - 1, 103, :_reduce_83, - 2, 103, :_reduce_84, - 2, 84, :_reduce_85, - 3, 84, :_reduce_86, - 1, 80, :_reduce_none, - 1, 80, :_reduce_none, - 0, 104, :_reduce_89, - 0, 105, :_reduce_90, - 5, 75, :_reduce_91, - 1, 106, :_reduce_92, - 2, 106, :_reduce_93, - 2, 107, :_reduce_94, - 1, 108, :_reduce_95, - 2, 108, :_reduce_96, - 1, 85, :_reduce_97, - 1, 85, :_reduce_98, - 3, 85, :_reduce_99, - 1, 89, :_reduce_none, - 1, 89, :_reduce_none, - 1, 110, :_reduce_102, - 2, 110, :_reduce_103, - 2, 62, :_reduce_none, - 2, 62, :_reduce_none, - 4, 109, :_reduce_106, - 1, 111, :_reduce_107, - 3, 111, :_reduce_108, - 0, 112, :_reduce_109, - 2, 112, :_reduce_110, - 3, 112, :_reduce_111, - 5, 112, :_reduce_112, - 7, 112, :_reduce_113, - 4, 112, :_reduce_114, - 3, 112, :_reduce_115, - 1, 96, :_reduce_116, - 1, 96, :_reduce_117, - 1, 96, :_reduce_118, + 0, 69, :_reduce_1, + 2, 69, :_reduce_2, + 0, 70, :_reduce_3, + 2, 70, :_reduce_4, + 1, 71, :_reduce_5, + 2, 71, :_reduce_6, + 0, 72, :_reduce_none, + 1, 72, :_reduce_none, + 5, 64, :_reduce_none, + 0, 73, :_reduce_10, + 0, 74, :_reduce_11, + 5, 65, :_reduce_12, + 2, 65, :_reduce_13, + 0, 77, :_reduce_14, + 2, 77, :_reduce_15, + 2, 66, :_reduce_none, + 2, 66, :_reduce_none, + 1, 81, :_reduce_18, + 2, 81, :_reduce_19, + 2, 75, :_reduce_20, + 3, 75, :_reduce_21, + 5, 75, :_reduce_22, + 2, 75, :_reduce_none, + 2, 75, :_reduce_24, + 2, 75, :_reduce_25, + 3, 75, :_reduce_26, + 2, 75, :_reduce_27, + 1, 75, :_reduce_28, + 1, 75, :_reduce_29, + 1, 86, :_reduce_30, + 1, 86, :_reduce_31, + 1, 87, :_reduce_32, + 2, 87, :_reduce_33, + 1, 76, :_reduce_none, + 1, 76, :_reduce_none, + 1, 76, :_reduce_none, + 2, 76, :_reduce_37, + 3, 76, :_reduce_38, + 3, 76, :_reduce_39, + 3, 76, :_reduce_40, + 2, 76, :_reduce_41, + 2, 76, :_reduce_42, + 2, 76, :_reduce_43, + 2, 76, :_reduce_44, + 2, 76, :_reduce_45, + 2, 82, :_reduce_none, + 2, 82, :_reduce_none, + 2, 82, :_reduce_none, + 2, 82, :_reduce_49, + 2, 82, :_reduce_50, + 2, 82, :_reduce_51, + 2, 82, :_reduce_52, + 2, 82, :_reduce_53, + 2, 82, :_reduce_54, + 2, 82, :_reduce_55, + 0, 94, :_reduce_none, + 1, 94, :_reduce_none, + 1, 95, :_reduce_58, + 2, 95, :_reduce_59, + 2, 88, :_reduce_60, + 3, 88, :_reduce_61, + 0, 98, :_reduce_none, + 1, 98, :_reduce_none, + 3, 93, :_reduce_64, + 1, 100, :_reduce_65, + 2, 100, :_reduce_66, + 2, 89, :_reduce_67, + 3, 89, :_reduce_68, + 3, 99, :_reduce_69, + 1, 90, :_reduce_70, + 3, 101, :_reduce_71, + 3, 101, :_reduce_72, + 1, 102, :_reduce_73, + 1, 102, :_reduce_74, + 1, 102, :_reduce_75, + 8, 83, :_reduce_76, + 5, 84, :_reduce_77, + 8, 84, :_reduce_78, + 1, 103, :_reduce_79, + 3, 103, :_reduce_80, + 1, 104, :_reduce_81, + 3, 104, 
:_reduce_82, + 0, 110, :_reduce_none, + 1, 110, :_reduce_none, + 0, 111, :_reduce_none, + 1, 111, :_reduce_none, + 1, 105, :_reduce_87, + 3, 105, :_reduce_88, + 3, 105, :_reduce_89, + 6, 105, :_reduce_90, + 3, 105, :_reduce_91, + 3, 105, :_reduce_92, 0, 113, :_reduce_none, 1, 113, :_reduce_none, - 2, 97, :_reduce_121, - 3, 97, :_reduce_122, - 4, 97, :_reduce_123, - 0, 114, :_reduce_124, - 0, 115, :_reduce_125, - 5, 98, :_reduce_126, - 3, 95, :_reduce_127, - 0, 116, :_reduce_128, - 3, 63, :_reduce_129, - 1, 73, :_reduce_none, - 0, 74, :_reduce_none, - 1, 74, :_reduce_none, - 1, 74, :_reduce_none, - 1, 74, :_reduce_none, - 1, 101, :_reduce_135 ] - -racc_reduce_n = 136 - -racc_shift_n = 231 + 1, 97, :_reduce_95, + 1, 114, :_reduce_96, + 2, 114, :_reduce_97, + 2, 91, :_reduce_98, + 3, 91, :_reduce_99, + 1, 85, :_reduce_none, + 1, 85, :_reduce_none, + 0, 115, :_reduce_102, + 0, 116, :_reduce_103, + 5, 80, :_reduce_104, + 1, 117, :_reduce_105, + 2, 117, :_reduce_106, + 2, 118, :_reduce_107, + 1, 119, :_reduce_108, + 2, 119, :_reduce_109, + 1, 92, :_reduce_110, + 1, 92, :_reduce_111, + 3, 92, :_reduce_112, + 1, 96, :_reduce_none, + 1, 96, :_reduce_none, + 1, 121, :_reduce_115, + 2, 121, :_reduce_116, + 2, 67, :_reduce_none, + 2, 67, :_reduce_none, + 4, 120, :_reduce_119, + 1, 122, :_reduce_120, + 3, 122, :_reduce_121, + 0, 123, :_reduce_122, + 2, 123, :_reduce_123, + 3, 123, :_reduce_124, + 5, 123, :_reduce_125, + 7, 123, :_reduce_126, + 4, 123, :_reduce_127, + 3, 123, :_reduce_128, + 1, 107, :_reduce_129, + 1, 107, :_reduce_130, + 1, 107, :_reduce_131, + 0, 124, :_reduce_none, + 1, 124, :_reduce_none, + 2, 108, :_reduce_134, + 3, 108, :_reduce_135, + 4, 108, :_reduce_136, + 0, 125, :_reduce_137, + 0, 126, :_reduce_138, + 5, 109, :_reduce_139, + 3, 106, :_reduce_140, + 0, 127, :_reduce_141, + 3, 68, :_reduce_142, + 1, 78, :_reduce_none, + 0, 79, :_reduce_none, + 1, 79, :_reduce_none, + 1, 79, :_reduce_none, + 1, 79, :_reduce_none, + 1, 112, :_reduce_148 ] + +racc_reduce_n = 149 + +racc_shift_n = 253 racc_token_table = { false => 0, @@ -1072,57 +1101,62 @@ def raise_parse_error(error_message, location) :INTEGER => 6, :STRING => 7, :TAG => 8, - "%%" => 9, - "%{" => 10, - "%}" => 11, - "%require" => 12, - ";" => 13, - "%expect" => 14, - "%define" => 15, - "{" => 16, - "}" => 17, - "%param" => 18, - "%lex-param" => 19, - "%parse-param" => 20, - "%code" => 21, - "%initial-action" => 22, - "%no-stdlib" => 23, - "%locations" => 24, - "%union" => 25, - "%destructor" => 26, - "%printer" => 27, - "%error-token" => 28, - "%after-shift" => 29, - "%before-reduce" => 30, - "%after-reduce" => 31, - "%after-shift-error-token" => 32, - "%after-pop-stack" => 33, - "-temp-group" => 34, - "%token" => 35, - "%type" => 36, - "%nterm" => 37, - "%left" => 38, - "%right" => 39, - "%precedence" => 40, - "%nonassoc" => 41, - "%start" => 42, - "%rule" => 43, - "(" => 44, - ")" => 45, - ":" => 46, - "%inline" => 47, - "," => 48, - "|" => 49, - "%empty" => 50, - "%prec" => 51, - "?" 
=> 52, - "+" => 53, - "*" => 54, - "[" => 55, - "]" => 56, - "{...}" => 57 } - -racc_nt_base = 58 + :REGEX => 9, + "%%" => 10, + "%{" => 11, + "%}" => 12, + "%require" => 13, + ";" => 14, + "%expect" => 15, + "%define" => 16, + "{" => 17, + "}" => 18, + "%param" => 19, + "%lex-param" => 20, + "%parse-param" => 21, + "%code" => 22, + "%initial-action" => 23, + "%no-stdlib" => 24, + "%locations" => 25, + "%union" => 26, + "%destructor" => 27, + "%printer" => 28, + "%error-token" => 29, + "%after-shift" => 30, + "%before-reduce" => 31, + "%after-reduce" => 32, + "%after-shift-error-token" => 33, + "%after-pop-stack" => 34, + "-temp-group" => 35, + "%token" => 36, + "%token-pattern" => 37, + "%lex-prec" => 38, + "%type" => 39, + "%nterm" => 40, + "%left" => 41, + "%right" => 42, + "%precedence" => 43, + "%nonassoc" => 44, + "%start" => 45, + "," => 46, + "-" => 47, + "-s" => 48, + "%rule" => 49, + "(" => 50, + ")" => 51, + ":" => 52, + "%inline" => 53, + "|" => 54, + "%empty" => 55, + "%prec" => 56, + "?" => 57, + "+" => 58, + "*" => 59, + "[" => 60, + "]" => 61, + "{...}" => 62 } + +racc_nt_base = 63 racc_use_result_var = true @@ -1153,6 +1187,7 @@ def raise_parse_error(error_message, location) "INTEGER", "STRING", "TAG", + "REGEX", "\"%%\"", "\"%{\"", "\"%}\"", @@ -1180,6 +1215,8 @@ def raise_parse_error(error_message, location) "\"%after-pop-stack\"", "\"-temp-group\"", "\"%token\"", + "\"%token-pattern\"", + "\"%lex-prec\"", "\"%type\"", "\"%nterm\"", "\"%left\"", @@ -1187,12 +1224,14 @@ def raise_parse_error(error_message, location) "\"%precedence\"", "\"%nonassoc\"", "\"%start\"", + "\",\"", + "\"-\"", + "\"-s\"", "\"%rule\"", "\"(\"", "\")\"", "\":\"", "\"%inline\"", - "\",\"", "\"|\"", "\"%empty\"", "\"%prec\"", @@ -1228,6 +1267,8 @@ def raise_parse_error(error_message, location) "\"-group@symbol|TAG\"", "\"-many1@-group@symbol|TAG\"", "token_declarations", + "token_pattern_declarations", + "lex_prec_declarations", "symbol_declarations", "token_declarations_for_precedence", "token_declaration", @@ -1236,6 +1277,10 @@ def raise_parse_error(error_message, location) "id", "alias", "\"-option@INTEGER\"", + "token_pattern_declaration", + "\"-many1@token_pattern_declaration\"", + "lex_prec_chain", + "lex_prec_op", "rule_args", "rule_rhs_list", "rule_rhs", @@ -1583,8 +1628,12 @@ def _reduce_45(val, _values, result) # reduce 46 omitted -module_eval(<<'.,.,', 'parser.y', 136) - def _reduce_47(val, _values, result) +# reduce 47 omitted + +# reduce 48 omitted + +module_eval(<<'.,.,', 'parser.y', 138) + def _reduce_49(val, _values, result) val[1].each {|hash| hash[:tokens].each {|id| @grammar.add_type(id: id, tag: hash[:tag]) @@ -1595,8 +1644,8 @@ def _reduce_47(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 144) - def _reduce_48(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 146) + def _reduce_50(val, _values, result) val[1].each {|hash| hash[:tokens].each {|id| if @grammar.find_term_by_s_value(id.s_value) @@ -1611,8 +1660,8 @@ def _reduce_48(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 156) - def _reduce_49(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 158) + def _reduce_51(val, _values, result) val[1].each {|hash| hash[:tokens].each {|id| sym = @grammar.add_term(id: id, tag: hash[:tag]) @@ -1625,8 +1674,8 @@ def _reduce_49(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 166) - def _reduce_50(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 168) + def _reduce_52(val, _values, result) val[1].each {|hash| 
hash[:tokens].each {|id| sym = @grammar.add_term(id: id, tag: hash[:tag]) @@ -1639,8 +1688,8 @@ def _reduce_50(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 176) - def _reduce_51(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 178) + def _reduce_53(val, _values, result) val[1].each {|hash| hash[:tokens].each {|id| sym = @grammar.add_term(id: id, tag: hash[:tag]) @@ -1653,8 +1702,8 @@ def _reduce_51(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 186) - def _reduce_52(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 188) + def _reduce_54(val, _values, result) val[1].each {|hash| hash[:tokens].each {|id| sym = @grammar.add_term(id: id, tag: hash[:tag]) @@ -1667,34 +1716,34 @@ def _reduce_52(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 196) - def _reduce_53(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 198) + def _reduce_55(val, _values, result) @grammar.set_start_nterm(val[1]) result end .,., -# reduce 54 omitted +# reduce 56 omitted -# reduce 55 omitted +# reduce 57 omitted -module_eval(<<'.,.,', 'parser.y', 214) - def _reduce_56(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 216) + def _reduce_58(val, _values, result) result = val[1] ? val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 214) - def _reduce_57(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 216) + def _reduce_59(val, _values, result) result = val[1] ? val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 202) - def _reduce_58(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 204) + def _reduce_60(val, _values, result) val[1].each {|token_declaration| @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1]&.s_value, tag: val[0], replace: true) } @@ -1703,8 +1752,8 @@ def _reduce_58(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 208) - def _reduce_59(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 210) + def _reduce_61(val, _values, result) val[2].each {|token_declaration| @grammar.add_term(id: token_declaration[0], alias_name: token_declaration[2], token_id: token_declaration[1]&.s_value, tag: val[1], replace: true) } @@ -1713,19 +1762,129 @@ def _reduce_59(val, _values, result) end .,., -# reduce 60 omitted +# reduce 62 omitted -# reduce 61 omitted +# reduce 63 omitted -module_eval(<<'.,.,', 'parser.y', 213) - def _reduce_62(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 215) + def _reduce_64(val, _values, result) result = val result end .,., -module_eval(<<'.,.,', 'parser.y', 218) - def _reduce_63(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 244) + def _reduce_65(val, _values, result) + result = val[1] ? val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 244) + def _reduce_66(val, _values, result) + result = val[1] ? 
val[1].unshift(val[0]) : val + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 220) + def _reduce_67(val, _values, result) + val[1].each {|decl| + @grammar.add_token_pattern( + id: decl[:id], + pattern: decl[:pattern], + alias_name: decl[:alias], + tag: val[0], + lineno: decl[:id].first_line + ) + } + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 232) + def _reduce_68(val, _values, result) + val[2].each {|decl| + @grammar.add_token_pattern( + id: decl[:id], + pattern: decl[:pattern], + alias_name: decl[:alias], + tag: val[1], + lineno: decl[:id].first_line + ) + } + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 246) + def _reduce_69(val, _values, result) + result = { id: val[0], pattern: val[1], alias: val[2] } + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 252) + def _reduce_70(val, _values, result) + val[0].each {|rule| + @grammar.add_lex_prec_rule( + left_token: rule[:left], + operator: rule[:op], + right_token: rule[:right], + lineno: rule[:left].first_line + ) + } + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 265) + def _reduce_71(val, _values, result) + result = [{ left: val[0], op: val[1], right: val[2] }] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 269) + def _reduce_72(val, _values, result) + last_right = val[0].last[:right] + result = val[0] + [{ left: last_right, op: val[1], right: val[2] }] + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 276) + def _reduce_73(val, _values, result) + result = Lrama::Grammar::LexPrec::SAME_PRIORITY + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 280) + def _reduce_74(val, _values, result) + result = Lrama::Grammar::LexPrec::HIGHER + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 284) + def _reduce_75(val, _values, result) + result = Lrama::Grammar::LexPrec::SHORTER + + result + end +.,., + +module_eval(<<'.,.,', 'parser.y', 290) + def _reduce_76(val, _values, result) rule = Grammar::Parameterized::Rule.new(val[1].s_value, val[3], val[7], tag: val[5]) @grammar.add_parameterized_rule(rule) @@ -1733,8 +1892,8 @@ def _reduce_63(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 225) - def _reduce_64(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 297) + def _reduce_77(val, _values, result) rule = Grammar::Parameterized::Rule.new(val[2].s_value, [], val[4], is_inline: true) @grammar.add_parameterized_rule(rule) @@ -1742,8 +1901,8 @@ def _reduce_64(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 230) - def _reduce_65(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 302) + def _reduce_78(val, _values, result) rule = Grammar::Parameterized::Rule.new(val[2].s_value, val[4], val[7], is_inline: true) @grammar.add_parameterized_rule(rule) @@ -1751,22 +1910,22 @@ def _reduce_65(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 235) - def _reduce_66(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 307) + def _reduce_79(val, _values, result) result = [val[0]] result end .,., -module_eval(<<'.,.,', 'parser.y', 236) - def _reduce_67(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 308) + def _reduce_80(val, _values, result) result = val[0].append(val[2]) result end .,., -module_eval(<<'.,.,', 'parser.y', 241) - def _reduce_68(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 313) + def _reduce_81(val, _values, result) builder = val[0] result = [builder] @@ -1774,8 +1933,8 @@ def _reduce_68(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 246) - def 
_reduce_69(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 318) + def _reduce_82(val, _values, result) builder = val[2] result = val[0].append(builder) @@ -1783,16 +1942,16 @@ def _reduce_69(val, _values, result) end .,., -# reduce 70 omitted +# reduce 83 omitted -# reduce 71 omitted +# reduce 84 omitted -# reduce 72 omitted +# reduce 85 omitted -# reduce 73 omitted +# reduce 86 omitted -module_eval(<<'.,.,', 'parser.y', 253) - def _reduce_74(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 325) + def _reduce_87(val, _values, result) reset_precs result = Grammar::Parameterized::Rhs.new @@ -1800,8 +1959,8 @@ def _reduce_74(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 258) - def _reduce_75(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 330) + def _reduce_88(val, _values, result) on_action_error("intermediate %prec in a rule", val[1]) if @trailing_prec_seen token = val[1] token.alias_name = val[2] @@ -1813,8 +1972,8 @@ def _reduce_75(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 267) - def _reduce_76(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 339) + def _reduce_89(val, _values, result) on_action_error("intermediate %prec in a rule", val[1]) if @trailing_prec_seen builder = val[0] builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[2], location: @lexer.location, args: [val[1]]) @@ -1824,8 +1983,8 @@ def _reduce_76(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 274) - def _reduce_77(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 346) + def _reduce_90(val, _values, result) on_action_error("intermediate %prec in a rule", val[1]) if @trailing_prec_seen builder = val[0] builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[3], lhs_tag: val[5]) @@ -1835,8 +1994,8 @@ def _reduce_77(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 281) - def _reduce_78(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 353) + def _reduce_91(val, _values, result) user_code = val[1] user_code.alias_name = val[2] builder = val[0] @@ -1847,8 +2006,8 @@ def _reduce_78(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 289) - def _reduce_79(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 361) + def _reduce_92(val, _values, result) on_action_error("multiple %prec in a rule", val[0]) if prec_seen? sym = @grammar.find_symbol_by_id!(val[2]) if val[0].rhs.empty? @@ -1864,33 +2023,33 @@ def _reduce_79(val, _values, result) end .,., -# reduce 80 omitted +# reduce 93 omitted -# reduce 81 omitted +# reduce 94 omitted -module_eval(<<'.,.,', 'parser.y', 301) - def _reduce_82(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 373) + def _reduce_95(val, _values, result) result = val[0].s_value if val[0] result end .,., -module_eval(<<'.,.,', 'parser.y', 315) - def _reduce_83(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 387) + def _reduce_96(val, _values, result) result = val[1] ? val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 315) - def _reduce_84(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 387) + def _reduce_97(val, _values, result) result = val[1] ? 
val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 306) - def _reduce_85(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 378) + def _reduce_98(val, _values, result) result = if val[0] [{tag: val[0], tokens: val[1]}] else @@ -1901,121 +2060,121 @@ def _reduce_85(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 312) - def _reduce_86(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 384) + def _reduce_99(val, _values, result) result = val[0].append({tag: val[1], tokens: val[2]}) result end .,., -# reduce 87 omitted +# reduce 100 omitted -# reduce 88 omitted +# reduce 101 omitted -module_eval(<<'.,.,', 'parser.y', 321) - def _reduce_89(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 393) + def _reduce_102(val, _values, result) begin_c_declaration("}") result end .,., -module_eval(<<'.,.,', 'parser.y', 325) - def _reduce_90(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 397) + def _reduce_103(val, _values, result) end_c_declaration result end .,., -module_eval(<<'.,.,', 'parser.y', 329) - def _reduce_91(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 401) + def _reduce_104(val, _values, result) result = val[2] result end .,., -module_eval(<<'.,.,', 'parser.y', 338) - def _reduce_92(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 410) + def _reduce_105(val, _values, result) result = val[1] ? val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 338) - def _reduce_93(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 410) + def _reduce_106(val, _values, result) result = val[1] ? val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 338) - def _reduce_94(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 410) + def _reduce_107(val, _values, result) result = val result end .,., -module_eval(<<'.,.,', 'parser.y', 338) - def _reduce_95(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 410) + def _reduce_108(val, _values, result) result = val[1] ? val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 338) - def _reduce_96(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 410) + def _reduce_109(val, _values, result) result = val[1] ? val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 333) - def _reduce_97(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 405) + def _reduce_110(val, _values, result) result = [{tag: nil, tokens: val[0]}] result end .,., -module_eval(<<'.,.,', 'parser.y', 334) - def _reduce_98(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 406) + def _reduce_111(val, _values, result) result = val[0].map {|tag, ids| {tag: tag, tokens: ids} } result end .,., -module_eval(<<'.,.,', 'parser.y', 335) - def _reduce_99(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 407) + def _reduce_112(val, _values, result) result = [{tag: nil, tokens: val[0]}, {tag: val[1], tokens: val[2]}] result end .,., -# reduce 100 omitted +# reduce 113 omitted -# reduce 101 omitted +# reduce 114 omitted -module_eval(<<'.,.,', 'parser.y', 346) - def _reduce_102(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 418) + def _reduce_115(val, _values, result) result = val[1] ? val[1].unshift(val[0]) : val result end .,., -module_eval(<<'.,.,', 'parser.y', 346) - def _reduce_103(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 418) + def _reduce_116(val, _values, result) result = val[1] ? 
val[1].unshift(val[0]) : val result end .,., -# reduce 104 omitted +# reduce 117 omitted -# reduce 105 omitted +# reduce 118 omitted -module_eval(<<'.,.,', 'parser.y', 348) - def _reduce_106(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 420) + def _reduce_119(val, _values, result) lhs = val[0] lhs.alias_name = val[1] val[3].each do |builder| @@ -2028,8 +2187,8 @@ def _reduce_106(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 360) - def _reduce_107(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 432) + def _reduce_120(val, _values, result) if val[0].rhs.count > 1 empties = val[0].rhs.select { |sym| sym.is_a?(Lrama::Lexer::Token::Empty) } empties.each do |empty| @@ -2046,8 +2205,8 @@ def _reduce_107(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 374) - def _reduce_108(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 446) + def _reduce_121(val, _values, result) builder = val[2] if !builder.line builder.line = @lexer.line - 1 @@ -2058,8 +2217,8 @@ def _reduce_108(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 384) - def _reduce_109(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 456) + def _reduce_122(val, _values, result) reset_precs result = @grammar.create_rule_builder(@rule_counter, @midrule_action_counter) @@ -2067,8 +2226,8 @@ def _reduce_109(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 389) - def _reduce_110(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 461) + def _reduce_123(val, _values, result) builder = val[0] builder.add_rhs(Lrama::Lexer::Token::Empty.new(location: @lexer.location)) result = builder @@ -2077,8 +2236,8 @@ def _reduce_110(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 395) - def _reduce_111(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 467) + def _reduce_124(val, _values, result) on_action_error("intermediate %prec in a rule", val[1]) if @trailing_prec_seen token = val[1] token.alias_name = val[2] @@ -2090,8 +2249,8 @@ def _reduce_111(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 404) - def _reduce_112(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 476) + def _reduce_125(val, _values, result) on_action_error("intermediate %prec in a rule", val[1]) if @trailing_prec_seen token = Lrama::Lexer::Token::InstantiateRule.new(s_value: val[2], alias_name: val[3], location: @lexer.location, args: [val[1]], lhs_tag: val[4]) builder = val[0] @@ -2103,8 +2262,8 @@ def _reduce_112(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 413) - def _reduce_113(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 485) + def _reduce_126(val, _values, result) on_action_error("intermediate %prec in a rule", val[1]) if @trailing_prec_seen token = Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, alias_name: val[5], location: @lexer.location, args: val[3], lhs_tag: val[6]) builder = val[0] @@ -2116,8 +2275,8 @@ def _reduce_113(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 422) - def _reduce_114(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 494) + def _reduce_127(val, _values, result) user_code = val[1] user_code.alias_name = val[2] user_code.tag = val[3] @@ -2129,8 +2288,8 @@ def _reduce_114(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 431) - def _reduce_115(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 503) + def _reduce_128(val, _values, result) on_action_error("multiple %prec in a rule", val[0]) if prec_seen? 
sym = @grammar.find_symbol_by_id!(val[2]) if val[0].rhs.empty? @@ -2146,33 +2305,33 @@ def _reduce_115(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 444) - def _reduce_116(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 516) + def _reduce_129(val, _values, result) result = "option" result end .,., -module_eval(<<'.,.,', 'parser.y', 445) - def _reduce_117(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 517) + def _reduce_130(val, _values, result) result = "nonempty_list" result end .,., -module_eval(<<'.,.,', 'parser.y', 446) - def _reduce_118(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 518) + def _reduce_131(val, _values, result) result = "list" result end .,., -# reduce 119 omitted +# reduce 132 omitted -# reduce 120 omitted +# reduce 133 omitted -module_eval(<<'.,.,', 'parser.y', 451) - def _reduce_121(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 523) + def _reduce_134(val, _values, result) result = if val[1] [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[0])] else @@ -2183,22 +2342,22 @@ def _reduce_121(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 457) - def _reduce_122(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 529) + def _reduce_135(val, _values, result) result = val[0].append(val[2]) result end .,., -module_eval(<<'.,.,', 'parser.y', 458) - def _reduce_123(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 530) + def _reduce_136(val, _values, result) result = [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[0].s_value, location: @lexer.location, args: val[2])] result end .,., -module_eval(<<'.,.,', 'parser.y', 463) - def _reduce_124(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 535) + def _reduce_137(val, _values, result) if prec_seen? 
on_action_error("multiple User_code after %prec", val[0]) if @code_after_prec @code_after_prec = true @@ -2209,39 +2368,39 @@ def _reduce_124(val, _values, result) end .,., -module_eval(<<'.,.,', 'parser.y', 471) - def _reduce_125(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 543) + def _reduce_138(val, _values, result) end_c_declaration result end .,., -module_eval(<<'.,.,', 'parser.y', 475) - def _reduce_126(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 547) + def _reduce_139(val, _values, result) result = val[2] result end .,., -module_eval(<<'.,.,', 'parser.y', 478) - def _reduce_127(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 550) + def _reduce_140(val, _values, result) result = val[1].s_value result end .,., -module_eval(<<'.,.,', 'parser.y', 483) - def _reduce_128(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 555) + def _reduce_141(val, _values, result) begin_c_declaration('\Z') result end .,., -module_eval(<<'.,.,', 'parser.y', 487) - def _reduce_129(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 559) + def _reduce_142(val, _values, result) end_c_declaration @grammar.epilogue_first_lineno = val[0].first_line + 1 @grammar.epilogue = val[2].s_value @@ -2250,18 +2409,18 @@ def _reduce_129(val, _values, result) end .,., -# reduce 130 omitted +# reduce 143 omitted -# reduce 131 omitted +# reduce 144 omitted -# reduce 132 omitted +# reduce 145 omitted -# reduce 133 omitted +# reduce 146 omitted -# reduce 134 omitted +# reduce 147 omitted -module_eval(<<'.,.,', 'parser.y', 499) - def _reduce_135(val, _values, result) +module_eval(<<'.,.,', 'parser.y', 571) + def _reduce_148(val, _values, result) result = Lrama::Lexer::Token::Ident.new(s_value: val[0].s_value) result end diff --git a/lib/lrama/scanner_fsa.rb b/lib/lrama/scanner_fsa.rb new file mode 100644 index 00000000..8ecc8186 --- /dev/null +++ b/lib/lrama/scanner_fsa.rb @@ -0,0 +1,506 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + # Scanner Finite State Automaton for PSLR(1) + # Built from token patterns defined by %token-pattern directives + # Based on Definitions 3.2.12, 3.2.13 from the PSLR dissertation + class ScannerFSA + # Represents a state in the scanner FSA + class State + attr_reader :id #: Integer + attr_reader :transitions #: Hash[String, Integer] + attr_reader :accepting_tokens #: Array[Grammar::TokenPattern] + + # @rbs (Integer id) -> void + def initialize(id) + @id = id + @transitions = {} + @accepting_tokens = [] + end + + # @rbs () -> bool + def accepting? + !@accepting_tokens.empty? + end + + # @rbs (String char, Integer target_state_id) -> void + def add_transition(char, target_state_id) + @transitions[char] = target_state_id + end + + # @rbs (Grammar::TokenPattern token_pattern) -> void + def add_accepting_token(token_pattern) + @accepting_tokens << token_pattern + end + end + + attr_reader :states #: Array[State] + attr_reader :initial_state #: State + attr_reader :token_patterns #: Array[Grammar::TokenPattern] + + # @rbs (Array[Grammar::TokenPattern] token_patterns) -> void + def initialize(token_patterns) + @token_patterns = token_patterns + @states = [] + @state_counter = 0 + build_fsa + end + + # Returns the accepting state for a given FSA state + # Definition 3.2.13 (state_to_accepting_state) + # @rbs (Integer state_id) -> State? + def state_to_accepting_state(state_id) + state = @states[state_id] + return nil unless state&.accepting? 
+ state + end + + # Returns the set of tokens accepted at FSA state ss + # Definition 3.2.12 acc(ss) + # @rbs (Integer state_id) -> Array[Grammar::TokenPattern] + def acc_ss(state_id) + state = @states[state_id] + return [] unless state + state.accepting_tokens + end + + # Simulate the FSA on input string starting from initial state + # Returns all accepting states reached during the scan + # @rbs (String input) -> Array[{state: State, position: Integer, token: Grammar::TokenPattern}] + def scan(input) + results = [] + current_state_id = 0 + + input.each_char.with_index do |char, index| + current_state = @states[current_state_id] + break unless current_state + + next_state_id = current_state.transitions[char] + break unless next_state_id + + current_state_id = next_state_id + next_state = @states[next_state_id] + + if next_state.accepting? + next_state.accepting_tokens.each do |token_pattern| + results << { state: next_state, position: index + 1, token: token_pattern } + end + end + end + + results + end + + private + + # Build the FSA from token patterns + # Uses Thompson's construction for NFAs followed by subset construction for DFA + # @rbs () -> void + def build_fsa + return if @token_patterns.empty? + + # Create initial state + @initial_state = create_state + + # Build NFA for each token pattern and convert to DFA + nfa_states = build_nfa + convert_nfa_to_dfa(nfa_states) + end + + # @rbs () -> State + def create_state + state = State.new(@state_counter) + @state_counter += 1 + @states << state + state + end + + # Simple NFA state for regex compilation + class NFAState + attr_reader :id #: Integer + attr_accessor :transitions #: Hash[String?, Array[NFAState]] + attr_accessor :accepting_token #: Grammar::TokenPattern? + + # @rbs (Integer id) -> void + def initialize(id) + @id = id + @transitions = Hash.new { |h, k| h[k] = [] } + @accepting_token = nil + end + + # @rbs (String? char, NFAState target) -> void + def add_transition(char, target) + @transitions[char] << target + end + + # @rbs () -> bool + def accepting? + !@accepting_token.nil? + end + end + + # Build NFA from all token patterns + # @rbs () -> Array[NFAState] + def build_nfa + nfa_states = [] + nfa_counter = [0] + + # Create NFA start state + nfa_start = create_nfa_state(nfa_counter, nfa_states) + + @token_patterns.each do |token_pattern| + # Build NFA fragment for this pattern + start_state, end_state = compile_regex(token_pattern.regex_pattern, nfa_counter, nfa_states) + + # Connect NFA start to this pattern's start with epsilon + nfa_start.add_transition(nil, start_state) + + # Mark end state as accepting + end_state.accepting_token = token_pattern + end + + nfa_states + end + + # @rbs (Array[Integer] counter, Array[NFAState] states) -> NFAState + def create_nfa_state(counter, states) + state = NFAState.new(counter[0]) + counter[0] += 1 + states << state + state + end + + # Compile a regex pattern to NFA fragment + # Returns [start_state, end_state] + # @rbs (String pattern, Array[Integer] counter, Array[NFAState] states) -> [NFAState, NFAState] + def compile_regex(pattern, counter, states) + # Simple regex compiler supporting: + # - Literal characters + # - Character classes [...] + # - Quantifiers *, +, ? 
+    #   - Alternation |
+    #   - Grouping ()
+
+      compile_sequence(pattern, 0, counter, states)
+    end
+
+    # Compile a sequence of regex elements
+    # @rbs (String pattern, Integer pos, Array[Integer] counter, Array[NFAState] states) -> [NFAState, NFAState]
+    def compile_sequence(pattern, pos, counter, states)
+      fragments = []
+      i = pos
+
+      while i < pattern.length
+        char = pattern[i]
+
+        case char
+        when '\\'
+          # Escape sequence
+          if i + 1 < pattern.length
+            i += 1
+            next_char = pattern[i]
+            case next_char
+            when 'd'
+              # \d matches digit
+              frag = compile_char_class('0-9', counter, states)
+            when 'w'
+              # \w matches word character
+              frag = compile_char_class('a-zA-Z0-9_', counter, states)
+            when 's'
+              # \s matches whitespace; double quotes so the escapes denote
+              # the actual whitespace characters, not backslash-letter pairs
+              frag = compile_char_class(" \t\n\r\f\v", counter, states)
+            else
+              # Literal escaped character
+              frag = compile_literal(next_char, counter, states)
+            end
+            fragments << frag
+          end
+        when '['
+          # Character class
+          class_end = pattern.index(']', i)
+          raise "Unclosed character class in pattern: #{pattern}" unless class_end
+
+          char_class = pattern[i + 1...class_end]
+          frag = compile_char_class(char_class, counter, states)
+          fragments << frag
+          i = class_end
+        when '*', '+', '?'
+          # Quantifier - modify the last fragment
+          if fragments.empty?
+            raise "Quantifier #{char} without preceding element in pattern: #{pattern}"
+          end
+          last_frag = fragments.pop
+          quantified = apply_quantifier(last_frag, char, counter, states)
+          fragments << quantified
+        when '|'
+          # Alternation - compile remaining and merge
+          left_start, left_end = concatenate_fragments(fragments, counter, states)
+          right_start, right_end = compile_sequence(pattern, i + 1, counter, states)
+
+          # Create alternation
+          alt_start = create_nfa_state(counter, states)
+          alt_end = create_nfa_state(counter, states)
+
+          alt_start.add_transition(nil, left_start)
+          alt_start.add_transition(nil, right_start)
+          left_end.add_transition(nil, alt_end)
+          right_end.add_transition(nil, alt_end)
+
+          return [alt_start, alt_end]
+        when '('
+          # Find matching closing paren
+          depth = 1
+          j = i + 1
+          while j < pattern.length && depth > 0
+            if pattern[j] == '('
+              depth += 1
+            elsif pattern[j] == ')'
+              depth -= 1
+            end
+            j += 1
+          end
+          raise "Unclosed group in pattern: #{pattern}" if depth > 0
+
+          group_content = pattern[i + 1...j - 1]
+          frag = compile_sequence(group_content, 0, counter, states)
+          fragments << frag
+          i = j - 1
+        when ')'
+          # End of group - return
+          break
+        when '.'
+          # Match any character (simplified: printable ASCII)
+          frag = compile_any_char(counter, states)
+          fragments << frag
+        else
+          # Literal character
+          frag = compile_literal(char, counter, states)
+          fragments << frag
+        end
+
+        i += 1
+      end
+
+      if fragments.empty?
+        # Empty pattern
+        state = create_nfa_state(counter, states)
+        return [state, state]
+      end
+
+      concatenate_fragments(fragments, counter, states)
+    end
+
+    # Compile a single literal character
+    # @rbs (String char, Array[Integer] counter, Array[NFAState] states) -> [NFAState, NFAState]
+    def compile_literal(char, counter, states)
+      start_state = create_nfa_state(counter, states)
+      end_state = create_nfa_state(counter, states)
+      start_state.add_transition(char, end_state)
+      [start_state, end_state]
+    end
+
+    # Compile a character class [...]
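+    # Illustrative examples (hypothetical calls, not from the dissertation):
+    #   compile_char_class('a-c', counter, states)   # transitions on 'a', 'b', 'c'
+    #   compile_char_class('^0-9', counter, states)  # any printable non-digit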
+ # @rbs (String char_class, Array[Integer] counter, Array[NFAState] states) -> [NFAState, NFAState] + def compile_char_class(char_class, counter, states) + start_state = create_nfa_state(counter, states) + end_state = create_nfa_state(counter, states) + + chars = expand_char_class(char_class) + chars.each do |c| + start_state.add_transition(c, end_state) + end + + [start_state, end_state] + end + + # Expand character class string to array of characters + # @rbs (String char_class) -> Array[String] + def expand_char_class(char_class) + chars = [] + i = 0 + negated = false + + if char_class[0] == '^' + negated = true + i = 1 + end + + while i < char_class.length + if i + 2 < char_class.length && char_class[i + 1] == '-' + # Range + start_char = char_class[i] + end_char = char_class[i + 2] + (start_char..end_char).each { |c| chars << c } + i += 3 + else + chars << char_class[i] + i += 1 + end + end + + if negated + all_printable = (32..126).map(&:chr) + chars = all_printable - chars + end + + chars + end + + # Compile . (any character) + # @rbs (Array[Integer] counter, Array[NFAState] states) -> [NFAState, NFAState] + def compile_any_char(counter, states) + start_state = create_nfa_state(counter, states) + end_state = create_nfa_state(counter, states) + + # Match printable ASCII + (32..126).each do |code| + start_state.add_transition(code.chr, end_state) + end + + [start_state, end_state] + end + + # Apply a quantifier to a fragment + # @rbs ([NFAState, NFAState] fragment, String quantifier, Array[Integer] counter, Array[NFAState] states) -> [NFAState, NFAState] + def apply_quantifier(fragment, quantifier, counter, states) + frag_start, frag_end = fragment + + case quantifier + when '*' + # Zero or more + new_start = create_nfa_state(counter, states) + new_end = create_nfa_state(counter, states) + + new_start.add_transition(nil, frag_start) + new_start.add_transition(nil, new_end) + frag_end.add_transition(nil, frag_start) + frag_end.add_transition(nil, new_end) + + [new_start, new_end] + when '+' + # One or more + new_end = create_nfa_state(counter, states) + + frag_end.add_transition(nil, frag_start) + frag_end.add_transition(nil, new_end) + + [frag_start, new_end] + when '?' + # Zero or one + new_start = create_nfa_state(counter, states) + new_end = create_nfa_state(counter, states) + + new_start.add_transition(nil, frag_start) + new_start.add_transition(nil, new_end) + frag_end.add_transition(nil, new_end) + + [new_start, new_end] + else + fragment + end + end + + # Concatenate multiple NFA fragments into one + # @rbs (Array[[NFAState, NFAState]] fragments, Array[Integer] counter, Array[NFAState] states) -> [NFAState, NFAState] + def concatenate_fragments(fragments, counter, states) + return create_nfa_state(counter, states).then { |s| [s, s] } if fragments.empty? + return fragments[0] if fragments.size == 1 + + result_start = fragments[0][0] + current_end = fragments[0][1] + + fragments[1..-1].each do |frag_start, frag_end| + current_end.add_transition(nil, frag_start) + current_end = frag_end + end + + [result_start, current_end] + end + + # Convert NFA to DFA using subset construction + # @rbs (Array[NFAState] nfa_states) -> void + def convert_nfa_to_dfa(nfa_states) + return if nfa_states.empty? 
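+      # Classic subset construction: each DFA state stands for the
+      # epsilon-closure of a set of NFA states, keyed below by the closure's
+      # sorted id list; a worklist discovers new subsets, and any subset
+      # containing accepting NFA states accepts all of their token patterns.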
+ + # Clear existing DFA states + @states = [] + @state_counter = 0 + + # Compute epsilon closure of start state + nfa_start = nfa_states[0] + start_closure = epsilon_closure([nfa_start]) + + # Map NFA state sets to DFA states + dfa_states = {} + work_list = [start_closure] + dfa_states[start_closure.map(&:id).sort] = create_state + + @initial_state = @states[0] + + # Mark accepting tokens for initial state + start_closure.each do |nfa_state| + if nfa_state.accepting? + @initial_state.add_accepting_token(nfa_state.accepting_token) + end + end + + while !work_list.empty? + current_nfa_set = work_list.shift + current_dfa = dfa_states[current_nfa_set.map(&:id).sort] + + # Find all possible transitions + transitions = {} + current_nfa_set.each do |nfa_state| + nfa_state.transitions.each do |char, targets| + next if char.nil? # Skip epsilon transitions + transitions[char] ||= [] + transitions[char].concat(targets) + end + end + + transitions.each do |char, targets| + target_closure = epsilon_closure(targets.uniq) + target_key = target_closure.map(&:id).sort + + unless dfa_states.key?(target_key) + new_dfa_state = create_state + dfa_states[target_key] = new_dfa_state + + # Mark accepting tokens + target_closure.each do |nfa_state| + if nfa_state.accepting? + new_dfa_state.add_accepting_token(nfa_state.accepting_token) + end + end + + work_list << target_closure + end + + current_dfa.add_transition(char, dfa_states[target_key].id) + end + end + end + + # Compute epsilon closure of a set of NFA states + # @rbs (Array[NFAState] nfa_states) -> Array[NFAState] + def epsilon_closure(nfa_states) + closure = nfa_states.dup + work_list = nfa_states.dup + + while !work_list.empty? + state = work_list.shift + epsilon_targets = state.transitions[nil] || [] + + epsilon_targets.each do |target| + unless closure.include?(target) + closure << target + work_list << target + end + end + end + + closure + end + end +end diff --git a/lib/lrama/state.rb b/lib/lrama/state.rb index 50912e09..9f116346 100644 --- a/lib/lrama/state.rb +++ b/lib/lrama/state.rb @@ -4,8 +4,10 @@ require_relative "state/action" require_relative "state/inadequacy_annotation" require_relative "state/item" +require_relative "state/pslr_inadequacy" require_relative "state/reduce_reduce_conflict" require_relative "state/resolved_conflict" +require_relative "state/scanner_accepts" require_relative "state/shift_reduce_conflict" module Lrama diff --git a/lib/lrama/state/pslr_inadequacy.rb b/lib/lrama/state/pslr_inadequacy.rb new file mode 100644 index 00000000..58fa5236 --- /dev/null +++ b/lib/lrama/state/pslr_inadequacy.rb @@ -0,0 +1,67 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class State + # PSLR Inadequacy detection + # Based on Section 3.4.3 from the PSLR dissertation + # + # PSLR inadequacy occurs when state merging causes different + # pseudo-scanner behavior + class PslrInadequacy + # Inadequacy types + LR_RELATIVE = :lr_relative #: Symbol + PSLR_RELATIVE = :pslr_relative #: Symbol + + attr_reader :type #: Symbol + attr_reader :state #: State + attr_reader :conflicting_states #: Array[State] + attr_reader :details #: Hash[Symbol, Object] + + # @rbs (type: Symbol, state: State, conflicting_states: Array[State], details: Hash[Symbol, untyped]) -> void + def initialize(type:, state:, conflicting_states:, details:) + @type = type + @state = state + @conflicting_states = conflicting_states + @details = details + end + + # @rbs () -> String + def to_s + "PSLR Inadequacy (#{type}): state #{state.id} conflicts with 
states #{conflicting_states.map(&:id).join(', ')}" + end + end + + # PSLR Compatibility checker + # Based on Definition 3.4.1 from the dissertation + class PslrCompatibilityChecker + # @rbs (ScannerAccepts scanner_accepts, LengthPrecedences length_prec) -> void + def initialize(scanner_accepts, length_prec) + @scanner_accepts = scanner_accepts + @length_prec = length_prec + end + + # Check if two states are PSLR-compatible + # Definition 3.4.1: States are compatible if for any input, + # the pseudo-scanner selects the same token + # @rbs (State s1, State s2, ScannerFSA scanner_fsa) -> bool + def compatible?(s1, s2, scanner_fsa) + # For all accepting states in the FSA, check if the selected tokens match + scanner_fsa.states.each do |fsa_state| + next unless fsa_state.accepting? + + token1 = @scanner_accepts[s1.id, fsa_state.id] + token2 = @scanner_accepts[s2.id, fsa_state.id] + + # Both undefined is compatible + next if token1.nil? && token2.nil? + + # Different tokens are incompatible + return false if token1 != token2 + end + + true + end + end + end +end diff --git a/lib/lrama/state/scanner_accepts.rb b/lib/lrama/state/scanner_accepts.rb new file mode 100644 index 00000000..77f0a393 --- /dev/null +++ b/lib/lrama/state/scanner_accepts.rb @@ -0,0 +1,141 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class State + # Scanner accepts table for PSLR(1) + # Based on Definition 3.2.14 from the PSLR dissertation + # + # scanner_accepts[sp, sa]: For parser state sp and accepting state sa, + # returns the token that should be selected + class ScannerAccepts + attr_reader :table #: Hash[[Integer, Integer], Grammar::TokenPattern?] + + # @rbs (Array[State] parser_states, ScannerFSA scanner_fsa, Grammar::LexPrec lex_prec, LengthPrecedences length_prec) -> void + def initialize(parser_states, scanner_fsa, lex_prec, length_prec) + @parser_states = parser_states + @scanner_fsa = scanner_fsa + @lex_prec = lex_prec + @length_prec = length_prec + @table = {} + @profile_map = {} #: Hash[untyped, untyped] # Cache for conflict profile resolution + end + + # Build the scanner_accepts table + # Based on Definition 3.2.20 (compute_scanner_accepts) + # @rbs () -> void + def build + @parser_states.each do |parser_state| + compute_for_parser_state(parser_state) + end + end + + # Get the accepted token for a parser state and accepting state + # @rbs (Integer parser_state_id, Integer accepting_state_id) -> Grammar::TokenPattern? + def [](parser_state_id, accepting_state_id) + @table[[parser_state_id, accepting_state_id]] + end + + private + + # Compute scanner_accepts for a single parser state + # Uses DFS to explore the FSA state space + # @rbs (State parser_state) -> void + def compute_for_parser_state(parser_state) + visited = Set.new + dfs(parser_state, 0, visited) # Start from FSA initial state (id 0) + end + + # DFS exploration of FSA states + # @rbs (State parser_state, Integer fsa_state_id, Set[Integer] visited) -> void + def dfs(parser_state, fsa_state_id, visited) + return if visited.include?(fsa_state_id) + visited << fsa_state_id + + fsa_state = @scanner_fsa.states[fsa_state_id] + return unless fsa_state + + # If this is an accepting state, compute the accepted token + if fsa_state.accepting? 
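+        # Record which token the pseudo-scanner should deliver when a scan
+        # begun in this parser state ends in this accepting FSA state; this
+        # is one entry of the scanner_accepts table (Definition 3.2.14).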
+ token = resolve(parser_state, fsa_state) + @table[[parser_state.id, fsa_state_id]] = token if token + end + + # Explore transitions + fsa_state.transitions.each_value do |next_state_id| + dfs(parser_state, next_state_id, visited) + end + end + + # Resolve which token should be accepted + # Based on Definition 3.2.19 (resolve) + # @rbs (State parser_state, ScannerFSA::State fsa_state) -> Grammar::TokenPattern? + def resolve(parser_state, fsa_state) + # Get tokens that are both: + # 1. Accepted by the FSA at this state (acc(ss)) + # 2. Accepted by the parser at this state (acc(sp)) + acc_ss = fsa_state.accepting_tokens + acc_sp = compute_acc_sp(parser_state) + + # Intersection: tokens that can be both scanned and parsed + acc_sp_ss = acc_ss.select do |token_pattern| + acc_sp.include?(token_pattern.name) + end + + return nil if acc_sp_ss.empty? + + # Select the highest priority token + select_best_token(acc_sp_ss) + end + + # Compute acc(sp): set of terminal symbols acceptable at parser state sp + # @rbs (State parser_state) -> Set[String] + def compute_acc_sp(parser_state) + tokens = Set.new + + # Add tokens from shift actions (term_transitions) + parser_state.term_transitions.each do |shift| + next_sym = shift.next_sym + tokens << next_sym.id.s_value if next_sym.term? + end + + # Add tokens from reduce actions (lookahead) + parser_state.reduces.each do |reduce| + reduce.look_ahead&.each do |la| + tokens << la.id.s_value + end + end + + tokens + end + + # Select the best token from candidates based on precedence rules + # @rbs (Array[Grammar::TokenPattern] candidates) -> Grammar::TokenPattern? + def select_best_token(candidates) + return candidates.first if candidates.size <= 1 + + # Sort by: + # 1. Explicit precedence (from %lex-prec - rules) + # 2. Definition order (first defined wins) + candidates.min_by do |token| + priority_rank(token, candidates) + end + end + + # Compute priority rank for a token among candidates + # Lower rank = higher priority + # @rbs (Grammar::TokenPattern token, Array[Grammar::TokenPattern] candidates) -> [Integer, Integer] + def priority_rank(token, candidates) + # Check if this token has explicit higher priority over others + higher_count = candidates.count do |other| + next false if other == token + @lex_prec.higher_priority?(token.name, other.name) + end + + # Tokens with more "higher than" relationships get lower rank + # Fallback to definition order + [-higher_count, token.definition_order] + end + end + end +end diff --git a/lib/lrama/states.rb b/lib/lrama/states.rb index ddce627d..60071053 100644 --- a/lib/lrama/states.rb +++ b/lib/lrama/states.rb @@ -36,12 +36,16 @@ class States include Lrama::Tracer::Duration def_delegators "@grammar", :symbols, :terms, :nterms, :rules, :precedences, - :accept_symbol, :eof_symbol, :undef_symbol, :find_symbol_by_s_value!, :ielr_defined? + :accept_symbol, :eof_symbol, :undef_symbol, :find_symbol_by_s_value!, :ielr_defined?, :pslr_defined?, + :token_patterns, :lex_prec attr_reader :states #: Array[State] attr_reader :reads_relation #: Hash[State::Action::Goto, Array[State::Action::Goto]] attr_reader :includes_relation #: Hash[State::Action::Goto, Array[State::Action::Goto]] attr_reader :lookback_relation #: Hash[state_id, Hash[rule_id, Array[State::Action::Goto]]] + attr_reader :scanner_fsa #: ScannerFSA? + attr_reader :length_precedences #: LengthPrecedences? + attr_reader :scanner_accepts_table #: State::ScannerAccepts? 
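+    # The three readers above are populated by compute_pslr; for plain
+    # LALR/IELR computations they remain nil.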
# @rbs (Grammar grammar, Tracer tracer) -> void def initialize(grammar, tracer) @@ -141,6 +145,26 @@ def compute_ielr report_duration(:compute_default_reduction) { compute_default_reduction } end + # Compute PSLR(1) states + # Based on Section 3.4 of the PSLR dissertation + # @rbs () -> void + def compute_pslr + # Phase 1: Run IELR(1) as the base + compute_ielr + + # Phase 2: Build Scanner FSA from token patterns + report_duration(:build_scanner_fsa) { build_scanner_fsa } + + # Phase 3: Build lexical precedence tables + report_duration(:build_length_precedences) { build_length_precedences } + + # Phase 4: Build scanner_accepts table + report_duration(:build_scanner_accepts) { build_scanner_accepts } + + # Phase 5: Detect and handle PSLR inadequacies + report_duration(:handle_pslr_inadequacies) { handle_pslr_inadequacies } + end + # @rbs () -> Integer def states_count @states.count @@ -863,5 +887,75 @@ def clear_look_ahead_sets @_follow_sets = nil @_la = nil end + + # Build Scanner FSA from token patterns + # @rbs () -> void + def build_scanner_fsa + return if token_patterns.empty? + + @scanner_fsa = ScannerFSA.new(token_patterns) + end + + # Build length precedences table + # @rbs () -> void + def build_length_precedences + @length_precedences = LengthPrecedences.new(lex_prec) + end + + # Build scanner_accepts table + # @rbs () -> void + def build_scanner_accepts + return unless @scanner_fsa + + @scanner_accepts_table = State::ScannerAccepts.new( + @states, + @scanner_fsa, + lex_prec, + @length_precedences + ) + @scanner_accepts_table.build + end + + # Handle PSLR inadequacies + # Detects and splits states where pseudo-scanner behavior differs + # @rbs () -> void + def handle_pslr_inadequacies + return unless @scanner_fsa && @scanner_accepts_table + + inadequacies = detect_pslr_inadequacies + return if inadequacies.empty? + + # For now, just report inadequacies + # Full state splitting would require additional implementation + @tracer.warn("Detected #{inadequacies.size} PSLR inadequacies") if @tracer.respond_to?(:warn) + end + + # Detect PSLR inadequacies in isocore groups + # @rbs () -> Array[State::PslrInadequacy] + def detect_pslr_inadequacies + inadequacies = [] + checker = State::PslrCompatibilityChecker.new(@scanner_accepts_table, @length_precedences) + + # Group states by their kernel items (isocore groups) + isocore_groups = @states.group_by { |s| s.kernels.map { |k| [k.rule.id, k.position] }.sort } + + isocore_groups.each_value do |group_states| + next if group_states.size <= 1 + + # Check pairwise compatibility + group_states.combination(2).each do |s1, s2| + unless checker.compatible?(s1, s2, @scanner_fsa) + inadequacies << State::PslrInadequacy.new( + type: State::PslrInadequacy::PSLR_RELATIVE, + state: s1, + conflicting_states: [s1, s2], + details: { reason: "Scanner behavior differs between isocore states" } + ) + end + end + end + + inadequacies + end end end diff --git a/parser.y b/parser.y index 312b5e9d..8da81b4f 100644 --- a/parser.y +++ b/parser.y @@ -2,7 +2,7 @@ class Lrama::Parser expect 0 error_on_expect_mismatch - token C_DECLARATION CHARACTER IDENT_COLON IDENTIFIER INTEGER STRING TAG + token C_DECLARATION CHARACTER IDENT_COLON IDENTIFIER INTEGER STRING TAG REGEX rule @@ -132,6 +132,8 @@ rule symbol_declaration: "%token" token_declarations + | "%token-pattern" token_pattern_declarations + | "%lex-prec" lex_prec_declarations | "%type" symbol_declarations { val[1].each {|hash| @@ -213,6 +215,76 @@ rule token_declaration: id INTEGER? 
alias { result = val } + token_pattern_declarations: + TAG? token_pattern_declaration+ + { + val[1].each {|decl| + @grammar.add_token_pattern( + id: decl[:id], + pattern: decl[:pattern], + alias_name: decl[:alias], + tag: val[0], + lineno: decl[:id].first_line + ) + } + } + | token_pattern_declarations TAG token_pattern_declaration+ + { + val[2].each {|decl| + @grammar.add_token_pattern( + id: decl[:id], + pattern: decl[:pattern], + alias_name: decl[:alias], + tag: val[1], + lineno: decl[:id].first_line + ) + } + } + + token_pattern_declaration: + IDENTIFIER REGEX alias + { + result = { id: val[0], pattern: val[1], alias: val[2] } + } + + lex_prec_declarations: + lex_prec_chain + { + val[0].each {|rule| + @grammar.add_lex_prec_rule( + left_token: rule[:left], + operator: rule[:op], + right_token: rule[:right], + lineno: rule[:left].first_line + ) + } + } + + lex_prec_chain: + IDENTIFIER lex_prec_op IDENTIFIER + { + result = [{ left: val[0], op: val[1], right: val[2] }] + } + | lex_prec_chain lex_prec_op IDENTIFIER + { + last_right = val[0].last[:right] + result = val[0] + [{ left: last_right, op: val[1], right: val[2] }] + } + + lex_prec_op: + "," + { + result = Lrama::Grammar::LexPrec::SAME_PRIORITY + } + | "-" + { + result = Lrama::Grammar::LexPrec::HIGHER + } + | "-s" + { + result = Lrama::Grammar::LexPrec::SHORTER + } + rule_declaration: "%rule" IDENTIFIER "(" rule_args ")" TAG? ":" rule_rhs_list { diff --git a/sig/generated/lrama/grammar.rbs b/sig/generated/lrama/grammar.rbs index faab4f04..7cf8cc6b 100644 --- a/sig/generated/lrama/grammar.rbs +++ b/sig/generated/lrama/grammar.rbs @@ -76,6 +76,10 @@ module Lrama @start_nterm: Lrama::Lexer::Token::Base? + @token_patterns: Array[Grammar::TokenPattern] + + @lex_prec: Grammar::LexPrec + extend Forwardable attr_reader percent_codes: Array[PercentCode] @@ -136,6 +140,10 @@ module Lrama attr_accessor required: bool + attr_reader token_patterns: Array[Grammar::TokenPattern] + + attr_reader lex_prec: Grammar::LexPrec + # @rbs (Counter rule_counter, bool locations, Hash[String, String] define) -> void def initialize: (Counter rule_counter, bool locations, Hash[String, String] define) -> void @@ -227,6 +235,21 @@ module Lrama # @rbs () -> bool def ielr_defined?: () -> bool + # @rbs () -> bool + def pslr_defined?: () -> bool + + # Add a token pattern from %token-pattern directive + # @rbs (id: Lexer::Token::Ident, pattern: Lexer::Token::Regex, ?alias_name: String?, ?tag: Lexer::Token::Tag?, lineno: Integer) -> Grammar::TokenPattern + def add_token_pattern: (id: Lexer::Token::Ident, pattern: Lexer::Token::Regex, lineno: Integer, ?alias_name: String?, ?tag: Lexer::Token::Tag?) -> Grammar::TokenPattern + + # Add a lex-prec rule from %lex-prec directive + # @rbs (left_token: Lexer::Token::Ident, operator: Symbol, right_token: Lexer::Token::Ident, lineno: Integer) -> Grammar::LexPrec::Rule + def add_lex_prec_rule: (left_token: Lexer::Token::Ident, operator: Symbol, right_token: Lexer::Token::Ident, lineno: Integer) -> Grammar::LexPrec::Rule + + # Find a token pattern by its name + # @rbs (String name) -> Grammar::TokenPattern? + def find_token_pattern: (String name) -> Grammar::TokenPattern? 
+ private # @rbs () -> void diff --git a/sig/generated/lrama/grammar/lex_prec.rbs b/sig/generated/lrama/grammar/lex_prec.rbs new file mode 100644 index 00000000..fcb13076 --- /dev/null +++ b/sig/generated/lrama/grammar/lex_prec.rbs @@ -0,0 +1,64 @@ +# Generated from lib/lrama/grammar/lex_prec.rb with RBS::Inline + +module Lrama + class Grammar + # Represents lexical precedence rules defined by %lex-prec directive + # Based on Definition 3.2.3, 3.2.4, 3.2.10 from the PSLR dissertation + # + # Example: %lex-prec RANGLE -s RSHIFT # RANGLE is shorter than RSHIFT + # %lex-prec IF - ID # IF has higher priority than ID (same length) + class LexPrec + # Precedence relation types + # "," : Same priority (lex-tie) + # "-" : Left has higher priority than right + # "-s" : Left is shorter match priority over right + SAME_PRIORITY: Symbol + + HIGHER: Symbol + + SHORTER: Symbol + + # Represents a single precedence rule + class Rule + attr_reader left_token: Lexer::Token::Ident + + attr_reader operator: Symbol + + attr_reader right_token: Lexer::Token::Ident + + attr_reader lineno: Integer + + # @rbs (left_token: Lexer::Token::Ident, operator: Symbol, right_token: Lexer::Token::Ident, lineno: Integer) -> void + def initialize: (left_token: Lexer::Token::Ident, operator: Symbol, right_token: Lexer::Token::Ident, lineno: Integer) -> void + + # @rbs () -> String + def left_name: () -> String + + # @rbs () -> String + def right_name: () -> String + end + + attr_reader rules: Array[Rule] + + # @rbs () -> void + def initialize: () -> void + + # @rbs (left_token: Lexer::Token::Ident, operator: Symbol, right_token: Lexer::Token::Ident, lineno: Integer) -> Rule + def add_rule: (left_token: Lexer::Token::Ident, operator: Symbol, right_token: Lexer::Token::Ident, lineno: Integer) -> Rule + + # Check if token t1 has higher priority than t2 + # Based on Definition 3.2.4 + # @rbs (String t1, String t2) -> bool + def higher_priority?: (String t1, String t2) -> bool + + # Check if token t1 has shorter-match priority over t2 + # Based on Definition 3.2.15 + # @rbs (String t1, String t2) -> bool + def shorter_priority?: (String t1, String t2) -> bool + + # Check if tokens t1 and t2 are in a lex-tie relationship + # @rbs (String t1, String t2) -> bool + def same_priority?: (String t1, String t2) -> bool + end + end +end diff --git a/sig/generated/lrama/grammar/token_pattern.rbs b/sig/generated/lrama/grammar/token_pattern.rbs new file mode 100644 index 00000000..2add03da --- /dev/null +++ b/sig/generated/lrama/grammar/token_pattern.rbs @@ -0,0 +1,31 @@ +# Generated from lib/lrama/grammar/token_pattern.rb with RBS::Inline + +module Lrama + class Grammar + # Represents a token pattern defined by %token-pattern directive + # Example: %token-pattern RSHIFT />>/ "right shift" + class TokenPattern + attr_reader id: Lexer::Token::Ident + + attr_reader pattern: Lexer::Token::Regex + + attr_reader alias_name: String? + + attr_reader tag: Lexer::Token::Tag? + + attr_reader lineno: Integer + + attr_reader definition_order: Integer + + # @rbs (id: Lexer::Token::Ident, pattern: Lexer::Token::Regex, ?alias_name: String?, ?tag: Lexer::Token::Tag?, lineno: Integer, definition_order: Integer) -> void + def initialize: (id: Lexer::Token::Ident, pattern: Lexer::Token::Regex, lineno: Integer, definition_order: Integer, ?alias_name: String?, ?tag: Lexer::Token::Tag?) 
-> void + + # @rbs () -> String + def name: () -> String + + # Returns the regex pattern string (without slashes) + # @rbs () -> String + def regex_pattern: () -> String + end + end +end diff --git a/sig/generated/lrama/length_precedences.rbs b/sig/generated/lrama/length_precedences.rbs new file mode 100644 index 00000000..07e23d31 --- /dev/null +++ b/sig/generated/lrama/length_precedences.rbs @@ -0,0 +1,38 @@ +# Generated from lib/lrama/length_precedences.rb with RBS::Inline + +module Lrama + # Length precedences table for PSLR(1) + # Based on Definition 3.2.15 from the PSLR dissertation + # + # Determines which token should be preferred when there's a length conflict: + # - :left - the shorter token (t1) should be preferred + # - :right - the longer token (t2) should be preferred + # - :undefined - no preference defined, use default (longest match) + class LengthPrecedences + # Result of length precedence lookup + LEFT: Symbol + + RIGHT: Symbol + + UNDEFINED: Symbol + + attr_reader table: Hash[[ String, String ], Symbol] + + # @rbs (Grammar::LexPrec lex_prec) -> void + def initialize: (Grammar::LexPrec lex_prec) -> void + + # Get the length precedence between two tokens + # @rbs (String t1, String t2) -> Symbol + def precedence: (String t1, String t2) -> Symbol + + # Check if t1 (shorter) should be preferred over t2 (longer) + # @rbs (String t1, String t2) -> bool + def prefer_shorter?: (String t1, String t2) -> bool + + private + + # Build the length precedence table from lex-prec rules + # @rbs (Grammar::LexPrec lex_prec) -> Hash[[String, String], Symbol] + def build_table: (Grammar::LexPrec lex_prec) -> Hash[[ String, String ], Symbol] + end +end diff --git a/sig/generated/lrama/lexer.rbs b/sig/generated/lrama/lexer.rbs index 23202612..997ed3f2 100644 --- a/sig/generated/lrama/lexer.rbs +++ b/sig/generated/lrama/lexer.rbs @@ -4,7 +4,7 @@ module Lrama class Lexer type token = lexer_token | c_token - type lexer_token = [ String, Token::Token ] | [ ::Symbol, Token::Tag ] | [ ::Symbol, Token::Char ] | [ ::Symbol, Token::Str ] | [ ::Symbol, Token::Int ] | [ ::Symbol, Token::Ident ] + type lexer_token = [ String, Token::Token ] | [ ::Symbol, Token::Tag ] | [ ::Symbol, Token::Char ] | [ ::Symbol, Token::Str ] | [ ::Symbol, Token::Int ] | [ ::Symbol, Token::Ident ] | [ ::Symbol, Token::Regex ] type c_token = [ :C_DECLARATION, Token::UserCode ] diff --git a/sig/generated/lrama/lexer/token/regex.rbs b/sig/generated/lrama/lexer/token/regex.rbs new file mode 100644 index 00000000..b832c4be --- /dev/null +++ b/sig/generated/lrama/lexer/token/regex.rbs @@ -0,0 +1,15 @@ +# Generated from lib/lrama/lexer/token/regex.rb with RBS::Inline + +module Lrama + class Lexer + module Token + # Token class for regex patterns used in %token-pattern directive + # Example: /[a-zA-Z_][a-zA-Z0-9_]*/ + class Regex < Base + # Returns the regex pattern without the surrounding slashes + # @rbs () -> String + def pattern: () -> String + end + end + end +end diff --git a/sig/generated/lrama/scanner_fsa.rbs b/sig/generated/lrama/scanner_fsa.rbs new file mode 100644 index 00000000..490d7201 --- /dev/null +++ b/sig/generated/lrama/scanner_fsa.rbs @@ -0,0 +1,129 @@ +# Generated from lib/lrama/scanner_fsa.rb with RBS::Inline + +module Lrama + # Scanner Finite State Automaton for PSLR(1) + # Built from token patterns defined by %token-pattern directives + # Based on Definitions 3.2.12, 3.2.13 from the PSLR dissertation + class ScannerFSA + # Represents a state in the scanner FSA + class State + attr_reader id: Integer + + 
attr_reader transitions: Hash[String, Integer] + + attr_reader accepting_tokens: Array[Grammar::TokenPattern] + + # @rbs (Integer id) -> void + def initialize: (Integer id) -> void + + # @rbs () -> bool + def accepting?: () -> bool + + # @rbs (String char, Integer target_state_id) -> void + def add_transition: (String char, Integer target_state_id) -> void + + # @rbs (Grammar::TokenPattern token_pattern) -> void + def add_accepting_token: (Grammar::TokenPattern token_pattern) -> void + end + + attr_reader states: Array[State] + + attr_reader initial_state: State + + attr_reader token_patterns: Array[Grammar::TokenPattern] + + # @rbs (Array[Grammar::TokenPattern] token_patterns) -> void + def initialize: (Array[Grammar::TokenPattern] token_patterns) -> void + + # Returns the accepting state for a given FSA state + # Definition 3.2.13 (state_to_accepting_state) + # @rbs (Integer state_id) -> State? + def state_to_accepting_state: (Integer state_id) -> State? + + # Returns the set of tokens accepted at FSA state ss + # Definition 3.2.12 acc(ss) + # @rbs (Integer state_id) -> Array[Grammar::TokenPattern] + def acc_ss: (Integer state_id) -> Array[Grammar::TokenPattern] + + # Simulate the FSA on input string starting from initial state + # Returns all accepting states reached during the scan + # @rbs (String input) -> Array[{state: State, position: Integer, token: Grammar::TokenPattern}] + def scan: (String input) -> Array[{ state: State, position: Integer, token: Grammar::TokenPattern }] + + private + + # Build the FSA from token patterns + # Uses Thompson's construction for NFAs followed by subset construction for DFA + # @rbs () -> void + def build_fsa: () -> void + + # @rbs () -> State + def create_state: () -> State + + # Simple NFA state for regex compilation + class NFAState + attr_reader id: Integer + + attr_accessor transitions: Hash[String?, Array[NFAState]] + + attr_accessor accepting_token: Grammar::TokenPattern? + + # @rbs (Integer id) -> void + def initialize: (Integer id) -> void + + # @rbs (String? char, NFAState target) -> void + def add_transition: (String? char, NFAState target) -> void + + # @rbs () -> bool + def accepting?: () -> bool + end + + # Build NFA from all token patterns + # @rbs () -> Array[NFAState] + def build_nfa: () -> Array[NFAState] + + # @rbs (Array[Integer] counter, Array[NFAState] states) -> NFAState + def create_nfa_state: (Array[Integer] counter, Array[NFAState] states) -> NFAState + + # Compile a regex pattern to NFA fragment + # Returns [start_state, end_state] + # @rbs (String pattern, Array[Integer] counter, Array[NFAState] states) -> [NFAState, NFAState] + def compile_regex: (String pattern, Array[Integer] counter, Array[NFAState] states) -> [ NFAState, NFAState ] + + # Compile a sequence of regex elements + # @rbs (String pattern, Integer pos, Array[Integer] counter, Array[NFAState] states) -> [NFAState, NFAState] + def compile_sequence: (String pattern, Integer pos, Array[Integer] counter, Array[NFAState] states) -> [ NFAState, NFAState ] + + # Compile a single literal character + # @rbs (String char, Array[Integer] counter, Array[NFAState] states) -> [NFAState, NFAState] + def compile_literal: (String char, Array[Integer] counter, Array[NFAState] states) -> [ NFAState, NFAState ] + + # Compile a character class [...] 
+ # @rbs (String char_class, Array[Integer] counter, Array[NFAState] states) -> [NFAState, NFAState] + def compile_char_class: (String char_class, Array[Integer] counter, Array[NFAState] states) -> [ NFAState, NFAState ] + + # Expand character class string to array of characters + # @rbs (String char_class) -> Array[String] + def expand_char_class: (String char_class) -> Array[String] + + # Compile . (any character) + # @rbs (Array[Integer] counter, Array[NFAState] states) -> [NFAState, NFAState] + def compile_any_char: (Array[Integer] counter, Array[NFAState] states) -> [ NFAState, NFAState ] + + # Apply a quantifier to a fragment + # @rbs ([NFAState, NFAState] fragment, String quantifier, Array[Integer] counter, Array[NFAState] states) -> [NFAState, NFAState] + def apply_quantifier: ([ NFAState, NFAState ] fragment, String quantifier, Array[Integer] counter, Array[NFAState] states) -> [ NFAState, NFAState ] + + # Concatenate multiple NFA fragments into one + # @rbs (Array[[NFAState, NFAState]] fragments, Array[Integer] counter, Array[NFAState] states) -> [NFAState, NFAState] + def concatenate_fragments: (Array[[ NFAState, NFAState ]] fragments, Array[Integer] counter, Array[NFAState] states) -> [ NFAState, NFAState ] + + # Convert NFA to DFA using subset construction + # @rbs (Array[NFAState] nfa_states) -> void + def convert_nfa_to_dfa: (Array[NFAState] nfa_states) -> void + + # Compute epsilon closure of a set of NFA states + # @rbs (Array[NFAState] nfa_states) -> Array[NFAState] + def epsilon_closure: (Array[NFAState] nfa_states) -> Array[NFAState] + end +end diff --git a/sig/generated/lrama/state/pslr_inadequacy.rbs b/sig/generated/lrama/state/pslr_inadequacy.rbs new file mode 100644 index 00000000..aa0cc976 --- /dev/null +++ b/sig/generated/lrama/state/pslr_inadequacy.rbs @@ -0,0 +1,44 @@ +# Generated from lib/lrama/state/pslr_inadequacy.rb with RBS::Inline + +module Lrama + class State + # PSLR Inadequacy detection + # Based on Section 3.4.3 from the PSLR dissertation + # + # PSLR inadequacy occurs when state merging causes different + # pseudo-scanner behavior + class PslrInadequacy + # Inadequacy types + LR_RELATIVE: Symbol + + PSLR_RELATIVE: Symbol + + attr_reader type: Symbol + + attr_reader state: State + + attr_reader conflicting_states: Array[State] + + attr_reader details: Hash[Symbol, Object] + + # @rbs (type: Symbol, state: State, conflicting_states: Array[State], details: Hash[Symbol, untyped]) -> void + def initialize: (type: Symbol, state: State, conflicting_states: Array[State], details: Hash[Symbol, untyped]) -> void + + # @rbs () -> String + def to_s: () -> String + end + + # PSLR Compatibility checker + # Based on Definition 3.4.1 from the dissertation + class PslrCompatibilityChecker + # @rbs (ScannerAccepts scanner_accepts, LengthPrecedences length_prec) -> void + def initialize: (ScannerAccepts scanner_accepts, LengthPrecedences length_prec) -> void + + # Check if two states are PSLR-compatible + # Definition 3.4.1: States are compatible if for any input, + # the pseudo-scanner selects the same token + # @rbs (State s1, State s2, ScannerFSA scanner_fsa) -> bool + def compatible?: (State s1, State s2, ScannerFSA scanner_fsa) -> bool + end + end +end diff --git a/sig/generated/lrama/state/scanner_accepts.rbs b/sig/generated/lrama/state/scanner_accepts.rbs new file mode 100644 index 00000000..0235f7f9 --- /dev/null +++ b/sig/generated/lrama/state/scanner_accepts.rbs @@ -0,0 +1,55 @@ +# Generated from lib/lrama/state/scanner_accepts.rb with RBS::Inline + +module 
Lrama + class State + # Scanner accepts table for PSLR(1) + # Based on Definition 3.2.14 from the PSLR dissertation + # + # scanner_accepts[sp, sa]: For parser state sp and accepting state sa, + # returns the token that should be selected + class ScannerAccepts + attr_reader table: Hash[[ Integer, Integer ], Grammar::TokenPattern?] + + # @rbs (Array[State] parser_states, ScannerFSA scanner_fsa, Grammar::LexPrec lex_prec, LengthPrecedences length_prec) -> void + def initialize: (Array[State] parser_states, ScannerFSA scanner_fsa, Grammar::LexPrec lex_prec, LengthPrecedences length_prec) -> void + + # Build the scanner_accepts table + # Based on Definition 3.2.20 (compute_scanner_accepts) + # @rbs () -> void + def build: () -> void + + # Get the accepted token for a parser state and accepting state + # @rbs (Integer parser_state_id, Integer accepting_state_id) -> Grammar::TokenPattern? + def []: (Integer parser_state_id, Integer accepting_state_id) -> Grammar::TokenPattern? + + private + + # Compute scanner_accepts for a single parser state + # Uses DFS to explore the FSA state space + # @rbs (State parser_state) -> void + def compute_for_parser_state: (State parser_state) -> void + + # DFS exploration of FSA states + # @rbs (State parser_state, Integer fsa_state_id, Set[Integer] visited) -> void + def dfs: (State parser_state, Integer fsa_state_id, Set[Integer] visited) -> void + + # Resolve which token should be accepted + # Based on Definition 3.2.19 (resolve) + # @rbs (State parser_state, ScannerFSA::State fsa_state) -> Grammar::TokenPattern? + def resolve: (State parser_state, ScannerFSA::State fsa_state) -> Grammar::TokenPattern? + + # Compute acc(sp): set of terminal symbols acceptable at parser state sp + # @rbs (State parser_state) -> Set[String] + def compute_acc_sp: (State parser_state) -> Set[String] + + # Select the best token from candidates based on precedence rules + # @rbs (Array[Grammar::TokenPattern] candidates) -> Grammar::TokenPattern? + def select_best_token: (Array[Grammar::TokenPattern] candidates) -> Grammar::TokenPattern? + + # Compute priority rank for a token among candidates + # Lower rank = higher priority + # @rbs (Grammar::TokenPattern token, Array[Grammar::TokenPattern] candidates) -> [Integer, Integer] + def priority_rank: (Grammar::TokenPattern token, Array[Grammar::TokenPattern] candidates) -> [ Integer, Integer ] + end + end +end diff --git a/sig/generated/lrama/states.rbs b/sig/generated/lrama/states.rbs index 8e4b296e..d8472ce0 100644 --- a/sig/generated/lrama/states.rbs +++ b/sig/generated/lrama/states.rbs @@ -44,6 +44,12 @@ module Lrama attr_reader lookback_relation: Hash[state_id, Hash[rule_id, Array[State::Action::Goto]]] + attr_reader scanner_fsa: ScannerFSA? + + attr_reader length_precedences: LengthPrecedences? + + attr_reader scanner_accepts_table: State::ScannerAccepts? 
+ # @rbs (Grammar grammar, Tracer tracer) -> void def initialize: (Grammar grammar, Tracer tracer) -> void @@ -53,6 +59,11 @@ module Lrama # @rbs () -> void def compute_ielr: () -> void + # Compute PSLR(1) states + # Based on Section 3.4 of the PSLR dissertation + # @rbs () -> void + def compute_pslr: () -> void + # @rbs () -> Integer def states_count: () -> Integer @@ -211,5 +222,26 @@ module Lrama # @rbs () -> void def clear_look_ahead_sets: () -> void + + # Build Scanner FSA from token patterns + # @rbs () -> void + def build_scanner_fsa: () -> void + + # Build length precedences table + # @rbs () -> void + def build_length_precedences: () -> void + + # Build scanner_accepts table + # @rbs () -> void + def build_scanner_accepts: () -> void + + # Handle PSLR inadequacies + # Detects and splits states where pseudo-scanner behavior differs + # @rbs () -> void + def handle_pslr_inadequacies: () -> void + + # Detect PSLR inadequacies in isocore groups + # @rbs () -> Array[State::PslrInadequacy] + def detect_pslr_inadequacies: () -> Array[State::PslrInadequacy] end end diff --git a/spec/lrama/grammar/lex_prec_spec.rb b/spec/lrama/grammar/lex_prec_spec.rb new file mode 100644 index 00000000..59e1ea6b --- /dev/null +++ b/spec/lrama/grammar/lex_prec_spec.rb @@ -0,0 +1,51 @@ +# frozen_string_literal: true + +RSpec.describe Lrama::Grammar::LexPrec do + let(:lex_prec) { Lrama::Grammar::LexPrec.new } + + it "stores lex-prec rules" do + left = Lrama::Lexer::Token::Ident.new(s_value: "RANGLE") + right = Lrama::Lexer::Token::Ident.new(s_value: "RSHIFT") + + lex_prec.add_rule( + left_token: left, + operator: Lrama::Grammar::LexPrec::SHORTER, + right_token: right, + lineno: 1 + ) + + expect(lex_prec.rules.size).to eq(1) + expect(lex_prec.shorter_priority?("RANGLE", "RSHIFT")).to be true + expect(lex_prec.shorter_priority?("RSHIFT", "RANGLE")).to be false + end + + it "handles higher priority rules" do + left = Lrama::Lexer::Token::Ident.new(s_value: "IF") + right = Lrama::Lexer::Token::Ident.new(s_value: "ID") + + lex_prec.add_rule( + left_token: left, + operator: Lrama::Grammar::LexPrec::HIGHER, + right_token: right, + lineno: 1 + ) + + expect(lex_prec.higher_priority?("IF", "ID")).to be true + expect(lex_prec.higher_priority?("ID", "IF")).to be false + end + + it "handles same priority (lex-tie) rules" do + left = Lrama::Lexer::Token::Ident.new(s_value: "TOKEN_A") + right = Lrama::Lexer::Token::Ident.new(s_value: "TOKEN_B") + + lex_prec.add_rule( + left_token: left, + operator: Lrama::Grammar::LexPrec::SAME_PRIORITY, + right_token: right, + lineno: 1 + ) + + expect(lex_prec.same_priority?("TOKEN_A", "TOKEN_B")).to be true + expect(lex_prec.same_priority?("TOKEN_B", "TOKEN_A")).to be true + end +end diff --git a/spec/lrama/grammar/token_pattern_spec.rb b/spec/lrama/grammar/token_pattern_spec.rb new file mode 100644 index 00000000..5d4c95da --- /dev/null +++ b/spec/lrama/grammar/token_pattern_spec.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +RSpec.describe Lrama::Grammar::TokenPattern do + it "stores token pattern information" do + id = Lrama::Lexer::Token::Ident.new(s_value: "RSHIFT") + pattern = Lrama::Lexer::Token::Regex.new(s_value: "/>>>/") + + token_pattern = Lrama::Grammar::TokenPattern.new( + id: id, + pattern: pattern, + alias_name: "right shift", + tag: nil, + lineno: 1, + definition_order: 0 + ) + + expect(token_pattern.name).to eq("RSHIFT") + expect(token_pattern.regex_pattern).to eq(">>>") + expect(token_pattern.alias_name).to eq("right shift") + 
expect(token_pattern.definition_order).to eq(0) + end +end diff --git a/spec/lrama/integration_spec.rb b/spec/lrama/integration_spec.rb index 52922a0f..1bda8e50 100644 --- a/spec/lrama/integration_spec.rb +++ b/spec/lrama/integration_spec.rb @@ -285,4 +285,231 @@ def generate_object(grammar_file_path, c_path, obj_path, command_args: []) end end end + + describe "PSLR context-dependent lexing" do + describe "Scanner FSA with overlapping patterns" do + let(:rangle) do + id = Lrama::Lexer::Token::Ident.new(s_value: "RANGLE") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/>/") + Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 0 + ) + end + + let(:rshift) do + id = Lrama::Lexer::Token::Ident.new(s_value: "RSHIFT") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/>>/") + Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 1 + ) + end + + let(:scanner_fsa) { Lrama::ScannerFSA.new([rangle, rshift]) } + + it "recognizes both RANGLE and RSHIFT as possible matches for '>>'" do + results = scanner_fsa.scan(">>") + + token_names = results.map { |r| r[:token].name } + expect(token_names).to include("RANGLE") + expect(token_names).to include("RSHIFT") + end + + it "RANGLE matches at position 1, RSHIFT matches at position 2" do + results = scanner_fsa.scan(">>") + + rangle_match = results.find { |r| r[:token].name == "RANGLE" } + rshift_match = results.find { |r| r[:token].name == "RSHIFT" } + + expect(rangle_match[:position]).to eq(1) + expect(rshift_match[:position]).to eq(2) + end + end + + describe "Length precedence resolution" do + let(:lex_prec) { Lrama::Grammar::LexPrec.new } + + before do + left = Lrama::Lexer::Token::Ident.new(s_value: "RANGLE") + right = Lrama::Lexer::Token::Ident.new(s_value: "RSHIFT") + lex_prec.add_rule( + left_token: left, + operator: Lrama::Grammar::LexPrec::SHORTER, + right_token: right, + lineno: 1 + ) + end + + let(:length_prec) { Lrama::LengthPrecedences.new(lex_prec) } + + it "indicates RANGLE (shorter) should be preferred over RSHIFT (longer)" do + expect(length_prec.prefer_shorter?("RANGLE", "RSHIFT")).to be true + end + + it "returns :left precedence for RANGLE vs RSHIFT" do + expect(length_prec.precedence("RANGLE", "RSHIFT")).to eq(:left) + end + end + + describe "Keyword vs identifier precedence" do + let(:lex_prec) { Lrama::Grammar::LexPrec.new } + + before do + left = Lrama::Lexer::Token::Ident.new(s_value: "IF") + right = Lrama::Lexer::Token::Ident.new(s_value: "ID") + lex_prec.add_rule( + left_token: left, + operator: Lrama::Grammar::LexPrec::HIGHER, + right_token: right, + lineno: 1 + ) + end + + it "indicates IF has higher priority than ID" do + expect(lex_prec.higher_priority?("IF", "ID")).to be true + end + + it "indicates ID does not have higher priority than IF" do + expect(lex_prec.higher_priority?("ID", "IF")).to be false + end + end + + describe "Full PSLR grammar compilation" do + let(:grammar_text) do + <<~GRAMMAR + %token-pattern RSHIFT />>/ "right shift" + %token-pattern RANGLE />/ "right angle" + %token-pattern LANGLE />") + token_names = results.map { |r| r[:token].name } + + expect(token_names).to include("RANGLE") + expect(token_names).to include("RSHIFT") + end + + describe "context-dependent token selection" do + it "scanner_accepts table is built" do + expect(states.scanner_accepts_table).not_to be_nil + end + + it "different parser states may accept different tokens for same FSA state" do + scanner_accepts = states.scanner_accepts_table + 
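+          # Table entries are keyed by [parser_state_id, fsa_state_id] pairs.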
scanner_fsa = states.scanner_fsa + + results = scanner_fsa.scan(">>") + rshift_result = results.find { |r| r[:token].name == "RSHIFT" } + rangle_result = results.find { |r| r[:token].name == "RANGLE" } + + expect(rshift_result).not_to be_nil + expect(rangle_result).not_to be_nil + expect(scanner_accepts.table).to be_a(Hash) + end + end + + describe "generated C code output" do + let(:out) { StringIO.new } + let(:context) { Lrama::Context.new(states) } + let(:output) do + Lrama::Output.new( + out: out, + output_file_path: "pslr_test.c", + template_name: "bison/yacc.c", + grammar_file_path: "pslr_test.y", + context: context, + grammar: grammar + ) + end + + before do + output.render + out.rewind + end + + let(:rendered) { out.read } + + it "includes yy_scanner_transition table" do + expect(rendered).to include("yy_scanner_transition") + expect(rendered).to include("YY_SCANNER_NUM_STATES") + end + + it "includes yy_state_to_accepting mapping" do + expect(rendered).to include("yy_state_to_accepting") + expect(rendered).to include("YY_ACCEPTING_NONE") + end + + it "includes yy_length_precedences table" do + expect(rendered).to include("yy_length_precedences") + expect(rendered).to include("YY_LENGTH_PREC_LEFT") + end + + it "includes yy_pseudo_scan function" do + expect(rendered).to include("yy_pseudo_scan") + expect(rendered).to include("parser_state") + expect(rendered).to include("match_length") + end + + it "pseudo_scan function uses length precedences for token selection" do + expect(rendered).to include("yy_length_precedences[tbest][t]") + end + end + end + end end diff --git a/spec/lrama/length_precedences_spec.rb b/spec/lrama/length_precedences_spec.rb new file mode 100644 index 00000000..283a7d65 --- /dev/null +++ b/spec/lrama/length_precedences_spec.rb @@ -0,0 +1,76 @@ +# frozen_string_literal: true + +RSpec.describe Lrama::LengthPrecedences do + let(:lex_prec) { Lrama::Grammar::LexPrec.new } + + describe "#precedence" do + it "returns :undefined when no rule exists" do + length_prec = Lrama::LengthPrecedences.new(lex_prec) + expect(length_prec.precedence("TOKEN_A", "TOKEN_B")).to eq(:undefined) + end + + it "returns :left when shorter token should be preferred" do + left_token = Lrama::Lexer::Token::Ident.new(s_value: "RANGLE") + right_token = Lrama::Lexer::Token::Ident.new(s_value: "RSHIFT") + lex_prec.add_rule( + left_token: left_token, + operator: Lrama::Grammar::LexPrec::SHORTER, + right_token: right_token, + lineno: 1 + ) + length_prec = Lrama::LengthPrecedences.new(lex_prec) + + expect(length_prec.precedence("RANGLE", "RSHIFT")).to eq(:left) + end + + it "returns :right for the inverse relationship" do + left_token = Lrama::Lexer::Token::Ident.new(s_value: "RANGLE") + right_token = Lrama::Lexer::Token::Ident.new(s_value: "RSHIFT") + lex_prec.add_rule( + left_token: left_token, + operator: Lrama::Grammar::LexPrec::SHORTER, + right_token: right_token, + lineno: 1 + ) + length_prec = Lrama::LengthPrecedences.new(lex_prec) + + expect(length_prec.precedence("RSHIFT", "RANGLE")).to eq(:right) + end + end + + describe "#prefer_shorter?" 
do + it "returns true when shorter token should be preferred" do + left_token = Lrama::Lexer::Token::Ident.new(s_value: "RANGLE") + right_token = Lrama::Lexer::Token::Ident.new(s_value: "RSHIFT") + lex_prec.add_rule( + left_token: left_token, + operator: Lrama::Grammar::LexPrec::SHORTER, + right_token: right_token, + lineno: 1 + ) + length_prec = Lrama::LengthPrecedences.new(lex_prec) + + expect(length_prec.prefer_shorter?("RANGLE", "RSHIFT")).to be true + end + + it "returns false when no preference exists" do + length_prec = Lrama::LengthPrecedences.new(lex_prec) + + expect(length_prec.prefer_shorter?("TOKEN_A", "TOKEN_B")).to be false + end + + it "returns false for inverse relationship" do + left_token = Lrama::Lexer::Token::Ident.new(s_value: "RANGLE") + right_token = Lrama::Lexer::Token::Ident.new(s_value: "RSHIFT") + lex_prec.add_rule( + left_token: left_token, + operator: Lrama::Grammar::LexPrec::SHORTER, + right_token: right_token, + lineno: 1 + ) + length_prec = Lrama::LengthPrecedences.new(lex_prec) + + expect(length_prec.prefer_shorter?("RSHIFT", "RANGLE")).to be false + end + end +end diff --git a/spec/lrama/lexer/token/regex_spec.rb b/spec/lrama/lexer/token/regex_spec.rb new file mode 100644 index 00000000..bc4fb38e --- /dev/null +++ b/spec/lrama/lexer/token/regex_spec.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +RSpec.describe Lrama::Lexer::Token::Regex do + describe "#pattern" do + it "returns the pattern without surrounding slashes" do + regex = Lrama::Lexer::Token::Regex.new(s_value: "/>>>/") + expect(regex.pattern).to eq(">>>") + end + + it "handles character class patterns" do + regex = Lrama::Lexer::Token::Regex.new(s_value: "/[a-zA-Z_][a-zA-Z0-9_]*/") + expect(regex.pattern).to eq("[a-zA-Z_][a-zA-Z0-9_]*") + end + + it "handles escape sequences" do + regex = Lrama::Lexer::Token::Regex.new(s_value: "/\\+/") + expect(regex.pattern).to eq("\\+") + end + + it "handles empty pattern" do + regex = Lrama::Lexer::Token::Regex.new(s_value: "//") + expect(regex.pattern).to eq("") + end + + it "handles single character pattern" do + regex = Lrama::Lexer::Token::Regex.new(s_value: "/>/") + expect(regex.pattern).to eq(">") + end + end + + describe "#s_value" do + it "returns the original value including slashes" do + regex = Lrama::Lexer::Token::Regex.new(s_value: "/>>>/") + expect(regex.s_value).to eq("/>>>/") + end + end +end diff --git a/spec/lrama/output_spec.rb b/spec/lrama/output_spec.rb index 5fa1d04f..813b91be 100644 --- a/spec/lrama/output_spec.rb +++ b/spec/lrama/output_spec.rb @@ -203,4 +203,183 @@ end end end + + describe "PSLR methods" do + let(:token_pattern) do + id = Lrama::Lexer::Token::Ident.new(s_value: "ID") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/[a-z]+/") + Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 0 + ) + end + + let(:scanner_fsa) { Lrama::ScannerFSA.new([token_pattern]) } + + let(:mock_states) do + instance_double( + Lrama::States, + scanner_fsa: scanner_fsa, + scanner_accepts_table: nil, + length_precedences: nil, + token_patterns: [token_pattern], + states: [] + ) + end + + let(:mock_context) do + instance_double(Lrama::Context, states: mock_states) + end + + let(:mock_grammar) do + instance_double( + Lrama::Grammar, + eof_symbol: nil, + error_symbol: nil, + undef_symbol: nil, + accept_symbol: nil, + locations: false, + parse_param: nil, + lex_param: nil + ) + end + + let(:pslr_output) do + out = StringIO.new + Lrama::Output.new( + out: out, + output_file_path: "test.c", + 
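+ # Uses the stock bison/yacc.c template; the PSLR tables are spliced in
+ # via pslr_tables_and_functions only when pslr_enabled? returns true.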
template_name: "bison/yacc.c", + grammar_file_path: "test.y", + context: mock_context, + grammar: mock_grammar + ) + end + + describe "#pslr_enabled?" do + it "returns true when scanner FSA is built with states" do + expect(pslr_output.pslr_enabled?).to be true + end + + it "returns false when scanner FSA is nil" do + allow(mock_states).to receive(:scanner_fsa).and_return(nil) + expect(pslr_output.pslr_enabled?).to be false + end + + it "returns false when scanner FSA has no states" do + empty_fsa = Lrama::ScannerFSA.new([]) + allow(mock_states).to receive(:scanner_fsa).and_return(empty_fsa) + expect(pslr_output.pslr_enabled?).to be false + end + end + + describe "#scanner_transition_table" do + it "generates C code for scanner transitions" do + result = pslr_output.scanner_transition_table + expect(result).to include("YY_SCANNER_NUM_STATES") + expect(result).to include("yy_scanner_transition") + end + end + + describe "#pseudo_scan_function" do + it "generates the pseudo_scan C function" do + result = pslr_output.pseudo_scan_function + expect(result).to include("yy_pseudo_scan") + expect(result).to include("parser_state") + expect(result).to include("match_length") + end + end + + describe "#pslr_tables_and_functions" do + it "generates all PSLR C code" do + result = pslr_output.pslr_tables_and_functions + expect(result).to include("PSLR(1) Scanner Tables and Functions") + expect(result).to include("YY_SCANNER_NUM_STATES") + expect(result).to include("yy_scanner_transition") + expect(result).to include("yy_pseudo_scan") + end + end + + describe "#state_to_accepting_table" do + it "generates state to accepting mapping" do + result = pslr_output.state_to_accepting_table + expect(result).to include("yy_state_to_accepting") + expect(result).to include("YY_ACCEPTING_NONE") + end + end + + describe "#length_precedences_table_code" do + let(:mock_length_prec) { Lrama::LengthPrecedences.new(Lrama::Grammar::LexPrec.new) } + + before do + allow(mock_states).to receive(:length_precedences).and_return(mock_length_prec) + end + + it "generates length precedences table" do + result = pslr_output.length_precedences_table_code + expect(result).to include("length_precedences") + expect(result).to include("YY_LENGTH_PREC_UNDEFINED") + end + end + + describe "#accepting_tokens_table" do + it "generates accepting tokens information" do + result = pslr_output.accepting_tokens_table + expect(result).to include("Accepting state token IDs") + end + end + end + + describe "PSLR integration in render" do + let(:pslr_grammar_text) do + <<~GRAMMAR + %token-pattern RSHIFT />>/ "right shift" + %token-pattern RANGLE />/ "right angle" + %lex-prec RANGLE -s RSHIFT + %% + program: RSHIFT | RANGLE + GRAMMAR + end + + let(:pslr_grammar) do + grammar = Lrama::Parser.new(pslr_grammar_text, "pslr_test.y").parse + grammar.prepare + grammar.validate! 
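+ # prepare/validate! must run before the States computation below so the
+ # %token-pattern terminals and %lex-prec rules are registered first.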
+ grammar + end + + let(:pslr_states) do + s = Lrama::States.new(pslr_grammar, Lrama::Tracer.new(Lrama::Logger.new)) + s.compute + s.compute_pslr + s + end + + let(:pslr_context) { Lrama::Context.new(pslr_states) } + let(:pslr_out) { StringIO.new } + + let(:pslr_full_output) do + Lrama::Output.new( + out: pslr_out, + output_file_path: "pslr_test.c", + template_name: "bison/yacc.c", + grammar_file_path: "pslr_test.y", + context: pslr_context, + grammar: pslr_grammar + ) + end + + it "includes PSLR tables in rendered output" do + pslr_full_output.render + pslr_out.rewind + rendered = pslr_out.read + + expect(rendered).to include("PSLR(1) Scanner Tables and Functions") + expect(rendered).to include("YY_SCANNER_NUM_STATES") + expect(rendered).to include("yy_scanner_transition") + expect(rendered).to include("yy_pseudo_scan") + end + end end diff --git a/spec/lrama/parser_spec.rb b/spec/lrama/parser_spec.rb index 9b2365ab..afbccf1d 100644 --- a/spec/lrama/parser_spec.rb +++ b/spec/lrama/parser_spec.rb @@ -4391,4 +4391,130 @@ class : keyword_class tSTRING keyword_end end end end + + describe "PSLR directives" do + describe "%token-pattern" do + it "parses a single token pattern" do + y = <<~GRAMMAR + %token-pattern RSHIFT />>/ "right shift" + %% + program: RSHIFT + GRAMMAR + + grammar = Lrama::Parser.new(y, "pslr_test.y").parse + grammar.prepare + grammar.validate! + + expect(grammar.token_patterns.size).to eq(1) + token_pattern = grammar.token_patterns.first + expect(token_pattern.name).to eq("RSHIFT") + expect(token_pattern.regex_pattern).to eq(">>") + expect(token_pattern.alias_name).to eq("\"right shift\"") + end + + it "parses multiple token patterns" do + y = <<~GRAMMAR + %token-pattern RSHIFT />>/ "right shift" + %token-pattern RANGLE />/ "right angle" + %token-pattern LANGLE / RSHIFT />>/ "right shift" + %% + program: RSHIFT + GRAMMAR + + grammar = Lrama::Parser.new(y, "pslr_test.y").parse + grammar.prepare + grammar.validate! + + expect(grammar.token_patterns.size).to eq(1) + token_pattern = grammar.token_patterns.first + expect(token_pattern.tag.s_value).to eq("") + end + end + + describe "%lex-prec" do + it "parses shorter priority rule" do + y = <<~GRAMMAR + %token RANGLE RSHIFT + %lex-prec RANGLE -s RSHIFT + %% + program: RANGLE | RSHIFT + GRAMMAR + + grammar = Lrama::Parser.new(y, "pslr_test.y").parse + grammar.prepare + grammar.validate! + + expect(grammar.lex_prec.rules.size).to eq(1) + expect(grammar.lex_prec.shorter_priority?("RANGLE", "RSHIFT")).to be true + end + + it "parses higher priority rule" do + y = <<~GRAMMAR + %token IF ID + %lex-prec IF - ID + %% + program: IF | ID + GRAMMAR + + grammar = Lrama::Parser.new(y, "pslr_test.y").parse + grammar.prepare + grammar.validate! + + expect(grammar.lex_prec.rules.size).to eq(1) + expect(grammar.lex_prec.higher_priority?("IF", "ID")).to be true + end + + it "parses chained lex-prec rules" do + y = <<~GRAMMAR + %token IF ELSE WHILE ID + %lex-prec IF - ELSE - WHILE - ID + %% + program: IF | ELSE | WHILE | ID + GRAMMAR + + grammar = Lrama::Parser.new(y, "pslr_test.y").parse + grammar.prepare + grammar.validate! 
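+ # A chained declaration is expected to expand pairwise:
+ # %lex-prec IF - ELSE - WHILE - ID
+ # behaves like the three rules IF - ELSE, ELSE - WHILE and WHILE - ID,
+ # hence rules.size == 3 below.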
+ + expect(grammar.lex_prec.rules.size).to eq(3) + expect(grammar.lex_prec.higher_priority?("IF", "ELSE")).to be true + expect(grammar.lex_prec.higher_priority?("ELSE", "WHILE")).to be true + expect(grammar.lex_prec.higher_priority?("WHILE", "ID")).to be true + end + end + + describe "%define lr.type pslr" do + it "recognizes pslr lr.type" do + y = <<~GRAMMAR + %define lr.type pslr + %token ID + %% + program: ID + GRAMMAR + + grammar = Lrama::Parser.new(y, "pslr_test.y").parse + grammar.prepare + grammar.validate! + + expect(grammar.pslr_defined?).to be true + end + end + end end diff --git a/spec/lrama/scanner_fsa_spec.rb b/spec/lrama/scanner_fsa_spec.rb new file mode 100644 index 00000000..8ba20de8 --- /dev/null +++ b/spec/lrama/scanner_fsa_spec.rb @@ -0,0 +1,193 @@ +# frozen_string_literal: true + +RSpec.describe Lrama::ScannerFSA do + describe "initialization" do + it "creates an empty FSA for no patterns" do + fsa = Lrama::ScannerFSA.new([]) + expect(fsa.states).to be_empty + end + + it "creates FSA for single literal pattern" do + id = Lrama::Lexer::Token::Ident.new(s_value: "PLUS") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/\\+/") + token_pattern = Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 0 + ) + fsa = Lrama::ScannerFSA.new([token_pattern]) + + expect(fsa.states).not_to be_empty + expect(fsa.initial_state).not_to be_nil + end + end + + describe "#scan" do + it "matches a single character pattern" do + id = Lrama::Lexer::Token::Ident.new(s_value: "RANGLE") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/>/") + token_pattern = Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 0 + ) + fsa = Lrama::ScannerFSA.new([token_pattern]) + + results = fsa.scan(">") + expect(results.size).to eq(1) + expect(results[0][:token].name).to eq("RANGLE") + expect(results[0][:position]).to eq(1) + end + + it "matches a multi-character pattern" do + id = Lrama::Lexer::Token::Ident.new(s_value: "RSHIFT") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/>>/") + token_pattern = Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 0 + ) + fsa = Lrama::ScannerFSA.new([token_pattern]) + + results = fsa.scan(">>") + expect(results.size).to eq(1) + expect(results[0][:token].name).to eq("RSHIFT") + expect(results[0][:position]).to eq(2) + end + + it "returns multiple matches for overlapping patterns" do + rangle_id = Lrama::Lexer::Token::Ident.new(s_value: "RANGLE") + rangle_regex = Lrama::Lexer::Token::Regex.new(s_value: "/>/") + rangle = Lrama::Grammar::TokenPattern.new( + id: rangle_id, + pattern: rangle_regex, + lineno: 1, + definition_order: 0 + ) + + rshift_id = Lrama::Lexer::Token::Ident.new(s_value: "RSHIFT") + rshift_regex = Lrama::Lexer::Token::Regex.new(s_value: "/>>/") + rshift = Lrama::Grammar::TokenPattern.new( + id: rshift_id, + pattern: rshift_regex, + lineno: 1, + definition_order: 1 + ) + + fsa = Lrama::ScannerFSA.new([rangle, rshift]) + + results = fsa.scan(">>") + + # Should match both RANGLE at position 1 and RSHIFT at position 2 + expect(results.size).to eq(2) + positions = results.map { |r| [r[:token].name, r[:position]] } + expect(positions).to include(["RANGLE", 1]) + expect(positions).to include(["RSHIFT", 2]) + end + + it "matches character class patterns" do + id = Lrama::Lexer::Token::Ident.new(s_value: "ID") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/[a-zA-Z_][a-zA-Z0-9_]*/") + id_pattern = 
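+ # [a-zA-Z_][a-zA-Z0-9_]* accepts every non-empty prefix of the input,
+ # so scan should report a match at each position of "hello_world123".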
Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 0 + ) + fsa = Lrama::ScannerFSA.new([id_pattern]) + + results = fsa.scan("hello_world123") + expect(results).not_to be_empty + # Should have matches at each position as the identifier grows + end + + it "matches digit patterns" do + id = Lrama::Lexer::Token::Ident.new(s_value: "INT") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/[0-9]+/") + int_pattern = Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 0 + ) + fsa = Lrama::ScannerFSA.new([int_pattern]) + + results = fsa.scan("12345") + expect(results).not_to be_empty + end + end + + describe "#acc_ss" do + it "returns empty array for non-accepting state" do + id = Lrama::Lexer::Token::Ident.new(s_value: "AB") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/ab/") + token_pattern = Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 0 + ) + fsa = Lrama::ScannerFSA.new([token_pattern]) + + # Initial state shouldn't be accepting for non-empty pattern + tokens = fsa.acc_ss(0) + expect(tokens).to be_empty + end + + it "returns accepting tokens for accepting state" do + id = Lrama::Lexer::Token::Ident.new(s_value: "A") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/a/") + token_pattern = Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 0 + ) + fsa = Lrama::ScannerFSA.new([token_pattern]) + + # Scan to reach accepting state + results = fsa.scan("a") + expect(results).not_to be_empty + + accepting_state = results[0][:state] + tokens = fsa.acc_ss(accepting_state.id) + expect(tokens.map(&:name)).to include("A") + end + end + + describe "#state_to_accepting_state" do + it "returns nil for non-accepting state" do + id = Lrama::Lexer::Token::Ident.new(s_value: "AB") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/ab/") + token_pattern = Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 0 + ) + fsa = Lrama::ScannerFSA.new([token_pattern]) + + expect(fsa.state_to_accepting_state(0)).to be_nil + end + + it "returns the state itself for accepting state" do + id = Lrama::Lexer::Token::Ident.new(s_value: "A") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/a/") + token_pattern = Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 0 + ) + fsa = Lrama::ScannerFSA.new([token_pattern]) + + results = fsa.scan("a") + accepting_state = results[0][:state] + + expect(fsa.state_to_accepting_state(accepting_state.id)).to eq(accepting_state) + end + end +end diff --git a/spec/lrama/state/pslr_inadequacy_spec.rb b/spec/lrama/state/pslr_inadequacy_spec.rb new file mode 100644 index 00000000..f7944ccb --- /dev/null +++ b/spec/lrama/state/pslr_inadequacy_spec.rb @@ -0,0 +1,168 @@ +# frozen_string_literal: true + +RSpec.describe Lrama::State::PslrInadequacy do + let(:mock_state) do + instance_double(Lrama::State, id: 0) + end + + let(:mock_conflicting_states) do + [ + instance_double(Lrama::State, id: 1), + instance_double(Lrama::State, id: 2) + ] + end + + describe "#initialize" do + it "creates an LR-relative inadequacy" do + inadequacy = Lrama::State::PslrInadequacy.new( + type: Lrama::State::PslrInadequacy::LR_RELATIVE, + state: mock_state, + conflicting_states: mock_conflicting_states, + details: { reason: "test" } + ) + + expect(inadequacy.type).to eq(:lr_relative) + expect(inadequacy.state).to eq(mock_state) + 
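+ # conflicting_states lists the isocore states whose pseudo-scanner
+ # behavior diverges from this state's (see detect_pslr_inadequacies).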
expect(inadequacy.conflicting_states).to eq(mock_conflicting_states) + expect(inadequacy.details[:reason]).to eq("test") + end + + it "creates a PSLR-relative inadequacy" do + inadequacy = Lrama::State::PslrInadequacy.new( + type: Lrama::State::PslrInadequacy::PSLR_RELATIVE, + state: mock_state, + conflicting_states: mock_conflicting_states, + details: {} + ) + + expect(inadequacy.type).to eq(:pslr_relative) + end + end + + describe "#to_s" do + it "returns a human-readable description" do + inadequacy = Lrama::State::PslrInadequacy.new( + type: Lrama::State::PslrInadequacy::PSLR_RELATIVE, + state: mock_state, + conflicting_states: mock_conflicting_states, + details: {} + ) + + expect(inadequacy.to_s).to include("PSLR Inadequacy") + expect(inadequacy.to_s).to include("pslr_relative") + expect(inadequacy.to_s).to include("state 0") + expect(inadequacy.to_s).to include("1, 2") + end + end + + describe "constants" do + it "defines LR_RELATIVE constant" do + expect(Lrama::State::PslrInadequacy::LR_RELATIVE).to eq(:lr_relative) + end + + it "defines PSLR_RELATIVE constant" do + expect(Lrama::State::PslrInadequacy::PSLR_RELATIVE).to eq(:pslr_relative) + end + end +end + +RSpec.describe Lrama::State::PslrCompatibilityChecker do + let(:rangle) do + id = Lrama::Lexer::Token::Ident.new(s_value: "RANGLE") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/>/") + Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 0 + ) + end + + let(:rshift) do + id = Lrama::Lexer::Token::Ident.new(s_value: "RSHIFT") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/>>/") + Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 1 + ) + end + + let(:scanner_fsa) { Lrama::ScannerFSA.new([rangle, rshift]) } + let(:lex_prec) { Lrama::Grammar::LexPrec.new } + let(:length_prec) { Lrama::LengthPrecedences.new(lex_prec) } + + describe "#initialize" do + it "creates a compatibility checker" do + scanner_accepts = instance_double(Lrama::State::ScannerAccepts) + checker = Lrama::State::PslrCompatibilityChecker.new( + scanner_accepts, + length_prec + ) + + expect(checker).to be_a(Lrama::State::PslrCompatibilityChecker) + end + end + + describe "#compatible?" 
do + context "when both states select same tokens" do + it "returns true" do + scanner_accepts = instance_double(Lrama::State::ScannerAccepts) + allow(scanner_accepts).to receive(:[]).and_return(rangle) + + checker = Lrama::State::PslrCompatibilityChecker.new( + scanner_accepts, + length_prec + ) + + state1 = instance_double(Lrama::State, id: 0) + state2 = instance_double(Lrama::State, id: 1) + + expect(checker.compatible?(state1, state2, scanner_fsa)).to be true + end + end + + context "when both states have no tokens (nil)" do + it "returns true" do + scanner_accepts = instance_double(Lrama::State::ScannerAccepts) + allow(scanner_accepts).to receive(:[]).and_return(nil) + + checker = Lrama::State::PslrCompatibilityChecker.new( + scanner_accepts, + length_prec + ) + + state1 = instance_double(Lrama::State, id: 0) + state2 = instance_double(Lrama::State, id: 1) + + expect(checker.compatible?(state1, state2, scanner_fsa)).to be true + end + end + + context "when states select different tokens" do + it "returns false" do + scanner_accepts = instance_double(Lrama::State::ScannerAccepts) + + # State 0 selects RANGLE, State 1 selects RSHIFT + allow(scanner_accepts).to receive(:[]) do |state_id, _fsa_state_id| + if state_id == 0 + rangle + else + rshift + end + end + + checker = Lrama::State::PslrCompatibilityChecker.new( + scanner_accepts, + length_prec + ) + + state1 = instance_double(Lrama::State, id: 0) + state2 = instance_double(Lrama::State, id: 1) + + expect(checker.compatible?(state1, state2, scanner_fsa)).to be false + end + end + end +end diff --git a/spec/lrama/state/scanner_accepts_spec.rb b/spec/lrama/state/scanner_accepts_spec.rb new file mode 100644 index 00000000..5b2b5a45 --- /dev/null +++ b/spec/lrama/state/scanner_accepts_spec.rb @@ -0,0 +1,148 @@ +# frozen_string_literal: true + +RSpec.describe Lrama::State::ScannerAccepts do + describe "#build and #[]" do + let(:rangle) do + id = Lrama::Lexer::Token::Ident.new(s_value: "RANGLE") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/>/") + Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 0 + ) + end + + let(:rshift) do + id = Lrama::Lexer::Token::Ident.new(s_value: "RSHIFT") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/>>/") + Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 1 + ) + end + + let(:scanner_fsa) { Lrama::ScannerFSA.new([rangle, rshift]) } + let(:lex_prec) { Lrama::Grammar::LexPrec.new } + let(:length_prec) { Lrama::LengthPrecedences.new(lex_prec) } + + context "with mock parser states" do + let(:mock_symbol) do + instance_double( + Lrama::Grammar::Symbol, + term?: true, + id: instance_double(Lrama::Lexer::Token::Ident, s_value: "RANGLE") + ) + end + + let(:mock_shift) do + instance_double( + Lrama::State::Action::Shift, + next_sym: mock_symbol + ) + end + + let(:mock_state) do + instance_double( + Lrama::State, + id: 0, + term_transitions: [mock_shift], + reduces: [] + ) + end + + it "builds scanner_accepts table" do + scanner_accepts = Lrama::State::ScannerAccepts.new( + [mock_state], + scanner_fsa, + lex_prec, + length_prec + ) + scanner_accepts.build + + expect(scanner_accepts.table).to be_a(Hash) + end + end + end + + describe "token selection logic" do + let(:token_a) do + id = Lrama::Lexer::Token::Ident.new(s_value: "TOKEN_A") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/a/") + Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 0 + ) + end + + let(:token_ab) do 
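+ # /a/ is a strict prefix of /ab/: after reading "a" the FSA is in an
+ # accepting state for TOKEN_A but can still advance to accept TOKEN_AB,
+ # the classic shortest- vs longest-match choice PSLR resolves per state.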
+ id = Lrama::Lexer::Token::Ident.new(s_value: "TOKEN_AB") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/ab/") + Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 1 + ) + end + + let(:scanner_fsa) { Lrama::ScannerFSA.new([token_a, token_ab]) } + let(:lex_prec) { Lrama::Grammar::LexPrec.new } + let(:length_prec) { Lrama::LengthPrecedences.new(lex_prec) } + + it "creates FSA with accepting states" do + expect(scanner_fsa.states).not_to be_empty + end + end + + describe "priority selection with lex-prec rules" do + let(:if_token) do + id = Lrama::Lexer::Token::Ident.new(s_value: "IF") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/if/") + Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 0 + ) + end + + let(:id_token) do + id = Lrama::Lexer::Token::Ident.new(s_value: "ID") + regex = Lrama::Lexer::Token::Regex.new(s_value: "/[a-z]+/") + Lrama::Grammar::TokenPattern.new( + id: id, + pattern: regex, + lineno: 1, + definition_order: 1 + ) + end + + let(:scanner_fsa) { Lrama::ScannerFSA.new([if_token, id_token]) } + let(:lex_prec) { Lrama::Grammar::LexPrec.new } + + before do + left = Lrama::Lexer::Token::Ident.new(s_value: "IF") + right = Lrama::Lexer::Token::Ident.new(s_value: "ID") + lex_prec.add_rule( + left_token: left, + operator: Lrama::Grammar::LexPrec::HIGHER, + right_token: right, + lineno: 1 + ) + end + + it "respects higher priority rules" do + expect(lex_prec.higher_priority?("IF", "ID")).to be true + end + + it "creates length precedences from lex_prec" do + length_prec = Lrama::LengthPrecedences.new(lex_prec) + expect(length_prec).to be_a(Lrama::LengthPrecedences) + end + end +end diff --git a/template/bison/yacc.c b/template/bison/yacc.c index 6edd59a0..fb0b4e1a 100644 --- a/template/bison/yacc.c +++ b/template/bison/yacc.c @@ -582,6 +582,9 @@ static const <%= output.int_type_for(output.context.yyr2) %> yyr2[] = <%= output.int_array_to_string(output.context.yyr2) %> }; +<%- if output.pslr_enabled? -%> +<%= output.pslr_tables_and_functions %> +<%- end -%> enum { YYENOMEM = -2 };