Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,22 @@

## Lrama 0.8.0 (2026-xx-xx)

### [EXPERIMENTAL] Support the generation of PSLR(1) parsers

Support the generation of the PSLR(1) parser described in the following dissertation:
https://open.clemson.edu/all_dissertations/519/

To use the PSLR(1) parser, you can write the following directives in your grammar file.

```yacc
%token-pattern RSHIFT />>/ "right shift"
%token-pattern RANGLE />/ "right angle"

%lex-prec RANGLE -s RSHIFT
```

Note that the PSLR(1) parser is currently an experimental feature. If you find any bugs, please report them to us. Thank you.

## Lrama 0.7.1 (2025-12-24)

### Optimize IELR
Expand Down
2 changes: 2 additions & 0 deletions lib/lrama.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
require_relative "lrama/output"
require_relative "lrama/parser"
require_relative "lrama/reporter"
require_relative "lrama/scanner_fsa"
require_relative "lrama/state"
require_relative "lrama/states"
require_relative "lrama/length_precedences"
require_relative "lrama/tracer"
require_relative "lrama/version"
require_relative "lrama/warnings"
51 changes: 51 additions & 0 deletions lib/lrama/grammar.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
require_relative "grammar/symbols"
require_relative "grammar/type"
require_relative "grammar/union"
require_relative "grammar/token_pattern"
require_relative "grammar/lex_prec"
require_relative "lexer"

module Lrama
Expand Down Expand Up @@ -68,6 +70,8 @@ class Grammar
# @union: Union
# @precedences: Array[Precedence]
# @start_nterm: Lrama::Lexer::Token::Base?
# @token_patterns: Array[Grammar::TokenPattern]
# @lex_prec: Grammar::LexPrec

extend Forwardable

Expand Down Expand Up @@ -100,6 +104,8 @@ class Grammar
attr_accessor :locations #: bool
attr_accessor :define #: Hash[String, String]
attr_accessor :required #: bool
attr_reader :token_patterns #: Array[Grammar::TokenPattern]
attr_reader :lex_prec #: Grammar::LexPrec

def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term, :find_term_by_s_value,
:find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol,
Expand Down Expand Up @@ -133,6 +139,9 @@ def initialize(rule_counter, locations, define = {})
@required = false
@precedences = []
@start_nterm = nil
@token_patterns = []
@lex_prec = Grammar::LexPrec.new
@token_pattern_counter = 0

append_special_symbols
end
Expand Down Expand Up @@ -304,6 +313,48 @@ def ielr_defined?
@define.key?('lr.type') && @define['lr.type'] == 'ielr'
end

# True when the grammar selects the PSLR(1) parser via `%define lr.type pslr`.
# @rbs () -> bool
def pslr_defined?
  @define.key?('lr.type') && @define.fetch('lr.type') == 'pslr'
end

# Register a token pattern declared by a %token-pattern directive and
# return the created record. The pattern's token is also registered with
# the grammar as an ordinary terminal symbol.
# @rbs (id: Lexer::Token::Ident, pattern: Lexer::Token::Regex, ?alias_name: String?, ?tag: Lexer::Token::Tag?, lineno: Integer) -> Grammar::TokenPattern
def add_token_pattern(id:, pattern:, alias_name: nil, tag: nil, lineno:)
  order = @token_pattern_counter
  @token_pattern_counter = order + 1

  token_pattern = Grammar::TokenPattern.new(
    id: id, pattern: pattern, alias_name: alias_name, tag: tag,
    lineno: lineno, definition_order: order
  )
  @token_patterns << token_pattern

  # Every token pattern also has to be visible as a terminal symbol.
  add_term(id: id, alias_name: alias_name, tag: tag)

  token_pattern
end

# Record a precedence relation declared by a %lex-prec directive and
# return the rule created by the LexPrec container.
# @rbs (left_token: Lexer::Token::Ident, operator: Symbol, right_token: Lexer::Token::Ident, lineno: Integer) -> Grammar::LexPrec::Rule
def add_lex_prec_rule(left_token:, operator:, right_token:, lineno:)
  @lex_prec.add_rule(left_token: left_token, operator: operator, right_token: right_token, lineno: lineno)
end

# Look up a token pattern by its token name; returns nil when none matches.
# @rbs (String name) -> Grammar::TokenPattern?
def find_token_pattern(name)
  @token_patterns.find { |token_pattern| token_pattern.name == name }
end

private

# @rbs () -> void
Expand Down
98 changes: 98 additions & 0 deletions lib/lrama/grammar/lex_prec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# rbs_inline: enabled
# frozen_string_literal: true

module Lrama
  class Grammar
    # Lexical precedence rules collected from %lex-prec directives.
    # Models Definitions 3.2.3, 3.2.4 and 3.2.10 of the PSLR dissertation.
    #
    # Examples:
    #   %lex-prec RANGLE -s RSHIFT  # prefer the shorter match RANGLE over RSHIFT
    #   %lex-prec IF - ID           # IF beats ID when both match the same length
    class LexPrec
      # Operator symbols for the three relation kinds:
      #   "," lex-tie (same priority), "-" higher priority, "-s" shorter-match priority
      SAME_PRIORITY = :same #: Symbol
      HIGHER = :higher #: Symbol
      SHORTER = :shorter #: Symbol

      # One %lex-prec relation between two tokens.
      class Rule
        attr_reader :left_token #: Lexer::Token::Ident
        attr_reader :operator #: Symbol
        attr_reader :right_token #: Lexer::Token::Ident
        attr_reader :lineno #: Integer

        # @rbs (left_token: Lexer::Token::Ident, operator: Symbol, right_token: Lexer::Token::Ident, lineno: Integer) -> void
        def initialize(left_token:, operator:, right_token:, lineno:)
          @left_token = left_token
          @operator = operator
          @right_token = right_token
          @lineno = lineno
        end

        # Name of the left-hand token.
        # @rbs () -> String
        def left_name
          left_token.s_value
        end

        # Name of the right-hand token.
        # @rbs () -> String
        def right_name
          right_token.s_value
        end
      end

      attr_reader :rules #: Array[Rule]

      # @rbs () -> void
      def initialize
        @rules = []
      end

      # Record a new relation and return the created rule.
      # @rbs (left_token: Lexer::Token::Ident, operator: Symbol, right_token: Lexer::Token::Ident, lineno: Integer) -> Rule
      def add_rule(left_token:, operator:, right_token:, lineno:)
        Rule.new(left_token: left_token, operator: operator, right_token: right_token, lineno: lineno).tap do |rule|
          @rules << rule
        end
      end

      # True when a "-" rule declares t1 above t2 (Definition 3.2.4).
      # Directional: higher_priority?(a, b) does not imply higher_priority?(b, a).
      # @rbs (String t1, String t2) -> bool
      def higher_priority?(t1, t2)
        directed_rule?(HIGHER, t1, t2)
      end

      # True when a "-s" rule gives t1 shorter-match priority over t2
      # (Definition 3.2.15). Directional, like higher_priority?.
      # @rbs (String t1, String t2) -> bool
      def shorter_priority?(t1, t2)
        directed_rule?(SHORTER, t1, t2)
      end

      # True when t1 and t2 are declared as a lex-tie, in either order.
      # @rbs (String t1, String t2) -> bool
      def same_priority?(t1, t2)
        @rules.any? do |rule|
          next false unless rule.operator == SAME_PRIORITY

          pair = [rule.left_name, rule.right_name]
          pair == [t1, t2] || pair == [t2, t1]
        end
      end

      private

      # Whether some rule with the given operator relates left to right in that order.
      # @rbs (Symbol operator, String left, String right) -> bool
      def directed_rule?(operator, left, right)
        @rules.any? do |rule|
          rule.operator == operator && rule.left_name == left && rule.right_name == right
        end
      end
    end
  end
end
38 changes: 38 additions & 0 deletions lib/lrama/grammar/token_pattern.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# rbs_inline: enabled
# frozen_string_literal: true

module Lrama
  class Grammar
    # A terminal whose lexeme is described by a regex through %token-pattern.
    # Example: %token-pattern RSHIFT />>/ "right shift"
    class TokenPattern
      attr_reader :id #: Lexer::Token::Ident
      attr_reader :pattern #: Lexer::Token::Regex
      attr_reader :alias_name #: String?
      attr_reader :tag #: Lexer::Token::Tag?
      attr_reader :lineno #: Integer
      attr_reader :definition_order #: Integer

      # @rbs (id: Lexer::Token::Ident, pattern: Lexer::Token::Regex, ?alias_name: String?, ?tag: Lexer::Token::Tag?, lineno: Integer, definition_order: Integer) -> void
      def initialize(id:, pattern:, alias_name: nil, tag: nil, lineno:, definition_order:)
        @id = id
        @pattern = pattern
        @alias_name = alias_name
        @tag = tag
        @lineno = lineno
        @definition_order = definition_order
      end

      # The token's name, taken from its identifier.
      # @rbs () -> String
      def name
        id.s_value
      end

      # The regex body, delegated to the pattern token (no surrounding slashes).
      # @rbs () -> String
      def regex_pattern
        pattern.pattern
      end
    end
  end
end
57 changes: 57 additions & 0 deletions lib/lrama/length_precedences.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# rbs_inline: enabled
# frozen_string_literal: true

module Lrama
  # Length precedence table for PSLR(1), per Definition 3.2.15 of the
  # PSLR dissertation.
  #
  # When two token patterns conflict on match length, the lookup answers:
  #   :left      - prefer the shorter token (t1)
  #   :right     - prefer the longer token (t2)
  #   :undefined - no declared preference; fall back to the default (longest match)
  class LengthPrecedences
    # Possible results of a precedence lookup.
    LEFT = :left #: Symbol
    RIGHT = :right #: Symbol
    UNDEFINED = :undefined #: Symbol

    attr_reader :table #: Hash[[String, String], Symbol]

    # @rbs (Grammar::LexPrec lex_prec) -> void
    def initialize(lex_prec)
      @table = build_table(lex_prec)
    end

    # The declared precedence between t1 and t2, or UNDEFINED when absent.
    # @rbs (String t1, String t2) -> Symbol
    def precedence(t1, t2)
      @table.fetch([t1, t2], UNDEFINED)
    end

    # Whether t1 (the shorter match) is preferred over t2 (the longer match).
    # @rbs (String t1, String t2) -> bool
    def prefer_shorter?(t1, t2)
      precedence(t1, t2) == LEFT
    end

    private

    # Derive the table from the "-s" (shorter-match) lex-prec rules; other
    # operators contribute no length preference.
    # @rbs (Grammar::LexPrec lex_prec) -> Hash[[String, String], Symbol]
    def build_table(lex_prec)
      lex_prec.rules.each_with_object({}) do |rule, table|
        next unless rule.operator == Grammar::LexPrec::SHORTER

        # t1 -s t2: the shorter left token beats the longer right token ...
        table[[rule.left_name, rule.right_name]] = LEFT
        # ... and, read the other way, the longer token loses to the shorter one.
        table[[rule.right_name, rule.left_name]] = RIGHT
      end
    end
  end
end
13 changes: 11 additions & 2 deletions lib/lrama/lexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ class Lexer
# [::Symbol, Token::Char] |
# [::Symbol, Token::Str] |
# [::Symbol, Token::Int] |
# [::Symbol, Token::Ident]
# [::Symbol, Token::Ident] |
# [::Symbol, Token::Regex]
#
# type c_token = [:C_DECLARATION, Token::UserCode]

Expand All @@ -32,6 +33,7 @@ class Lexer
PERCENT_TOKENS = %w(
%union
%token
%token-pattern
%type
%nterm
%left
Expand All @@ -43,6 +45,7 @@ class Lexer
%printer
%destructor
%lex-param
%lex-prec
%parse-param
%initial-action
%precedence
Expand Down Expand Up @@ -121,7 +124,7 @@ def lex_token
return
when @scanner.scan(/#{SYMBOLS.join('|')}/)
return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
when @scanner.scan(/#{PERCENT_TOKENS.join('|')}/)
when @scanner.scan(/#{PERCENT_TOKENS.sort_by { |s| -s.length }.join('|')}/)
return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
when @scanner.scan(/[\?\+\*]/)
return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
Expand All @@ -133,6 +136,12 @@ def lex_token
return [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)]
when @scanner.scan(/".*?"/)
return [:STRING, Lrama::Lexer::Token::Str.new(s_value: %Q(#{@scanner.matched}), location: location)]
when @scanner.scan(%r{/[^/]+/})
return [:REGEX, Lrama::Lexer::Token::Regex.new(s_value: @scanner.matched, location: location)]
when @scanner.scan(/-s(?=\s)/)
return ['-s', Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
when @scanner.scan(/-(?=\s)/)
return ['-', Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)]
when @scanner.scan(/\d+/)
return [:INTEGER, Lrama::Lexer::Token::Int.new(s_value: Integer(@scanner.matched), location: location)]
when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
Expand Down
1 change: 1 addition & 0 deletions lib/lrama/lexer/token.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
require_relative 'token/ident'
require_relative 'token/instantiate_rule'
require_relative 'token/int'
require_relative 'token/regex'
require_relative 'token/str'
require_relative 'token/tag'
require_relative 'token/token'
Expand Down
19 changes: 19 additions & 0 deletions lib/lrama/lexer/token/regex.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# rbs_inline: enabled
# frozen_string_literal: true

module Lrama
  class Lexer
    module Token
      # Token for the regex literal accepted by the %token-pattern directive,
      # e.g. /[a-zA-Z_][a-zA-Z0-9_]*/. `s_value` keeps the slash delimiters.
      class Regex < Base
        # The pattern body with the leading and trailing slash stripped;
        # returns "" rather than nil for degenerate values.
        # @rbs () -> String
        def pattern
          inner = s_value[1..-2]
          inner.nil? ? "" : inner
        end
      end
    end
  end
end
Loading