# Collects the type of every explicit `return` reachable in a method body.
#
# @param body [Object] IR node for the method body (handed straight to
#   collect_returns_recursive)
# @param env [Object] inference environment forwarded to the recursive walk
# @return [Array<(Array, Boolean)>] the collected types, followed by the
#   termination flag reported by collect_returns_recursive
def collect_return_types(body, env)
  collected = []
  did_terminate = collect_returns_recursive(body, env, collected)

  [collected, did_terminate]
end
# Parses an if/unless/elsif conditional whose header is at +start_line+.
#
# Lines after the header are routed into the then- or else-branch until the
# `end` that closes this conditional. An `elsif` at the conditional's own
# nesting level is parsed recursively and stored in the else-branch as a
# nested conditional of kind :if. Openers of nested blocks are recorded as
# IR::RawCode and tracked by depth so their `end` is not mistaken for ours.
#
# @param lines [Array<String>] source lines of the enclosing body
# @param start_line [Integer] index of the if/unless/elsif header line
# @param block_end [Integer] exclusive upper bound for scanning
# @return [Array(IR::Conditional, Integer)] the conditional node and the index
#   of the first line after it; [nil, start_line] when the line at
#   +start_line+ is not a conditional header
def parse_conditional(lines, start_line, block_end)
  header = lines[start_line].strip.match(/^(if|unless|elsif)\s+(.+)$/)
  return [nil, start_line] unless header

  # elsif is handled internally as a nested `if`.
  kind = header[1] == "elsif" ? :if : header[1].to_sym
  condition = parse_expression(header[2])

  then_statements = []
  else_statements = []
  target = then_statements
  depth = 1
  next_line = nil
  i = start_line + 1

  while i < block_end
    current_line = lines[i].strip

    # A nested block starts either with a block keyword or with a trailing
    # `do` / `do |args|`. Comment lines never open a block.
    opens_block = !current_line.start_with?("#") &&
                  (current_line.match?(/^(if|unless|case|while|until|for|begin)\b/) ||
                   current_line.match?(/\bdo(\s*\|[^|]*\|)?\s*$/))

    if opens_block
      depth += 1
      target << IR::RawCode.new(code: current_line)
    elsif current_line == "end"
      depth -= 1
      if depth.zero?
        next_line = i + 1
        break
      end
    elsif depth == 1 && current_line.match?(/^elsif\s+/)
      # The recursive call consumes the shared closing `end`, so the index it
      # returns is already the first line after the whole conditional.
      nested, after_nested = parse_conditional(lines, i, block_end)
      if nested
        else_statements << nested
        next_line = after_nested
      end
      break
    elsif depth == 1 && current_line == "else"
      target = else_statements
    elsif !current_line.empty? && !current_line.start_with?("#")
      node = parse_statement(current_line, i)
      # Unparseable statements are dropped, but the cursor must still advance.
      target << node if node
    end

    i += 1
  end

  then_block = IR::Block.new(statements: then_statements)
  else_block = else_statements.empty? ? nil : IR::Block.new(statements: else_statements)

  conditional = IR::Conditional.new(
    condition: condition,
    then_branch: then_block,
    else_branch: else_block,
    kind: kind,
    location: start_line
  )

  # Without a closing `end` (or a usable elsif) fall back to one past the
  # position where scanning stopped.
  [conditional, next_line || (i + 1)]
end
param.match(/^(\w+)\s*:/)) + # Match: name: Type or name (supports Unicode identifiers) + if (match = param.match(/^(#{TRuby::IDENTIFIER_CHAR}+)\s*:/)) match[1] else param diff --git a/lib/t_ruby/parser.rb b/lib/t_ruby/parser.rb index 6a6c0ef..bbbbc5e 100644 --- a/lib/t_ruby/parser.rb +++ b/lib/t_ruby/parser.rb @@ -7,6 +7,12 @@ class Parser # Type names that are recognized as valid VALID_TYPES = %w[String Integer Boolean Array Hash Symbol void nil].freeze + # Pattern for method/variable names that supports Unicode characters + # \p{L} matches any Unicode letter, \p{N} matches any Unicode number + IDENTIFIER_CHAR = '[\p{L}\p{N}_]' + # Method names can end with ? or ! + METHOD_NAME_PATTERN = "#{IDENTIFIER_CHAR}+[?!]?".freeze + attr_reader :source, :ir_program, :use_combinator def initialize(source, use_combinator: true, parse_body: true) @@ -56,7 +62,7 @@ def parse end # Match function definitions (top-level only, not inside class) - if line.match?(/^\s*def\s+\w+/) + if line.match?(/^\s*def\s+#{IDENTIFIER_CHAR}+/) func_info, next_i = parse_function_with_body(i) if func_info functions << func_info @@ -167,7 +173,7 @@ def parse_function_definition(line) # def foo(): Type - no params but with return type # def foo(params) - with params, no return type # def foo - no params, no return type - match = line.match(/^\s*def\s+([\w?!]+)\s*(?:\((.*?)\))?\s*(?::\s*(.+?))?\s*$/) + match = line.match(/^\s*def\s+(#{METHOD_NAME_PATTERN})\s*(?:\((.*?)\))?\s*(?::\s*(.+?))?\s*$/) return nil unless match function_name = match[1] @@ -320,7 +326,7 @@ def parse_class(start_index) current_line = @lines[i] # Match method definitions inside class - if current_line.match?(/^\s*def\s+\w+/) + if current_line.match?(/^\s*def\s+#{IDENTIFIER_CHAR}+/) method_info, next_i = parse_method_in_class(i, class_end) if method_info methods << method_info diff --git a/spec/t_ruby/ast_type_inferrer_spec.rb b/spec/t_ruby/ast_type_inferrer_spec.rb index 7a26387..27f28c2 100644 --- 
# Specs for return-type inference when a method body contains statements
# after a `return`, or a conditional that returns in only some branches.
describe "unreachable code handling" do
  it "ignores code after unconditional return" do
    # def test
    #   return false
    #   "unreachable"
    # end
    body = TRuby::IR::Block.new(
      statements: [
        TRuby::IR::Return.new(
          value: TRuby::IR::Literal.new(value: false, literal_type: :boolean)
        ),
        TRuby::IR::Literal.new(value: "unreachable", literal_type: :string),
      ]
    )
    method = TRuby::IR::MethodDef.new(
      name: "test",
      params: [],
      return_type: nil,
      body: body
    )

    # Should be bool, not bool | String
    expect(inferrer.infer_method_return_type(method)).to eq("bool")
  end

  it "ignores conditional after unconditional return" do
    # def test
    #   return 42
    #   if condition
    #     "then"
    #   else
    #     "else"
    #   end
    # end
    conditional = TRuby::IR::Conditional.new(
      condition: TRuby::IR::Literal.new(value: true, literal_type: :boolean),
      then_branch: TRuby::IR::Block.new(
        statements: [TRuby::IR::Literal.new(value: "then", literal_type: :string)]
      ),
      else_branch: TRuby::IR::Block.new(
        statements: [TRuby::IR::Literal.new(value: "else", literal_type: :string)]
      ),
      kind: :if
    )
    body = TRuby::IR::Block.new(
      statements: [
        TRuby::IR::Return.new(
          value: TRuby::IR::Literal.new(value: 42, literal_type: :integer)
        ),
        conditional,
      ]
    )
    method = TRuby::IR::MethodDef.new(
      name: "test",
      params: [],
      return_type: nil,
      body: body
    )

    # Should be Integer only
    expect(inferrer.infer_method_return_type(method)).to eq("Integer")
  end

  it "collects returns from all branches when conditional does not fully terminate" do
    # def test
    #   if condition
    #     return "yes"
    #   end
    #   "no"
    # end
    conditional = TRuby::IR::Conditional.new(
      condition: TRuby::IR::Literal.new(value: true, literal_type: :boolean),
      then_branch: TRuby::IR::Block.new(
        statements: [
          TRuby::IR::Return.new(
            value: TRuby::IR::Literal.new(value: "yes", literal_type: :string)
          ),
        ]
      ),
      # No else branch, so the conditional as a whole does not terminate.
      else_branch: nil,
      kind: :if
    )
    body = TRuby::IR::Block.new(
      statements: [
        conditional,
        TRuby::IR::Literal.new(value: "no", literal_type: :string),
      ]
    )
    method = TRuby::IR::MethodDef.new(
      name: "test",
      params: [],
      return_type: nil,
      body: body
    )

    # Should include both String from return and String from implicit return
    # NOTE(review): both sources are String, so this expectation would also
    # pass if either one were dropped. Consider distinct types to pin the
    # merge behavior.
    expect(inferrer.infer_method_return_type(method)).to eq("String")
  end
end
# Method names written with non-ASCII letters must be recognized both at the
# top level and inside a class body.
context "with non-ASCII (Unicode) method names" do
  it "parses method names with Korean characters" do
    source = "def 안녕하세요(name: String): String\n  name\nend"
    parser = TRuby::Parser.new(source)

    result = parser.parse
    expect(result[:type]).to eq(:success)
    expect(result[:functions].length).to eq(1)
    expect(result[:functions][0][:name]).to eq("안녕하세요")
    expect(result[:functions][0][:params][0][:type]).to eq("String")
    expect(result[:functions][0][:return_type]).to eq("String")
  end

  it "parses method names with mixed ASCII and Unicode characters" do
    # Mixes letters, digits, repeated underscores and a trailing `!`.
    source = "def 비_영어_함수명___테스트1!(name: String)\n  name\nend"
    parser = TRuby::Parser.new(source)

    result = parser.parse
    expect(result[:type]).to eq(:success)
    expect(result[:functions].length).to eq(1)
    expect(result[:functions][0][:name]).to eq("비_영어_함수명___테스트1!")
  end

  it "parses method names with Japanese characters" do
    # Empty parameter list combined with a return type annotation.
    source = "def こんにちは(): String\n  'hello'\nend"
    parser = TRuby::Parser.new(source)

    result = parser.parse
    expect(result[:type]).to eq(:success)
    expect(result[:functions][0][:name]).to eq("こんにちは")
  end

  it "parses class methods with Unicode names" do
    # `\#` keeps the interpolation literal inside the parsed source.
    source = <<~RUBY
      class HelloWorld
        def 인사하기(name: String): String
          "Hello, \#{name}!"
        end
      end
    RUBY
    parser = TRuby::Parser.new(source)

    result = parser.parse
    expect(result[:type]).to eq(:success)
    expect(result[:classes].length).to eq(1)
    expect(result[:classes][0][:methods].length).to eq(1)
    expect(result[:classes][0][:methods][0][:name]).to eq("인사하기")
  end
end