Skip to content

Commit

Permalink
Change comment directive parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
tompng committed Jan 31, 2025
1 parent 80a146b commit 5d04e3d
Show file tree
Hide file tree
Showing 12 changed files with 622 additions and 208 deletions.
177 changes: 169 additions & 8 deletions lib/rdoc/comment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,12 @@ def normalize
self
end

##
# Sets the normalized flag directly. Intended for comments that are built
# from text which is already normalized.

def normalized=(value)
  @normalized = value
end

##
# Was this text normalized?

Expand Down Expand Up @@ -226,14 +232,169 @@ def tomdoc?
@format == 'tomdoc'
end

##
# Create a new parsed comment from a document
MULTILINE_DIRECTIVES = %w[call-seq].freeze # :nodoc:

# Wraps an already parsed +document+ in a new comment with empty text.
# The comment location is set only when the document knows its file.
def self.from_document(document) # :nodoc:
  RDoc::Comment.new('').tap do |parsed|
    parsed.document = document
    parsed.location = RDoc::TopLevel.new(document.file) if document.file
  end
end
# Directives that are also recognized when written without a leading colon.
# There are more, but those are already handled by RDoc::Parser::C.
COLON_LESS_DIRECTIVES = %w[call-seq Document-method].freeze # :nodoc:

# Both directive lists are only used by the comment parsing code in this class.
private_constant :MULTILINE_DIRECTIVES, :COLON_LESS_DIRECTIVES

class << self

##
# Builds a comment around an already parsed +document+. The comment text is
# empty; the location is set only when the document reports a file.

def from_document(document) # :nodoc:
  RDoc::Comment.new('').tap do |parsed|
    parsed.document = document
    source_file = document.file
    parsed.location = RDoc::TopLevel.new(document.file) if source_file
  end
end

# Parses a comment, collects its directives and returns [normalized_comment_text, directives_hash].
# This method expands includes and removes everything not needed in the document text, such as
# private sections, directive lines, comment characters `# /* * */` and indent spaces.
#
# RDoc comment consists of include, directive, multiline directive, private section and comment text.
#
# Include
# # :include: filename
#
# Directive
# # :directive-without-value:
# # :directive-with-value: value
#
# Multiline directive (only :call-seq:)
# # :multiline-directive:
# # value1
# # value2
#
# Private section
# #--
# # private comment
# #++

def parse(text, filename, line_no, type)
# Pick the private-section markers and the per-line prefix pattern for the
# comment syntax given by +type+.
# NOTE(review): there is no else branch, so any other +type+ leaves the three
# patterns unset -- confirm callers only pass :ruby, :c or :simple.
case type
when :ruby
# Drop the leading run of `#` from every line, but only when the text itself starts with `#`.
text = text.gsub(/^#+/, '') if text.start_with?('#')
private_start_regexp = /^-{2,}$/
private_end_regexp = /^\+{2}$/
indent_regexp = /^\s*/
when :c
# The optional group allows the markers to directly follow a leading `*`.
private_start_regexp = /^(\s*\*)?-{2,}$/
private_end_regexp = /^(\s*\*)?\+{2}$/
# The prefix of a C comment line may contain an opening `/*` or a leading `*`.
indent_regexp = /^\s*(\/\*+|\*)?\s*/
# Remove the closing `*/` at the very end of the text.
text = text.gsub(/\s*\*+\/\s*\z/, '')
# TODO: should not be here. Looks like another type of directive
# text = text.gsub %r%Document-method:\s+[\w:.#=!?|^&<>~+\-/*\%@`\[\]]+%, ''
when :simple
# Unlike other types, this implementation only looks for two dashes at
# the beginning of the line. Three or more dashes are considered to be
# a rule and ignored.
private_start_regexp = /^-{2}$/
private_end_regexp = /^\+{2}$/
indent_regexp = /^\s*/
end

# directives maps a directive name to [value_or_nil, line_no].
directives = {}
lines = text.split("\n")
in_private = false
comment_lines = []
# Each iteration consumes one line, plus the value lines of a multiline
# directive when there is one; line_no is advanced by the number consumed.
until lines.empty?
line = lines.shift
read_lines = 1
if in_private
# Inside a private section: drop lines up to and including the end marker.
in_private = false if line.match?(private_end_regexp)
line_no += read_lines
next
elsif line.match?(private_start_regexp)
in_private = true
line_no += read_lines
next
end

# Split off the prefix (indent and comment characters). It is replaced by
# the same number of spaces in kept lines.
prefix = line[indent_regexp]
prefix_indent = ' ' * prefix.size
line = line.byteslice(prefix.bytesize..)
# With the regexp literal on the left-hand side, the named captures are
# assigned to the locals colon, directive and param (nil when there is no match).
/\A(?<colon>\\?:|:?)(?<directive>[\w-]+):(?<param>.*)/ =~ line

if colon == '\\:'
# An escaped directive is kept as comment text with the backslash removed
comment_lines << prefix_indent + line.sub('\\:', ':')
elsif !directive || param.start_with?(':') || (colon.empty? && !COLON_LESS_DIRECTIVES.include?(directive))
# Something like `:toto::` is not a directive
# Only a few directives are allowed to start without a colon
comment_lines << prefix_indent + line
elsif directive == 'include'
# The caller's block receives the file name and the indent and returns
# the text whose lines are added to the comment.
filename_to_include = param.strip
yield(filename_to_include, prefix_indent).lines.each { |l| comment_lines << l.chomp }
elsif MULTILINE_DIRECTIVES.include?(directive)
param = param.strip
value_lines = take_multiline_directive_value_lines(directive, filename, line_no, lines, prefix_indent.size, indent_regexp, !param.empty?)
# The value lines are still at the front of +lines+; remove them and count them.
read_lines += value_lines.size
lines.shift(value_lines.size)
unless param.empty?
# Accept `:call-seq: first-line\n second-line` for now
value_lines.unshift(param)
end
value = value_lines.join("\n")
directives[directive] = [value.empty? ? nil : value, line_no]
else
# Single-line directive: an empty parameter is recorded as nil.
value = param.strip
directives[directive] = [value.empty? ? nil : value, line_no]
end
line_no += read_lines
end
# Normalize the comment: strip the smallest indent found on non-blank lines
# from every line, then drop leading blank lines.
min_spaces = nil
comment_lines.each do |l|
next if l.match?(/\A\s*\z/)
n = l[/\A */].size
min_spaces = n if !min_spaces || n < min_spaces
end
comment_lines.map! { |l| l[min_spaces..] || '' } if min_spaces
comment_lines.shift while comment_lines.first&.match?(/\A\s*\z/)
# The result is a new string created with the encoding of +text+.
[String.new(encoding: text.encoding) << comment_lines.join("\n"), directives]
end

##
# Returns the lines at the front of +lines+ that make up the value of a
# multiline directive, with their indentation removed. +lines+ itself is not
# modified.
#
# +base_indent_size+ is the prefix width of the directive line and
# +indent_regexp+ extracts the prefix of a line. +has_param+ tells whether the
# directive line carried a value of its own. +directive+, +filename+ and
# +line_no+ are only used in the warning message.

private def take_multiline_directive_value_lines(directive, filename, line_no, lines, base_indent_size, indent_regexp, has_param)
  return [] if lines.empty?

  leading_width = lines.first[indent_regexp].size

  # A first line that is not indented deeper than the directive itself
  # means there is no multiline value.
  return [] unless leading_width > base_indent_size

  if has_param
    # :multiline-directive: line1
    #   line2
    #   line3
    #
    # Take every line indented deeper than the directive and strip the
    # indent they all share.
    taken = lines.take_while { |candidate| candidate.rstrip[indent_regexp].size > base_indent_size }
    shared_indent = taken.map { |candidate| candidate[indent_regexp].size }.min
    return taken.map { |candidate| candidate[shared_indent..] }
  end

  # Take lines indented at least as deep as the first one; lines that
  # consist only of their prefix are accepted in between.
  taken = lines.take_while do |candidate|
    stripped = candidate.rstrip
    margin = stripped[indent_regexp]
    margin == stripped || margin.size >= leading_width
  end
  taken = taken.map { |candidate| (candidate[leading_width..] || '').chomp }

  more_lines_follow = taken.size != lines.size
  if more_lines_follow && !taken.last.empty?
    warn "#{filename}:#{line_no} Multiline directive :#{directive}: should end with a blank line."
  end

  # Trailing blank lines are not part of the value.
  taken.pop while taken.last&.empty?
  taken
end
end
end
45 changes: 35 additions & 10 deletions lib/rdoc/markup/pre_process.rb
Original file line number Diff line number Diff line change
Expand Up @@ -97,18 +97,15 @@ def initialize(input_file_name, include_path)
# RDoc::CodeObject#metadata for details.

def handle text, code_object = nil, &block
first_line = 1
if RDoc::Comment === text then
comment = text
text = text.text
first_line = comment.line || 1
end

# regexp helper (square brackets for optional)
# $1 $2 $3 $4 $5
# [prefix][\]:directive:[spaces][param]newline
text = text.lines.map.with_index(first_line) do |line, num|
next line unless line =~ /\A([ \t]*(?:#|\/?\*)?[ \t]*)(\\?):([\w-]+):([ \t]*)(.+)?(\r?\n|$)/
text = text.gsub(/^([ \t]*(?:#|\/?\*)?[ \t]*)(\\?):([\w-]+):([ \t]*)(.+)?(\r?\n|$)/) do
# skip something like ':toto::'
next $& if $4.empty? and $5 and $5[0, 1] == ':'

Expand All @@ -122,21 +119,49 @@ def handle text, code_object = nil, &block
comment.format = $5.downcase
next "#{$1.strip}\n"
end

handle_directive $1, $3, $5, code_object, text.encoding, num, &block
end.join
handle_directive $1, $3, $5, code_object, text.encoding, &block
end

if comment then
comment.text = text
else
comment = text
end

run_post_processes(comment, code_object)

text
end

##
# Parses +comment_text+, applies every directive found in it to +code_object+
# through #handle_directive and returns [comment_text, format], where
# +format+ is the value of the +markup+ directive (nil when absent).
#
# When +code_object+ is an RDoc::AnyMethod and a +call-seq+ value was given,
# the call sequence is stored on it with empty lines removed.

def run_pre_processes(comment_text, code_object, start_line_no, type)
  normalized_text, directives = parse_comment(comment_text, start_line_no, type)

  directives.each do |name, (value, _line_no)|
    handle_directive('', name, value, code_object)
  end

  if code_object.is_a?(RDoc::AnyMethod)
    call_seq, = directives['call-seq']
    if call_seq
      code_object.call_seq = call_seq.lines.map(&:chomp).reject(&:empty?).join("\n")
    end
  end

  markup_format, = directives['markup']
  [normalized_text, markup_format]
end


##
# Runs every registered post-processor on +comment+ and +code_object+, in
# registration order.

def run_post_processes(comment, code_object)
  self.class.post_processors.each { |post_processor| post_processor.call(comment, code_object) }
end

text
##
# Parses +text+ with RDoc::Comment.parse and returns
# [normalized_comment_text, directives_hash]. Included files are resolved
# through #include_file using the encoding of +text+.

def parse_comment(text, line_no, type)
  expand_include = proc do |filename, prefix_indent|
    include_file(filename, prefix_indent, text.encoding)
  end

  RDoc::Comment.parse(text, @input_file_name, line_no, type, &expand_include)
end

##
Expand All @@ -151,7 +176,7 @@ def handle text, code_object = nil, &block
# When 1.8.7 support is ditched prefix can be defaulted to ''

def handle_directive prefix, directive, param, code_object = nil,
encoding = nil, line = nil
encoding = nil
blankline = "#{prefix.strip}\n"
directive = directive.downcase

Expand Down Expand Up @@ -244,7 +269,7 @@ def handle_directive prefix, directive, param, code_object = nil,

blankline
else
result = yield directive, param, line if block_given?
result = yield directive, param if block_given?

case result
when nil then
Expand Down
46 changes: 6 additions & 40 deletions lib/rdoc/parser/c.rb
Original file line number Diff line number Diff line change
Expand Up @@ -609,8 +609,6 @@ def find_body class_name, meth_name, meth_obj, file_content, quiet = false
body = args[1]
offset, = args[2]

comment.remove_private if comment

# try to find the whole body
body = $& if /#{Regexp.escape body}[^(]*?\{.*?^\}/m =~ file_content

Expand All @@ -623,7 +621,6 @@ def find_body class_name, meth_name, meth_obj, file_content, quiet = false
override_comment = find_override_comment class_name, meth_obj
comment = override_comment if override_comment

comment.normalize
find_modifiers comment, meth_obj if comment

#meth_obj.params = params
Expand All @@ -641,7 +638,6 @@ def find_body class_name, meth_name, meth_obj, file_content, quiet = false

find_body class_name, args[3], meth_obj, file_content, true

comment.normalize
find_modifiers comment, meth_obj

meth_obj.start_collecting_tokens
Expand All @@ -665,7 +661,6 @@ def find_body class_name, meth_name, meth_obj, file_content, quiet = false
comment = find_override_comment class_name, meth_obj

if comment then
comment.normalize
find_modifiers comment, meth_obj
meth_obj.comment = comment

Expand Down Expand Up @@ -744,7 +739,6 @@ def find_class_comment class_name, class_mod
end

comment = new_comment comment, @top_level, :c
comment.normalize

look_for_directives_in class_mod, comment

Expand Down Expand Up @@ -809,9 +803,6 @@ def find_const_comment(type, const_name, class_name = nil)
# Handles modifiers in +comment+ and updates +meth_obj+ as appropriate:
# the comment is normalized, its call sequence is extracted into +meth_obj+
# and its directives are applied.

def find_modifiers(comment, meth_obj)
  comment.normalize
  comment.extract_call_seq(meth_obj)

  look_for_directives_in(meth_obj, comment)
end

Expand All @@ -825,10 +816,10 @@ def find_override_comment class_name, meth_obj
comment = if @content =~ %r%Document-method:
\s+#{class_name}#{prefix}#{name}
\s*?\n((?>.*?\*/))%xm then
"/*#{$1}"
"/*\n#{$1}"
elsif @content =~ %r%Document-method:
\s#{name}\s*?\n((?>.*?\*/))%xm then
"/*#{$1}"
"/*\n#{$1}"
end

return unless comment
Expand Down Expand Up @@ -1105,35 +1096,10 @@ def load_variable_map map_name
# Both :main: and :title: directives are deprecated and will be removed in RDoc 7.

def look_for_directives_in context, comment
# NOTE(review): this body looks like two versions interleaved (a rendered
# diff without +/- markers): the block-based @preprocess.handle call below
# and the run_pre_processes / run_post_processes sequence after it.
# Confirm which one is current before relying on the combined behavior.
#
# The block receives directives that the preprocessor passes on and deals
# with the deprecated :main: and :title: directives.
@preprocess.handle comment, context do |directive, param|
case directive
when 'main' then
# Record the main page, then warn about the deprecation.
@options.main_page = param

warn <<~MSG
The :main: directive is deprecated and will be removed in RDoc 7.
You can use these options to specify the initial page displayed instead:
- `--main=#{param}` via the command line
- `rdoc.main = "#{param}"` if you use `RDoc::Task`
- `main_page: #{param}` in your `.rdoc_options` file
MSG
# The block result for this directive is an empty string.
''
when 'title' then
# The title is only recorded when the options object supports it.
@options.default_title = param if @options.respond_to? :default_title=

warn <<~MSG
The :title: directive is deprecated and will be removed in RDoc 7.
You can use these options to specify the title displayed instead:
- `--title=#{param}` via the command line
- `rdoc.title = "#{param}"` if you use `RDoc::Task`
- `title: #{param}` in your `.rdoc_options` file
MSG
# The block result for this directive is an empty string.
''
end
end

# Replace the comment text with the result of run_pre_processes (C comment
# syntax, starting at the comment's line or 1) and keep the format it returns.
comment.text, format = @preprocess.run_pre_processes(comment.text, context, comment.line || 1, :c)
comment.format = format if format
@preprocess.run_post_processes(comment, context)
# Mark the comment as already normalized.
comment.normalized = true
comment
end

Expand Down
Loading

0 comments on commit 5d04e3d

Please sign in to comment.