diff --git a/lib/attributed_string/attachment.rb b/lib/attributed_string/attachment.rb index a12c0ba..a5b3d19 100644 --- a/lib/attributed_string/attachment.rb +++ b/lib/attributed_string/attachment.rb @@ -53,14 +53,13 @@ def attachments(range: 0...self.length) attachments_with_positions(range: range).map { |attachment| attachment[:attachment] } end - # TODO: needs a test def attachments_with_positions(range: 0...self.length) range = normalize_range(range) return [] if range.nil? - attachments = [] - self.chars[range].map.with_index do |char, i| - attachments << { attachment: attachment_at(range.begin + i), position: range.begin + i } if char == ATTACHMENT_CHARACTER + @store.each_with_object([]) do |entry, arr| + if entry[:attachment] && range.include?(entry[:range].begin) + arr << { attachment: entry[:attachment], position: entry[:range].begin } + end end - attachments end end diff --git a/lib/attributed_string/filter_result.rb b/lib/attributed_string/filter_result.rb index 11e715a..fdd6b61 100644 --- a/lib/attributed_string/filter_result.rb +++ b/lib/attributed_string/filter_result.rb @@ -16,52 +16,31 @@ def filter(&block) class FilterResult < String # @see AttributedString#filter def initialize(attr_string, &block) - filtered_positions = [] + result_parts = [] + ranges = [] cached_block_calls = {} - # TODO: this can be optimized to use the same method that inspect uses which doesn't go through - # every character (it goes through each substring span with different ranges) - # A presenter type architecture that inspect, rainbow print, and filter can share would be ideal - attr_string.each_char.with_index do |char, index| - attrs = attr_string.attrs_at(index) - # Use the attrs object ID as the cache key to handle different attribute hashes + attr_string.each_span_with_attrs do |substring, attrs, range| cache_key = attrs.hash - cached_result = cached_block_calls.fetch(cache_key) do - result = block.call(attrs) - cached_block_calls[cache_key] = result - result - end - if cached_result - filtered_positions << index + keep = cached_block_calls.fetch(cache_key) do + res = block.call(attrs) + cached_block_calls[cache_key] = res + res end - end + next unless keep - # Group adjacent positions into ranges to minimize allocations - ranges = [] - unless filtered_positions.empty? - start_pos = filtered_positions.first - prev_pos = start_pos - filtered_positions.each_with_index do |pos, idx| - next if idx == 0 - if pos == prev_pos + 1 - # Continue the current range - prev_pos = pos - else - # End the current range and start a new one - ranges << (start_pos..prev_pos) - start_pos = pos - prev_pos = pos - end + result_parts << substring + + if ranges.any? && ranges.last.end == range.begin - 1 + last = ranges.pop + ranges << (last.begin..range.end) + else + ranges << range end - # Add the final range - ranges << (start_pos..prev_pos) end - # Concatenate substrings from the original string based on the ranges - result_string = ranges.map { |range| attr_string.send(:original_slice,range) }.join - - # Build the list of original positions - original_positions = ranges.flat_map { |range| range.to_a } + result_string = result_parts.join + original_positions = ranges.flat_map { |r| r.to_a } super(result_string) @original_positions = original_positions @@ -73,15 +52,16 @@ def original_position_at(index) end def original_ranges_for(filtered_range) - # TODO: this doesn't work for excluded end range raise ArgumentError, "Invalid range" unless filtered_range.is_a?(Range) - raise ArgumentError, "Range out of bounds" if filtered_range.end >= length - if filtered_range.begin > filtered_range.end + + end_idx = filtered_range.end + end_idx -= 1 if filtered_range.exclude_end? + raise ArgumentError, "Range out of bounds" if end_idx >= length + + if filtered_range.begin > end_idx + return [] if filtered_range.begin == end_idx + 1 && filtered_range.exclude_end? raise ArgumentError, "Reverse range is not allowed" end - if filtered_range.begin == filtered_range.end && filtered_range.exclude_end? - return [] - end original_positions = @original_positions[filtered_range] ranges = [] diff --git a/lib/attributed_string/inspect.rb b/lib/attributed_string/inspect.rb index 8dd9d31..918ab70 100644 --- a/lib/attributed_string/inspect.rb +++ b/lib/attributed_string/inspect.rb @@ -9,87 +9,58 @@ class AttributedString # "and these have none").inspect # def inspect(color: false) - # Collect all positions where attributes change - positions = Set.new - - @store.each do |attr| - range = attr[:range] - positions << range.begin - positions << range.begin + range.size + attachments_map = attachments_with_positions(range: 0...length).each_with_object({}) do |entry, h| + h[entry[:position]] = entry[:attachment] end - # Include the start and end positions of the string - positions << 0 - positions << self.length - - # Sort all positions - positions = positions.to_a.sort - result = "" - last_attrs = {} # Initialize as empty hash - - positions.each_cons(2) do |start_pos, end_pos| - next if start_pos >= end_pos # Skip invalid ranges - - substring = self.to_s[start_pos...end_pos] - attrs_before = last_attrs - attachment = attachment_at(start_pos) - attrs_after = attrs_at(start_pos) + last_attrs = {} - # Determine attribute changes + each_span_with_attrs.with_index do |(substring, attrs, range), span_idx| ended_attrs = {} started_attrs = {} - # Attributes that have ended or changed - attrs_before.each do |key, value| - if !attrs_after.key?(key) - # Attribute has ended + last_attrs.each do |key, value| + if !attrs.key?(key) ended_attrs[key] = value - elsif attrs_after[key] != value - # Attribute value has changed; treat as ending old and starting new + elsif attrs[key] != value ended_attrs[key] = value - started_attrs[key] = attrs_after[key] + started_attrs[key] = attrs[key] end end - # Attributes that have started - attrs_after.each do |key, value| - if !attrs_before.key?(key) - started_attrs[key] = value - end + attrs.each do |key, value| + started_attrs[key] = value unless last_attrs.key?(key) end - # Remove attributes that both ended and started (value change) ended_attrs.delete_if { |k, _| started_attrs.key?(k) } unless ended_attrs.empty? && started_attrs.empty? - attrs_str = ended_attrs.keys.sort.map{ |k| "-#{k}" } - attrs_str += started_attrs.to_a.sort{ |a,b| a[0] <=> b[0] }.map{ |a| "#{a[0]}: #{a[1]}" } - attrs_str = "{ #{attrs_str.join(', ')} }" - result += dim(attrs_str, color: color) + attrs_str = ended_attrs.keys.sort.map { |k| "-#{k}" } + attrs_str += started_attrs.sort_by { |a, _| a }.map { |k, v| "#{k}: #{v}" } + result << dim("{ #{attrs_str.join(', ')} }", color: color) end - if attachment - substring = dim("[#{attachment}]", color: color) + substring.chars.each_with_index do |char, i| + pos = range.begin + i + if char.to_s == ATTACHMENT_CHARACTER && attachments_map.key?(pos) + result << dim("[#{attachments_map[pos]}]", color: color) + else + result << char + end end - # Append the substring - result += substring - - last_attrs = attrs_after + last_attrs = attrs end - # Close any remaining attributes unless last_attrs.empty? - result += dim("{ #{last_attrs.keys.sort.map{ |k| "-#{k}" }.join(", ")} }", color: color) + result << dim("{ #{last_attrs.keys.sort.map { |k| "-#{k}" }.join(', ')} }", color: color) end result end - def dim(string, color: true) color ? "\e[2m#{string}\e[22m" : string end - end diff --git a/lib/attributed_string/klass.rb b/lib/attributed_string/klass.rb index 0087468..8220cdf 100644 --- a/lib/attributed_string/klass.rb +++ b/lib/attributed_string/klass.rb @@ -1,3 +1,5 @@ +require 'set' + class AttributedString < String alias_method :original_slice, :slice @@ -107,6 +109,119 @@ def ==(other) (0...length).all? { |i| attrs_at(i) == other.attrs_at(i) && attachment_at(i) == other.attachment_at(i) } && super end + # Iterates over contiguous spans of the string that share the same active + # attributes. Yields the substring, the attributes for that span, and the + # span range. This avoids yielding once per character and instead jumps + # directly to points where attributes change, allowing callers to operate in + # O(n + k) time where `n` is the number of attribute ranges and `k` is the + # resulting span count. + def each_span_with_attrs + return enum_for(:each_span_with_attrs) unless block_given? + + starts = Hash.new { |h, k| h[k] = [] } + ends = Hash.new { |h, k| h[k] = [] } + + @store.each do |entry| + next unless entry[:range] + next if entry[:attachment] # attachments do not affect attrs + range = entry[:range] + starts[range.begin] << entry + end_point = range.end + 1 + ends[end_point] << entry if end_point <= length + end + + event_positions = Set.new([0, length]) + event_positions.merge(starts.keys) + event_positions.merge(ends.keys) + events = event_positions.to_a.sort + + stacks = Hash.new { |h, k| h[k] = [] } + attrs = {} + + compute_value = lambda do |key| + stack = stacks[key] + array_mode = stack.any? { |_, v| v.is_a?(Array) } + if array_mode + combined = [] + stack.each { |_, v| combined.concat(v) unless v == :__deleted__ } + if combined.empty? + attrs.delete(key) + else + attrs[key] = combined + end + else + val = nil + stack.reverse_each do |_, v| + next if v == :__deleted__ + val = v + break + end + if val.nil? + attrs.delete(key) + else + attrs[key] = val + end + end + end + + remove_stack_entry = lambda do |key, entry| + stack = stacks[key] + idx = stack&.index { |pair| pair[0].equal?(entry) } + return unless idx + stack.delete_at(idx) + if stack.empty? + stacks.delete(key) + attrs.delete(key) + else + compute_value.call(key) + end + end + + events.each_cons(2) do |start_idx, end_idx| + if starts.key?(start_idx) + starts[start_idx].each do |entry| + if entry[:attributes] + entry[:attributes].each do |k, v| + stacks[k] << [entry, v] + attrs[k] = v + end + elsif entry[:delete] + entry[:delete].each do |k| + stacks[k] << [entry, :__deleted__] + attrs.delete(k) + end + elsif entry[:arr_attributes] + entry[:arr_attributes].each do |k, v| + stacks[k] << [entry, Array(v)] + compute_value.call(k) + end + end + end + end + + substring = self[start_idx...end_idx] + yield substring, attrs.dup, start_idx..(end_idx - 1) + + if ends.key?(end_idx) + ends[end_idx].each do |entry| + if entry[:attributes] + entry[:attributes].each_key do |k| + remove_stack_entry.call(k, entry) + end + elsif entry[:delete] + entry[:delete].each do |k| + remove_stack_entry.call(k, entry) + end + elsif entry[:arr_attributes] + entry[:arr_attributes].each_key do |k| + remove_stack_entry.call(k, entry) + end + end + end + end + end + end + diff --git a/test/attachment_test.rb b/test/attachment_test.rb index 09a6f70..0a7b972 100644 --- a/test/attachment_test.rb +++ b/test/attachment_test.rb @@ -70,4 +70,22 @@ def test_range_movements assert_equal "attachment", str.attachment_at(2) assert_nil str.attachment_at(3) end + + def test_attachments_with_positions_entire_string + @str.add_attachment("second", position: 3) + results = @str.attachments_with_positions + assert_equal [{ attachment: "attachment", position: 1 }, + { attachment: "second", position: 3 }], results + end + + def test_attachments_with_positions_in_range + @str.add_attachment("second", position: 3) + results = @str.attachments_with_positions(range: 3..3) + assert_equal [{ attachment: "second", position: 3 }], results + end + + def test_attachments_with_positions_invalid_range + results = @str.attachments_with_positions(range: 100..101) + assert_equal [], results + end end diff --git a/test/benchmark_performance_test.rb b/test/benchmark_performance_test.rb new file mode 100644 index 0000000..3418f9a --- /dev/null +++ b/test/benchmark_performance_test.rb @@ -0,0 +1,68 @@ +require 'test_helper' +require 'minitest/benchmark' + +# Benchmarks for common AttributedString operations. These tests help +# identify operations that scale worse than linearly as the string or +# attribute count grows. +class BenchmarkPerformanceTest < Minitest::Benchmark + # Use a modest range so the test suite stays fast while still + # demonstrating growth trends. + def self.bench_range + bench_exp(10, 1_000, 10) + end + + # Benchmark adding a single attribute over the entire string. + # The operation should run in roughly constant time with respect to string + # length because only the attribute list grows by one entry. + def bench_add_attrs_whole_string + assert_performance_constant do |n| + str = AttributedString.new('a' * n) + str.add_attrs(0...n, bold: true) + end + end + + # Benchmark looking up an attribute when many single-character ranges exist. + def bench_attrs_at_many_ranges + assert_performance_linear 0.90 do |n| + str = AttributedString.new('a' * n) + n.times { |i| str.add_attrs(i..i, index: i) } + str.attrs_at(n / 2) + end + end + + # Benchmark inserting text in the middle of a heavily attributed string. + def bench_insert_middle + assert_performance_linear 0.90 do |n| + str = AttributedString.new('a' * n) + n.times { |i| str.add_attrs(i..i, index: i) } + str.insert(n / 2, 'b') + end + end + + # Benchmark iterating over spans with many small attribute ranges. + def bench_each_span_with_attrs + assert_performance_linear 0.90 do |n| + str = AttributedString.new('a' * n) + n.times { |i| str.add_attrs(i..i, index: i) } + str.each_span_with_attrs { |_sub, _attrs, _range| } + end + end + + # Benchmark filtering to build a new AttributedString when densely attributed. + def bench_filter_dense + assert_performance_linear 0.90 do |n| + str = AttributedString.new('a' * n) + n.times { |i| str.add_attrs(i..i, index: i) } + str.filter { |attrs| attrs[:index].even? } + end + end + + # Benchmark inspect on a heavily attributed string. + def bench_inspect_dense + assert_performance_linear 0.90 do |n| + str = AttributedString.new('a' * n) + n.times { |i| str.add_attrs(i..i, index: i) } + str.inspect + end + end +end diff --git a/test/each_span_with_attrs_test.rb b/test/each_span_with_attrs_test.rb new file mode 100644 index 0000000..8c9f067 --- /dev/null +++ b/test/each_span_with_attrs_test.rb @@ -0,0 +1,55 @@ +require 'test_helper' + +class EachSpanWithAttrsTest < Minitest::Test + def test_yields_spans_and_attributes + str = AttributedString.new('Hello') + str.add_attrs(0..1, bold: true) + str.add_attrs(3..4, italic: true) + + spans = str.each_span_with_attrs.to_a + assert_equal [ + ['He', { bold: true }, 0..1], + ['l', {}, 2..2], + ['lo', { italic: true }, 3..4] + ], spans + end + + def test_attribute_removal_creates_new_span + str = AttributedString.new('abc') + str.add_attrs(0..2, bold: true) + str.remove_attrs(1..1, :bold) + + spans = str.each_span_with_attrs.to_a + assert_equal [ + ['a', { bold: true }, 0..0], + ['b', {}, 1..1], + ['c', { bold: true }, 2..2] + ], spans + end + + def test_array_attributes_accumulate + str = AttributedString.new('abc') + str.add_arr_attrs(0..1, tag: :x) + str.add_arr_attrs(1..2, tag: :y) + + spans = str.each_span_with_attrs.to_a + assert_equal [ + ['a', { tag: [:x] }, 0..0], + ['b', { tag: [:x, :y] }, 1..1], + ['c', { tag: [:y] }, 2..2] + ], spans + end + + def test_attachments_do_not_create_new_spans + str = AttributedString.new('ab') + str.add_attrs(0..1, bold: true) + str.add_attachment('img', position: 1) + + spans = str.each_span_with_attrs.to_a + assert_equal [ + ['a', { bold: true }, 0..0], + [AttributedString::ATTACHMENT_CHARACTER, {}, 1..1], + ['b', { bold: true }, 2..2] + ], spans + end +end