Skip to content

Commit 6180908

Browse files
authored
GH-48295: [Ruby] Add support for reading Int8 array (#48296)
### Rationale for this change

We already have UInt8 array support, so Int8 array support is easier to add than the others.

### What changes are included in this PR?

* Add `ArrowFormat::Int8Array`
* Add `ArrowFormat::Array`
* Add support for parsing validity bitmap

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes.

* GitHub Issue: #48295

Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
1 parent 743050d commit 6180908

File tree

4 files changed

+88
-14
lines changed

4 files changed

+88
-14
lines changed

ruby/red-arrow-format/lib/arrow-format/array.rb

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,55 @@
1515
# under the License.
1616

1717
module ArrowFormat
18-
class UInt8Array
18+
class Array
19+
attr_reader :type
1920
attr_reader :size
20-
def initialize(size, validity_buffer, values_buffer)
21+
alias_method :length, :size
22+
# Stores the array's type, its element count and the two backing buffers.
#
# type            - the type object describing the elements.
# size            - number of elements.
# validity_buffer - validity bitmap buffer, or nil when none is present.
# values_buffer   - buffer holding the element values.
def initialize(type, size, validity_buffer, values_buffer)
  @type, @size = type, size
  @validity_buffer, @values_buffer = validity_buffer, values_buffer
end
2528

29+
# Tells whether element +i+ is valid (non-null).
#
# With no validity buffer every element counts as valid. Otherwise the
# answer is bit (i % 8) of byte (i / 8) in the validity buffer.
def valid?(i)
  return true if @validity_buffer.nil?

  byte_index, bit_index = i.divmod(8)
  bitmap_byte = @validity_buffer.get_value(:U8, byte_index)
  (bitmap_byte & (1 << bit_index)).positive?
end
33+
34+
# Tells whether element +i+ is null; the exact negation of valid?(i).
def null?(i)
  !valid?(i)
end
37+
38+
private
39+
# Replaces entries of +array+ with nil wherever the validity bitmap has a
# cleared bit, then returns +array+ (it is modified in place).
#
# Element i is governed by bit (i % 8) of byte (i / 8) in
# @validity_buffer. When @validity_buffer is nil, +array+ is returned
# untouched.
def apply_validity(array)
  return array if @validity_buffer.nil?

  n_bytes = @size / 8
  # Complete bytes: every one of the 8 bits maps to an element, so all 8
  # must be inspected (stopping at 7 would skip elements 7, 15, 23, ...).
  @validity_buffer.each(:U8, 0, n_bytes) do |offset, value|
    8.times do |i|
      array[offset * 8 + i] = nil if (value & (1 << i)).zero?
    end
  end
  # Trailing partial byte: only its low (@size % 8) bits map to elements.
  remained_bits = @size % 8
  unless remained_bits.zero?
    value = @validity_buffer.get_value(:U8, n_bytes)
    remained_bits.times do |i|
      array[n_bytes * 8 + i] = nil if (value & (1 << i)).zero?
    end
  end
  array
end
56+
end
57+
58+
# Array whose values buffer is read as signed 8-bit integers.
class Int8Array < Array
  # Reads @size values of type :S8 from the start of the values buffer and
  # passes them through apply_validity before returning them.
  def to_a
    raw_values = @values_buffer.values(:S8, 0, @size)
    apply_validity(raw_values)
  end
end
63+
64+
class UInt8Array < Array
2665
def to_a
27-
# TODO: Check @validity_buffer
28-
@values_buffer.values(:U8, 0, @size)
66+
apply_validity(@values_buffer.values(:U8, 0, @size))
2967
end
3068
end
3169
end

ruby/red-arrow-format/lib/arrow-format/file-reader.rb

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,9 @@ def read_schema(fb_schema)
132132
case fb_type.bit_width
133133
when 8
134134
if fb_type.signed?
135-
type = Int8Type.new
135+
type = Int8Type.singleton
136136
else
137-
type = UInt8Type.new
137+
type = UInt8Type.singleton
138138
end
139139
end
140140
end
@@ -145,7 +145,8 @@ def read_schema(fb_schema)
145145

146146
def read_column(field, n_rows, buffers, body)
147147
case field.type
148-
when UInt8Type
148+
when Int8Type,
149+
UInt8Type
149150
validity_buffer = buffers.shift
150151
if validity_buffer.length.zero?
151152
validity = nil
@@ -155,7 +156,7 @@ def read_column(field, n_rows, buffers, body)
155156

156157
values_buffer = buffers.shift
157158
values = body.slice(values_buffer.offset, values_buffer.length)
158-
UInt8Array.new(n_rows, validity, values)
159+
field.type.build_array(n_rows, validity, values)
159160
end
160161
end
161162
end

ruby/red-arrow-format/lib/arrow-format/type.rb

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,34 @@ def initialize(name, bit_width, signed)
3333
end
3434

3535
class Int8Type < IntType
  # Returns the shared instance of this type, creating it on first use.
  def self.singleton
    @singleton ||= new
  end

  # Initializes the parent IntType with name "Int8", bit width 8, signed.
  def initialize
    super("Int8", 8, true)
  end

  # Builds an Int8Array of +size+ elements over the given buffers, tagged
  # with this type.
  def build_array(size, validity_buffer, values_buffer)
    Int8Array.new(self, size, validity_buffer, values_buffer)
  end
end
4050

4151
class UInt8Type < IntType
  # Returns the shared instance of this type, creating it on first use.
  def self.singleton
    @singleton ||= new
  end

  # Initializes the parent IntType with name "UInt8", bit width 8, unsigned.
  def initialize
    super("UInt8", 8, false)
  end

  # Builds a UInt8Array of +size+ elements over the given buffers, tagged
  # with this type.
  def build_array(size, validity_buffer, values_buffer)
    UInt8Array.new(self, size, validity_buffer, values_buffer)
  end
end
4666
end

ruby/red-arrow-format/test/test-file-reader.rb

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
class TestFileReader < Test::Unit::TestCase
1919
def setup
2020
Dir.mktmpdir do |tmp_dir|
21-
table = Arrow::Table.new(uint8: Arrow::UInt8Array.new([1, 2, 3]))
21+
table = Arrow::Table.new(value: build_array)
2222
@path = File.join(tmp_dir, "data.arrow")
2323
table.save(@path)
2424
File.open(@path, "rb") do |input|
@@ -40,10 +40,25 @@ def read
4040
end
4141
end
4242

43-
def test_uint8
44-
assert_equal([
45-
{"uint8" => [1, 2, 3]},
46-
],
47-
read)
43+
sub_test_case("Int8") do
  # Source array: both signed 8-bit extremes with a null between them.
  def build_array
    values = [-128, nil, 127]
    Arrow::Int8Array.new(values)
  end

  # The values, including the null, must come back unchanged.
  def test_read
    expected = [{"value" => [-128, nil, 127]}]
    assert_equal(expected, read)
  end
end
53+
54+
sub_test_case("UInt8") do
  # Source array: both unsigned 8-bit extremes with a null between them.
  def build_array
    Arrow::UInt8Array.new([0, nil, 255])
  end

  # Named test_read to match the Int8 case; the sub_test_case label
  # already identifies the type under test.
  def test_read
    assert_equal([{"value" => [0, nil, 255]}],
                 read)
  end
end
4964
end

0 commit comments

Comments
 (0)