Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit cffcc10

Browse files
author
Tim Nguyen
committedJan 25, 2025·
Fix all tests
1 parent 9bce999 commit cffcc10

File tree

5 files changed

+270
-77
lines changed

5 files changed

+270
-77
lines changed
 

‎cpp/src/arrow/compute/kernels/vector_selection.cc

+2-1
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,8 @@ std::shared_ptr<VectorFunction> MakeIndicesNonZeroFunction(std::string name,
308308
AddKernels(NumericTypes());
309309
AddKernels({boolean()});
310310

311-
for (const auto& ty : {Type::DECIMAL32, Type::DECIMAL64, Type::DECIMAL128, Type::DECIMAL256}) {
311+
for (const auto& ty :
312+
{Type::DECIMAL32, Type::DECIMAL64, Type::DECIMAL128, Type::DECIMAL256}) {
312313
kernel.signature = KernelSignature::Make({ty}, uint64());
313314
DCHECK_OK(func->AddKernel(kernel));
314315
}

‎cpp/src/parquet/arrow/arrow_reader_writer_test.cc

+110-60
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,14 @@ std::shared_ptr<const LogicalType> get_logical_type(const DataType& type) {
181181
static_cast<const ::arrow::DictionaryType&>(type);
182182
return get_logical_type(*dict_type.value_type());
183183
}
184+
case ArrowId::DECIMAL32: {
185+
const auto& dec_type = static_cast<const ::arrow::Decimal32Type&>(type);
186+
return LogicalType::Decimal(dec_type.precision(), dec_type.scale());
187+
}
188+
case ArrowId::DECIMAL64: {
189+
const auto& dec_type = static_cast<const ::arrow::Decimal64Type&>(type);
190+
return LogicalType::Decimal(dec_type.precision(), dec_type.scale());
191+
}
184192
case ArrowId::DECIMAL128: {
185193
const auto& dec_type = static_cast<const ::arrow::Decimal128Type&>(type);
186194
return LogicalType::Decimal(dec_type.precision(), dec_type.scale());
@@ -206,9 +214,11 @@ ParquetType::type get_physical_type(const DataType& type) {
206214
case ArrowId::INT16:
207215
case ArrowId::UINT32:
208216
case ArrowId::INT32:
217+
case ArrowId::DECIMAL32:
209218
return ParquetType::INT32;
210219
case ArrowId::UINT64:
211220
case ArrowId::INT64:
221+
case ArrowId::DECIMAL64:
212222
return ParquetType::INT64;
213223
case ArrowId::FLOAT:
214224
return ParquetType::FLOAT;
@@ -533,6 +543,8 @@ static std::shared_ptr<GroupNode> MakeSimpleSchema(const DataType& type,
533543
case ::arrow::Type::HALF_FLOAT:
534544
byte_width = sizeof(::arrow::HalfFloatType::c_type);
535545
break;
546+
case ::arrow::Type::DECIMAL32:
547+
case ::arrow::Type::DECIMAL64:
536548
case ::arrow::Type::DECIMAL128:
537549
case ::arrow::Type::DECIMAL256: {
538550
const auto& decimal_type = static_cast<const DecimalType&>(values_type);
@@ -548,6 +560,8 @@ static std::shared_ptr<GroupNode> MakeSimpleSchema(const DataType& type,
548560
case ::arrow::Type::HALF_FLOAT:
549561
byte_width = sizeof(::arrow::HalfFloatType::c_type);
550562
break;
563+
case ::arrow::Type::DECIMAL32:
564+
case ::arrow::Type::DECIMAL64:
551565
case ::arrow::Type::DECIMAL128:
552566
case ::arrow::Type::DECIMAL256: {
553567
const auto& decimal_type = static_cast<const DecimalType&>(type);
@@ -783,34 +797,70 @@ class TestReadDecimals : public ParquetIOTestBase {
783797
// The Decimal roundtrip tests always go through the FixedLenByteArray path,
784798
// check the ByteArray case manually.
785799

786-
TEST_F(TestReadDecimals, Decimal128ByteArray) {
800+
TEST_F(TestReadDecimals, Decimal32ByteArray) {
787801
const std::vector<std::vector<uint8_t>> big_endian_decimals = {
788802
// 123456
789803
{1, 226, 64},
790804
// 987654
791805
{15, 18, 6},
792806
// -123456
793-
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 254, 29, 192},
807+
{255, 254, 29, 192},
794808
};
795809

796810
auto expected =
797-
ArrayFromJSON(::arrow::decimal128(6, 3), R"(["123.456", "987.654", "-123.456"])");
811+
ArrayFromJSON(::arrow::decimal32(6, 3), R"(["123.456", "987.654", "-123.456"])");
798812
CheckReadFromByteArrays(LogicalType::Decimal(6, 3), big_endian_decimals, *expected);
799813
}
800814

815+
TEST_F(TestReadDecimals, Decimal64ByteArray) {
816+
const std::vector<std::vector<uint8_t>> big_endian_decimals = {
817+
// 123456
818+
{1, 226, 64},
819+
// 987654
820+
{15, 18, 6},
821+
// -123456
822+
{255, 254, 29, 192},
823+
// -123456
824+
{255, 255, 255, 255, 255, 254, 29, 192},
825+
};
826+
827+
auto expected = ArrayFromJSON(::arrow::decimal64(16, 3),
828+
R"(["123.456", "987.654", "-123.456", "-123.456"])");
829+
CheckReadFromByteArrays(LogicalType::Decimal(16, 3), big_endian_decimals, *expected);
830+
}
831+
832+
TEST_F(TestReadDecimals, Decimal128ByteArray) {
833+
const std::vector<std::vector<uint8_t>> big_endian_decimals = {
834+
// 123456
835+
{1, 226, 64},
836+
// 987654
837+
{15, 18, 6},
838+
// -123456
839+
{255, 254, 29, 192},
840+
// -123456
841+
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 254, 29, 192},
842+
};
843+
844+
auto expected = ArrayFromJSON(::arrow::decimal128(20, 3),
845+
R"(["123.456", "987.654", "-123.456", "-123.456"])");
846+
CheckReadFromByteArrays(LogicalType::Decimal(20, 3), big_endian_decimals, *expected);
847+
}
848+
801849
TEST_F(TestReadDecimals, Decimal256ByteArray) {
802850
const std::vector<std::vector<uint8_t>> big_endian_decimals = {
803851
// 123456
804852
{1, 226, 64},
805853
// 987654
806854
{15, 18, 6},
807855
// -123456
856+
{255, 254, 29, 192},
857+
// -123456
808858
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
809859
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 254, 29, 192},
810860
};
811861

812-
auto expected =
813-
ArrayFromJSON(::arrow::decimal256(40, 3), R"(["123.456", "987.654", "-123.456"])");
862+
auto expected = ArrayFromJSON(::arrow::decimal256(40, 3),
863+
R"(["123.456", "987.654", "-123.456", "-123.456"])");
814864
CheckReadFromByteArrays(LogicalType::Decimal(40, 3), big_endian_decimals, *expected);
815865
}
816866

@@ -858,9 +908,9 @@ typedef ::testing::Types<
858908
::arrow::Int16Type, ::arrow::Int32Type, ::arrow::UInt64Type, ::arrow::Int64Type,
859909
::arrow::Date32Type, ::arrow::FloatType, ::arrow::DoubleType, ::arrow::StringType,
860910
::arrow::BinaryType, ::arrow::FixedSizeBinaryType, ::arrow::HalfFloatType,
861-
Decimal128WithPrecisionAndScale<1>, Decimal128WithPrecisionAndScale<5>,
862-
Decimal128WithPrecisionAndScale<10>, Decimal128WithPrecisionAndScale<19>,
863-
Decimal128WithPrecisionAndScale<23>, Decimal128WithPrecisionAndScale<27>,
911+
Decimal32WithPrecisionAndScale<1>, Decimal32WithPrecisionAndScale<5>,
912+
Decimal64WithPrecisionAndScale<10>, Decimal64WithPrecisionAndScale<18>,
913+
Decimal128WithPrecisionAndScale<19>, Decimal128WithPrecisionAndScale<27>,
864914
Decimal128WithPrecisionAndScale<38>, Decimal256WithPrecisionAndScale<39>,
865915
Decimal256WithPrecisionAndScale<56>, Decimal256WithPrecisionAndScale<76>>
866916
TestTypes;
@@ -903,8 +953,9 @@ TYPED_TEST(TestParquetIO, SingleColumnTableRequiredWrite) {
903953
std::shared_ptr<Table> table = MakeSimpleTable(values, false);
904954

905955
this->ResetSink();
906-
ASSERT_OK_NO_THROW(WriteTable(*table, ::arrow::default_memory_pool(), this->sink_,
907-
values->length(), default_writer_properties()));
956+
ASSERT_OK_NO_THROW(WriteTable(
957+
*table, ::arrow::default_memory_pool(), this->sink_, values->length(),
958+
::parquet::WriterProperties::Builder().enable_store_decimal_as_integer()->build()));
908959

909960
std::shared_ptr<Table> out;
910961
std::unique_ptr<FileReader> reader;
@@ -2944,7 +2995,7 @@ TEST(ArrowReadWrite, Decimal256) {
29442995
using ::arrow::Decimal256;
29452996
using ::arrow::field;
29462997

2947-
auto type = ::arrow::decimal256(8, 4);
2998+
auto type = ::arrow::decimal256(48, 4);
29482999

29493000
const char* json = R"(["1.0000", null, "-1.2345", "-1000.5678",
29503001
"-9999.9999", "9999.9999"])";
@@ -2958,7 +3009,7 @@ TEST(ArrowReadWrite, DecimalStats) {
29583009
using ::arrow::Decimal128;
29593010
using ::arrow::field;
29603011

2961-
auto type = ::arrow::decimal128(/*precision=*/8, /*scale=*/0);
3012+
auto type = ::arrow::decimal128(/*precision=*/28, /*scale=*/0);
29623013

29633014
const char* json = R"(["255", "128", null, "0", "1", "-127", "-128", "-129", "-255"])";
29643015
auto array = ::arrow::ArrayFromJSON(type, json);
@@ -3447,8 +3498,8 @@ TEST(ArrowReadWrite, NestedRequiredOuterOptional) {
34473498
types.push_back(::arrow::duration(::arrow::TimeUnit::MILLI));
34483499
types.push_back(::arrow::duration(::arrow::TimeUnit::MICRO));
34493500
types.push_back(::arrow::duration(::arrow::TimeUnit::NANO));
3450-
types.push_back(::arrow::decimal128(3, 2));
3451-
types.push_back(::arrow::decimal256(3, 2));
3501+
types.push_back(::arrow::decimal32(3, 2));
3502+
types.push_back(::arrow::decimal128(23, 2));
34523503
types.push_back(::arrow::fixed_size_binary(4));
34533504
// Note large variants of types appear to get converted back to regular on read
34543505
types.push_back(::arrow::dictionary(::arrow::int32(), ::arrow::binary()));
@@ -3500,9 +3551,8 @@ TEST(ArrowReadWrite, NestedRequiredOuterOptionalDecimal) {
35003551
ByteArray("\x0f\x12\x06"), // 987654
35013552
};
35023553
const std::vector<int32_t> int32_values = {123456, 987654};
3503-
const std::vector<int64_t> int64_values = {123456, 987654};
35043554

3505-
const auto inner_type = ::arrow::decimal128(6, 3);
3555+
const auto inner_type = ::arrow::decimal32(6, 3);
35063556
auto inner_field = ::arrow::field("inner", inner_type, /*nullable=*/false);
35073557
auto type = ::arrow::struct_({inner_field});
35083558
auto field = ::arrow::field("outer", type, /*nullable=*/true);
@@ -3512,7 +3562,7 @@ TEST(ArrowReadWrite, NestedRequiredOuterOptionalDecimal) {
35123562
::arrow::StructArray::Make({inner}, {inner_field}, null_bitmap));
35133563
auto table = ::arrow::Table::Make(::arrow::schema({field}), {array});
35143564

3515-
for (const auto& encoding : {Type::BYTE_ARRAY, Type::INT32, Type::INT64}) {
3565+
for (const auto& encoding : {Type::BYTE_ARRAY, Type::INT32}) {
35163566
// Manually write out file based on encoding type
35173567
ARROW_SCOPED_TRACE("Encoding decimals as ", encoding);
35183568
auto parquet_schema = GroupNode::Make(
@@ -3543,12 +3593,6 @@ TEST(ArrowReadWrite, NestedRequiredOuterOptionalDecimal) {
35433593
int32_values.data());
35443594
break;
35453595
}
3546-
case Type::INT64: {
3547-
auto typed_writer = checked_cast<Int64Writer*>(column_writer);
3548-
typed_writer->WriteBatch(4, def_levels.data(), /*rep_levels=*/nullptr,
3549-
int64_values.data());
3550-
break;
3551-
}
35523596
default:
35533597
FAIL() << "Invalid encoding";
35543598
return;
@@ -3562,11 +3606,11 @@ TEST(ArrowReadWrite, NestedRequiredOuterOptionalDecimal) {
35623606
}
35633607
}
35643608

3565-
TEST(ArrowReadWrite, Decimal256AsInt) {
3609+
TEST(ArrowReadWrite, Decimal32AsInt) {
35663610
using ::arrow::Decimal256;
35673611
using ::arrow::field;
35683612

3569-
auto type = ::arrow::decimal256(8, 4);
3613+
auto type = ::arrow::decimal32(8, 4);
35703614

35713615
const char* json = R"(["1.0000", null, "-1.2345", "-1000.5678",
35723616
"-9999.9999", "9999.9999"])";
@@ -4059,7 +4103,7 @@ TEST(TestArrowReaderAdHoc, WriteBatchedNestedNullableStringColumn) {
40594103
// ARROW-10493
40604104
std::vector<std::shared_ptr<::arrow::Field>> fields{
40614105
::arrow::field("s", ::arrow::utf8(), /*nullable=*/true),
4062-
::arrow::field("d", ::arrow::decimal128(4, 2), /*nullable=*/true),
4106+
::arrow::field("d", ::arrow::decimal32(4, 2), /*nullable=*/true),
40634107
::arrow::field("b", ::arrow::boolean(), /*nullable=*/true),
40644108
::arrow::field("i8", ::arrow::int8(), /*nullable=*/true),
40654109
::arrow::field("i64", ::arrow::int64(), /*nullable=*/true)};
@@ -4222,25 +4266,47 @@ TEST_P(TestArrowReaderAdHocSparkAndHvr, ReadDecimals) {
42224266

42234267
std::shared_ptr<Array> expected_array;
42244268

4225-
::arrow::Decimal128Builder builder(decimal_type, pool);
4226-
4227-
for (int32_t i = 0; i < expected_length; ++i) {
4228-
::arrow::Decimal128 value((i + 1) * 100);
4229-
ASSERT_OK(builder.Append(value));
4269+
if (decimal_type->id() == ::arrow::Decimal32Type::type_id) {
4270+
::arrow::Decimal32Builder builder(decimal_type, pool);
4271+
for (int32_t i = 0; i < expected_length; ++i) {
4272+
::arrow::Decimal32 value((i + 1) * 100);
4273+
ASSERT_OK(builder.Append(value));
4274+
}
4275+
ASSERT_OK(builder.Finish(&expected_array));
4276+
} else if (decimal_type->id() == ::arrow::Decimal64Type::type_id) {
4277+
::arrow::Decimal64Builder builder(decimal_type, pool);
4278+
for (int32_t i = 0; i < expected_length; ++i) {
4279+
::arrow::Decimal64 value((i + 1) * 100);
4280+
ASSERT_OK(builder.Append(value));
4281+
}
4282+
ASSERT_OK(builder.Finish(&expected_array));
4283+
} else if (decimal_type->id() == ::arrow::Decimal128Type::type_id) {
4284+
::arrow::Decimal128Builder builder(decimal_type, pool);
4285+
for (int32_t i = 0; i < expected_length; ++i) {
4286+
::arrow::Decimal128 value((i + 1) * 100);
4287+
ASSERT_OK(builder.Append(value));
4288+
}
4289+
ASSERT_OK(builder.Finish(&expected_array));
4290+
} else {
4291+
::arrow::Decimal256Builder builder(decimal_type, pool);
4292+
for (int32_t i = 0; i < expected_length; ++i) {
4293+
::arrow::Decimal256 value((i + 1) * 100);
4294+
ASSERT_OK(builder.Append(value));
4295+
}
4296+
ASSERT_OK(builder.Finish(&expected_array));
42304297
}
4231-
ASSERT_OK(builder.Finish(&expected_array));
4298+
42324299
AssertArraysEqual(*expected_array, *chunk);
42334300
}
42344301

42354302
INSTANTIATE_TEST_SUITE_P(
42364303
ReadDecimals, TestArrowReaderAdHocSparkAndHvr,
42374304
::testing::Values(
4238-
std::make_tuple("int32_decimal.parquet", ::arrow::decimal128(4, 2)),
4239-
std::make_tuple("int64_decimal.parquet", ::arrow::decimal128(10, 2)),
4305+
std::make_tuple("int32_decimal.parquet", ::arrow::decimal32(4, 2)),
4306+
std::make_tuple("int64_decimal.parquet", ::arrow::decimal64(10, 2)),
42404307
std::make_tuple("fixed_length_decimal.parquet", ::arrow::decimal128(25, 2)),
4241-
std::make_tuple("fixed_length_decimal_legacy.parquet",
4242-
::arrow::decimal128(13, 2)),
4243-
std::make_tuple("byte_array_decimal.parquet", ::arrow::decimal128(4, 2))));
4308+
std::make_tuple("fixed_length_decimal_legacy.parquet", ::arrow::decimal64(13, 2)),
4309+
std::make_tuple("byte_array_decimal.parquet", ::arrow::decimal32(4, 2))));
42444310

42454311
TEST(TestArrowReaderAdHoc, ReadFloat16Files) {
42464312
using ::arrow::util::Float16;
@@ -5162,33 +5228,17 @@ class TestIntegerAnnotateDecimalTypeParquetIO : public TestParquetIO<TestType> {
51625228
this->ReaderFromSink(&reader);
51635229
this->ReadSingleColumnFile(std::move(reader), &out);
51645230

5165-
// Reader always read values as DECIMAL128 type
5166-
ASSERT_EQ(out->type()->id(), ::arrow::Type::DECIMAL128);
5167-
5168-
if (values.type()->id() == ::arrow::Type::DECIMAL128) {
5169-
AssertArraysEqual(values, *out);
5170-
} else {
5171-
auto& expected_values = dynamic_cast<const ::arrow::Decimal256Array&>(values);
5172-
auto& read_values = dynamic_cast<const ::arrow::Decimal128Array&>(*out);
5173-
ASSERT_EQ(expected_values.length(), read_values.length());
5174-
ASSERT_EQ(expected_values.null_count(), read_values.null_count());
5175-
ASSERT_EQ(expected_values.length(), read_values.length());
5176-
for (int64_t i = 0; i < expected_values.length(); ++i) {
5177-
ASSERT_EQ(expected_values.IsNull(i), read_values.IsNull(i));
5178-
if (!expected_values.IsNull(i)) {
5179-
ASSERT_EQ(::arrow::Decimal256(expected_values.Value(i)).ToString(0),
5180-
::arrow::Decimal128(read_values.Value(i)).ToString(0));
5181-
}
5182-
}
5183-
}
5231+
ASSERT_EQ(out->type()->id(), TestType::type_id);
5232+
AssertArraysEqual(values, *out);
51845233
}
51855234
};
51865235

51875236
typedef ::testing::Types<
5188-
Decimal128WithPrecisionAndScale<1>, Decimal128WithPrecisionAndScale<5>,
5189-
Decimal128WithPrecisionAndScale<10>, Decimal128WithPrecisionAndScale<18>,
5190-
Decimal256WithPrecisionAndScale<1>, Decimal256WithPrecisionAndScale<5>,
5191-
Decimal256WithPrecisionAndScale<10>, Decimal256WithPrecisionAndScale<18>>
5237+
Decimal32WithPrecisionAndScale<1>, Decimal32WithPrecisionAndScale<5>,
5238+
Decimal64WithPrecisionAndScale<10>, Decimal64WithPrecisionAndScale<18>,
5239+
Decimal128WithPrecisionAndScale<19>, Decimal128WithPrecisionAndScale<27>,
5240+
Decimal128WithPrecisionAndScale<38>, Decimal256WithPrecisionAndScale<39>,
5241+
Decimal256WithPrecisionAndScale<56>, Decimal256WithPrecisionAndScale<76>>
51925242
DecimalTestTypes;
51935243

51945244
TYPED_TEST_SUITE(TestIntegerAnnotateDecimalTypeParquetIO, DecimalTestTypes);

‎cpp/src/parquet/arrow/arrow_schema_test.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ const auto TIMESTAMP_MS = ::arrow::timestamp(TimeUnit::MILLI);
6767
const auto TIMESTAMP_US = ::arrow::timestamp(TimeUnit::MICRO);
6868
const auto TIMESTAMP_NS = ::arrow::timestamp(TimeUnit::NANO);
6969
const auto BINARY = ::arrow::binary();
70-
const auto DECIMAL_8_4 = std::make_shared<::arrow::Decimal128Type>(8, 4);
70+
const auto DECIMAL_8_4 = std::make_shared<::arrow::Decimal32Type>(8, 4);
7171

7272
class TestConvertParquetSchema : public ::testing::Test {
7373
public:

0 commit comments

Comments
 (0)
Please sign in to comment.