@@ -181,6 +181,14 @@ std::shared_ptr<const LogicalType> get_logical_type(const DataType& type) {
181
181
static_cast <const ::arrow::DictionaryType&>(type);
182
182
return get_logical_type (*dict_type.value_type ());
183
183
}
184
+ case ArrowId::DECIMAL32: {
185
+ const auto & dec_type = static_cast <const ::arrow::Decimal32Type&>(type);
186
+ return LogicalType::Decimal (dec_type.precision (), dec_type.scale ());
187
+ }
188
+ case ArrowId::DECIMAL64: {
189
+ const auto & dec_type = static_cast <const ::arrow::Decimal64Type&>(type);
190
+ return LogicalType::Decimal (dec_type.precision (), dec_type.scale ());
191
+ }
184
192
case ArrowId::DECIMAL128: {
185
193
const auto & dec_type = static_cast <const ::arrow::Decimal128Type&>(type);
186
194
return LogicalType::Decimal (dec_type.precision (), dec_type.scale ());
@@ -206,9 +214,11 @@ ParquetType::type get_physical_type(const DataType& type) {
206
214
case ArrowId::INT16:
207
215
case ArrowId::UINT32:
208
216
case ArrowId::INT32:
217
+ case ArrowId::DECIMAL32:
209
218
return ParquetType::INT32;
210
219
case ArrowId::UINT64:
211
220
case ArrowId::INT64:
221
+ case ArrowId::DECIMAL64:
212
222
return ParquetType::INT64;
213
223
case ArrowId::FLOAT:
214
224
return ParquetType::FLOAT;
@@ -533,6 +543,8 @@ static std::shared_ptr<GroupNode> MakeSimpleSchema(const DataType& type,
533
543
case ::arrow::Type::HALF_FLOAT:
534
544
byte_width = sizeof (::arrow::HalfFloatType::c_type);
535
545
break ;
546
+ case ::arrow::Type::DECIMAL32:
547
+ case ::arrow::Type::DECIMAL64:
536
548
case ::arrow::Type::DECIMAL128:
537
549
case ::arrow::Type::DECIMAL256: {
538
550
const auto & decimal_type = static_cast <const DecimalType&>(values_type);
@@ -548,6 +560,8 @@ static std::shared_ptr<GroupNode> MakeSimpleSchema(const DataType& type,
548
560
case ::arrow::Type::HALF_FLOAT:
549
561
byte_width = sizeof (::arrow::HalfFloatType::c_type);
550
562
break ;
563
+ case ::arrow::Type::DECIMAL32:
564
+ case ::arrow::Type::DECIMAL64:
551
565
case ::arrow::Type::DECIMAL128:
552
566
case ::arrow::Type::DECIMAL256: {
553
567
const auto & decimal_type = static_cast <const DecimalType&>(type);
@@ -783,34 +797,70 @@ class TestReadDecimals : public ParquetIOTestBase {
783
797
// The Decimal roundtrip tests always go through the FixedLenByteArray path,
784
798
// check the ByteArray case manually.
785
799
786
- TEST_F (TestReadDecimals, Decimal128ByteArray ) {
800
+ TEST_F (TestReadDecimals, Decimal32ByteArray ) {
787
801
const std::vector<std::vector<uint8_t >> big_endian_decimals = {
788
802
// 123456
789
803
{1 , 226 , 64 },
790
804
// 987654
791
805
{15 , 18 , 6 },
792
806
// -123456
793
- {255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 254 , 29 , 192 },
807
+ {255 , 254 , 29 , 192 },
794
808
};
795
809
796
810
auto expected =
797
- ArrayFromJSON (::arrow::decimal128 (6 , 3 ), R"( ["123.456", "987.654", "-123.456"])" );
811
+ ArrayFromJSON (::arrow::decimal32 (6 , 3 ), R"( ["123.456", "987.654", "-123.456"])" );
798
812
CheckReadFromByteArrays (LogicalType::Decimal (6 , 3 ), big_endian_decimals, *expected);
799
813
}
800
814
815
+ TEST_F (TestReadDecimals, Decimal64ByteArray) {
816
+ const std::vector<std::vector<uint8_t >> big_endian_decimals = {
817
+ // 123456
818
+ {1 , 226 , 64 },
819
+ // 987654
820
+ {15 , 18 , 6 },
821
+ // -123456
822
+ {255 , 254 , 29 , 192 },
823
+ // -123456
824
+ {255 , 255 , 255 , 255 , 255 , 254 , 29 , 192 },
825
+ };
826
+
827
+ auto expected = ArrayFromJSON (::arrow::decimal64 (16 , 3 ),
828
+ R"( ["123.456", "987.654", "-123.456", "-123.456"])" );
829
+ CheckReadFromByteArrays (LogicalType::Decimal (16 , 3 ), big_endian_decimals, *expected);
830
+ }
831
+
832
+ TEST_F (TestReadDecimals, Decimal128ByteArray) {
833
+ const std::vector<std::vector<uint8_t >> big_endian_decimals = {
834
+ // 123456
835
+ {1 , 226 , 64 },
836
+ // 987654
837
+ {15 , 18 , 6 },
838
+ // -123456
839
+ {255 , 254 , 29 , 192 },
840
+ // -123456
841
+ {255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 254 , 29 , 192 },
842
+ };
843
+
844
+ auto expected = ArrayFromJSON (::arrow::decimal128 (20 , 3 ),
845
+ R"( ["123.456", "987.654", "-123.456", "-123.456"])" );
846
+ CheckReadFromByteArrays (LogicalType::Decimal (20 , 3 ), big_endian_decimals, *expected);
847
+ }
848
+
801
849
TEST_F (TestReadDecimals, Decimal256ByteArray) {
802
850
const std::vector<std::vector<uint8_t >> big_endian_decimals = {
803
851
// 123456
804
852
{1 , 226 , 64 },
805
853
// 987654
806
854
{15 , 18 , 6 },
807
855
// -123456
856
+ {255 , 254 , 29 , 192 },
857
+ // -123456
808
858
{255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 ,
809
859
255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 , 254 , 29 , 192 },
810
860
};
811
861
812
- auto expected =
813
- ArrayFromJSON (:: arrow::decimal256 ( 40 , 3 ), R"( ["123.456", "987.654", "-123.456"])" );
862
+ auto expected = ArrayFromJSON (:: arrow::decimal256 ( 40 , 3 ),
863
+ R"( ["123.456", "987.654", "-123.456 ", "-123.456"])" );
814
864
CheckReadFromByteArrays (LogicalType::Decimal (40 , 3 ), big_endian_decimals, *expected);
815
865
}
816
866
@@ -858,9 +908,9 @@ typedef ::testing::Types<
858
908
::arrow::Int16Type, ::arrow::Int32Type, ::arrow::UInt64Type, ::arrow::Int64Type,
859
909
::arrow::Date32Type, ::arrow::FloatType, ::arrow::DoubleType, ::arrow::StringType,
860
910
::arrow::BinaryType, ::arrow::FixedSizeBinaryType, ::arrow::HalfFloatType,
861
- Decimal128WithPrecisionAndScale <1 >, Decimal128WithPrecisionAndScale <5 >,
862
- Decimal128WithPrecisionAndScale <10 >, Decimal128WithPrecisionAndScale< 19 >,
863
- Decimal128WithPrecisionAndScale<23 >, Decimal128WithPrecisionAndScale<27 >,
911
+ Decimal32WithPrecisionAndScale <1 >, Decimal32WithPrecisionAndScale <5 >,
912
+ Decimal64WithPrecisionAndScale <10 >, Decimal64WithPrecisionAndScale< 18 >,
913
+ Decimal128WithPrecisionAndScale<19 >, Decimal128WithPrecisionAndScale<27 >,
864
914
Decimal128WithPrecisionAndScale<38 >, Decimal256WithPrecisionAndScale<39 >,
865
915
Decimal256WithPrecisionAndScale<56 >, Decimal256WithPrecisionAndScale<76 >>
866
916
TestTypes;
@@ -903,8 +953,9 @@ TYPED_TEST(TestParquetIO, SingleColumnTableRequiredWrite) {
903
953
std::shared_ptr<Table> table = MakeSimpleTable (values, false );
904
954
905
955
this ->ResetSink ();
906
- ASSERT_OK_NO_THROW (WriteTable (*table, ::arrow::default_memory_pool (), this ->sink_ ,
907
- values->length (), default_writer_properties ()));
956
+ ASSERT_OK_NO_THROW (WriteTable (
957
+ *table, ::arrow::default_memory_pool (), this ->sink_ , values->length (),
958
+ ::parquet::WriterProperties::Builder ().enable_store_decimal_as_integer()->build()));
908
959
909
960
std::shared_ptr<Table> out;
910
961
std::unique_ptr<FileReader> reader;
@@ -2944,7 +2995,7 @@ TEST(ArrowReadWrite, Decimal256) {
2944
2995
using ::arrow::Decimal256;
2945
2996
using ::arrow::field;
2946
2997
2947
- auto type = ::arrow::decimal256 (8 , 4 );
2998
+ auto type = ::arrow::decimal256 (48 , 4 );
2948
2999
2949
3000
const char * json = R"( ["1.0000", null, "-1.2345", "-1000.5678",
2950
3001
"-9999.9999", "9999.9999"])" ;
@@ -2958,7 +3009,7 @@ TEST(ArrowReadWrite, DecimalStats) {
2958
3009
using ::arrow::Decimal128;
2959
3010
using ::arrow::field;
2960
3011
2961
- auto type = ::arrow::decimal128 (/* precision=*/ 8 , /* scale=*/ 0 );
3012
+ auto type = ::arrow::decimal128 (/* precision=*/ 28 , /* scale=*/ 0 );
2962
3013
2963
3014
const char * json = R"( ["255", "128", null, "0", "1", "-127", "-128", "-129", "-255"])" ;
2964
3015
auto array = ::arrow::ArrayFromJSON (type, json);
@@ -3447,8 +3498,8 @@ TEST(ArrowReadWrite, NestedRequiredOuterOptional) {
3447
3498
types.push_back (::arrow::duration (::arrow::TimeUnit::MILLI));
3448
3499
types.push_back (::arrow::duration (::arrow::TimeUnit::MICRO));
3449
3500
types.push_back (::arrow::duration (::arrow::TimeUnit::NANO));
3450
- types.push_back (::arrow::decimal128 (3 , 2 ));
3451
- types.push_back (::arrow::decimal256 ( 3 , 2 ));
3501
+ types.push_back (::arrow::decimal32 (3 , 2 ));
3502
+ types.push_back (::arrow::decimal128 ( 23 , 2 ));
3452
3503
types.push_back (::arrow::fixed_size_binary (4 ));
3453
3504
// Note large variants of types appear to get converted back to regular on read
3454
3505
types.push_back (::arrow::dictionary (::arrow::int32 (), ::arrow::binary ()));
@@ -3500,9 +3551,8 @@ TEST(ArrowReadWrite, NestedRequiredOuterOptionalDecimal) {
3500
3551
ByteArray (" \x0f\x12\x06 " ), // 987654
3501
3552
};
3502
3553
const std::vector<int32_t > int32_values = {123456 , 987654 };
3503
- const std::vector<int64_t > int64_values = {123456 , 987654 };
3504
3554
3505
- const auto inner_type = ::arrow::decimal128 (6 , 3 );
3555
+ const auto inner_type = ::arrow::decimal32 (6 , 3 );
3506
3556
auto inner_field = ::arrow::field (" inner" , inner_type, /* nullable=*/ false );
3507
3557
auto type = ::arrow::struct_ ({inner_field});
3508
3558
auto field = ::arrow::field (" outer" , type, /* nullable=*/ true );
@@ -3512,7 +3562,7 @@ TEST(ArrowReadWrite, NestedRequiredOuterOptionalDecimal) {
3512
3562
::arrow::StructArray::Make ({inner}, {inner_field}, null_bitmap));
3513
3563
auto table = ::arrow::Table::Make (::arrow::schema ({field}), {array});
3514
3564
3515
- for (const auto & encoding : {Type::BYTE_ARRAY, Type::INT32, Type::INT64 }) {
3565
+ for (const auto & encoding : {Type::BYTE_ARRAY, Type::INT32}) {
3516
3566
// Manually write out file based on encoding type
3517
3567
ARROW_SCOPED_TRACE (" Encoding decimals as " , encoding);
3518
3568
auto parquet_schema = GroupNode::Make (
@@ -3543,12 +3593,6 @@ TEST(ArrowReadWrite, NestedRequiredOuterOptionalDecimal) {
3543
3593
int32_values.data ());
3544
3594
break ;
3545
3595
}
3546
- case Type::INT64: {
3547
- auto typed_writer = checked_cast<Int64Writer*>(column_writer);
3548
- typed_writer->WriteBatch (4 , def_levels.data (), /* rep_levels=*/ nullptr ,
3549
- int64_values.data ());
3550
- break ;
3551
- }
3552
3596
default :
3553
3597
FAIL () << " Invalid encoding" ;
3554
3598
return ;
@@ -3562,11 +3606,11 @@ TEST(ArrowReadWrite, NestedRequiredOuterOptionalDecimal) {
3562
3606
}
3563
3607
}
3564
3608
3565
- TEST (ArrowReadWrite, Decimal256AsInt ) {
3609
+ TEST (ArrowReadWrite, Decimal32AsInt ) {
3566
3610
using ::arrow::Decimal256;
3567
3611
using ::arrow::field;
3568
3612
3569
- auto type = ::arrow::decimal256 (8 , 4 );
3613
+ auto type = ::arrow::decimal32 (8 , 4 );
3570
3614
3571
3615
const char * json = R"( ["1.0000", null, "-1.2345", "-1000.5678",
3572
3616
"-9999.9999", "9999.9999"])" ;
@@ -4059,7 +4103,7 @@ TEST(TestArrowReaderAdHoc, WriteBatchedNestedNullableStringColumn) {
4059
4103
// ARROW-10493
4060
4104
std::vector<std::shared_ptr<::arrow::Field>> fields{
4061
4105
::arrow::field (" s" , ::arrow::utf8(), /* nullable=*/ true),
4062
- ::arrow::field(" d" , ::arrow::decimal128 (4 , 2 ), /* nullable=*/ true),
4106
+ ::arrow::field(" d" , ::arrow::decimal32 (4 , 2 ), /* nullable=*/ true),
4063
4107
::arrow::field(" b" , ::arrow::boolean(), /* nullable=*/ true),
4064
4108
::arrow::field(" i8" , ::arrow::int8(), /* nullable=*/ true),
4065
4109
::arrow::field(" i64" , ::arrow::int64(), /* nullable=*/ true)};
@@ -4222,25 +4266,47 @@ TEST_P(TestArrowReaderAdHocSparkAndHvr, ReadDecimals) {
4222
4266
4223
4267
std::shared_ptr<Array> expected_array;
4224
4268
4225
- ::arrow::Decimal128Builder builder (decimal_type, pool);
4226
-
4227
- for (int32_t i = 0 ; i < expected_length; ++i) {
4228
- ::arrow::Decimal128 value ((i + 1 ) * 100 );
4229
- ASSERT_OK (builder.Append (value));
4269
+ if (decimal_type->id () == ::arrow::Decimal32Type::type_id) {
4270
+ ::arrow::Decimal32Builder builder (decimal_type, pool);
4271
+ for (int32_t i = 0 ; i < expected_length; ++i) {
4272
+ ::arrow::Decimal32 value ((i + 1 ) * 100 );
4273
+ ASSERT_OK (builder.Append (value));
4274
+ }
4275
+ ASSERT_OK (builder.Finish (&expected_array));
4276
+ } else if (decimal_type->id () == ::arrow::Decimal64Type::type_id) {
4277
+ ::arrow::Decimal64Builder builder (decimal_type, pool);
4278
+ for (int32_t i = 0 ; i < expected_length; ++i) {
4279
+ ::arrow::Decimal64 value ((i + 1 ) * 100 );
4280
+ ASSERT_OK (builder.Append (value));
4281
+ }
4282
+ ASSERT_OK (builder.Finish (&expected_array));
4283
+ } else if (decimal_type->id () == ::arrow::Decimal128Type::type_id) {
4284
+ ::arrow::Decimal128Builder builder (decimal_type, pool);
4285
+ for (int32_t i = 0 ; i < expected_length; ++i) {
4286
+ ::arrow::Decimal128 value ((i + 1 ) * 100 );
4287
+ ASSERT_OK (builder.Append (value));
4288
+ }
4289
+ ASSERT_OK (builder.Finish (&expected_array));
4290
+ } else {
4291
+ ::arrow::Decimal256Builder builder (decimal_type, pool);
4292
+ for (int32_t i = 0 ; i < expected_length; ++i) {
4293
+ ::arrow::Decimal256 value ((i + 1 ) * 100 );
4294
+ ASSERT_OK (builder.Append (value));
4295
+ }
4296
+ ASSERT_OK (builder.Finish (&expected_array));
4230
4297
}
4231
- ASSERT_OK (builder. Finish (&expected_array));
4298
+
4232
4299
AssertArraysEqual (*expected_array, *chunk);
4233
4300
}
4234
4301
4235
4302
INSTANTIATE_TEST_SUITE_P (
4236
4303
ReadDecimals, TestArrowReaderAdHocSparkAndHvr,
4237
4304
::testing::Values (
4238
- std::make_tuple (" int32_decimal.parquet" , ::arrow::decimal128 (4 , 2 )),
4239
- std::make_tuple(" int64_decimal.parquet" , ::arrow::decimal128 (10 , 2 )),
4305
+ std::make_tuple (" int32_decimal.parquet" , ::arrow::decimal32 (4 , 2 )),
4306
+ std::make_tuple(" int64_decimal.parquet" , ::arrow::decimal64 (10 , 2 )),
4240
4307
std::make_tuple(" fixed_length_decimal.parquet" , ::arrow::decimal128(25 , 2 )),
4241
- std::make_tuple(" fixed_length_decimal_legacy.parquet" ,
4242
- ::arrow::decimal128 (13 , 2 )),
4243
- std::make_tuple(" byte_array_decimal.parquet" , ::arrow::decimal128(4 , 2 ))));
4308
+ std::make_tuple(" fixed_length_decimal_legacy.parquet" , ::arrow::decimal64(13 , 2 )),
4309
+ std::make_tuple(" byte_array_decimal.parquet" , ::arrow::decimal32(4 , 2 ))));
4244
4310
4245
4311
TEST (TestArrowReaderAdHoc, ReadFloat16Files) {
4246
4312
using ::arrow::util::Float16;
@@ -5162,33 +5228,17 @@ class TestIntegerAnnotateDecimalTypeParquetIO : public TestParquetIO<TestType> {
5162
5228
this ->ReaderFromSink (&reader);
5163
5229
this ->ReadSingleColumnFile (std::move (reader), &out);
5164
5230
5165
- // Reader always read values as DECIMAL128 type
5166
- ASSERT_EQ (out->type ()->id (), ::arrow::Type::DECIMAL128);
5167
-
5168
- if (values.type ()->id () == ::arrow::Type::DECIMAL128) {
5169
- AssertArraysEqual (values, *out);
5170
- } else {
5171
- auto & expected_values = dynamic_cast <const ::arrow::Decimal256Array&>(values);
5172
- auto & read_values = dynamic_cast <const ::arrow::Decimal128Array&>(*out);
5173
- ASSERT_EQ (expected_values.length (), read_values.length ());
5174
- ASSERT_EQ (expected_values.null_count (), read_values.null_count ());
5175
- ASSERT_EQ (expected_values.length (), read_values.length ());
5176
- for (int64_t i = 0 ; i < expected_values.length (); ++i) {
5177
- ASSERT_EQ (expected_values.IsNull (i), read_values.IsNull (i));
5178
- if (!expected_values.IsNull (i)) {
5179
- ASSERT_EQ (::arrow::Decimal256 (expected_values.Value (i)).ToString (0 ),
5180
- ::arrow::Decimal128 (read_values.Value(i)).ToString(0 ));
5181
- }
5182
- }
5183
- }
5231
+ ASSERT_EQ (out->type ()->id (), TestType::type_id);
5232
+ AssertArraysEqual (values, *out);
5184
5233
}
5185
5234
};
5186
5235
5187
5236
typedef ::testing::Types<
5188
- Decimal128WithPrecisionAndScale<1 >, Decimal128WithPrecisionAndScale<5 >,
5189
- Decimal128WithPrecisionAndScale<10 >, Decimal128WithPrecisionAndScale<18 >,
5190
- Decimal256WithPrecisionAndScale<1 >, Decimal256WithPrecisionAndScale<5 >,
5191
- Decimal256WithPrecisionAndScale<10 >, Decimal256WithPrecisionAndScale<18 >>
5237
+ Decimal32WithPrecisionAndScale<1 >, Decimal32WithPrecisionAndScale<5 >,
5238
+ Decimal64WithPrecisionAndScale<10 >, Decimal64WithPrecisionAndScale<18 >,
5239
+ Decimal128WithPrecisionAndScale<19 >, Decimal128WithPrecisionAndScale<27 >,
5240
+ Decimal128WithPrecisionAndScale<38 >, Decimal256WithPrecisionAndScale<39 >,
5241
+ Decimal256WithPrecisionAndScale<56 >, Decimal256WithPrecisionAndScale<76 >>
5192
5242
DecimalTestTypes;
5193
5243
5194
5244
TYPED_TEST_SUITE (TestIntegerAnnotateDecimalTypeParquetIO, DecimalTestTypes);
0 commit comments