From 1b04cdf9be5b742fee7ac8e3df7dc145725e16c8 Mon Sep 17 00:00:00 2001 From: Lubo Slivka Date: Thu, 1 Feb 2024 14:18:58 +0100 Subject: [PATCH] fix(c/driver/postgresql): fix numeric to str (#1502) - conversion from numeric to string had two bugs due to deviations from the original PostgreSQL code - leading single zero before decimal would always be dropped - in some cases, the numbers after decimal would not be incomplete and instead replaced with 'garbage' here is the PostgreSQL code: https://github.com/postgres/postgres/blob/9589b038d3203cd5ba708fb4f5c23182c88ad0b3/src/backend/utils/adt/numeric.c#L7443 (the DEC_DIGITS=4 variant) Fixes #1501. --- .../copy/postgres_copy_reader_test.cc | 54 +++++++++++++++++++ .../copy/postgres_copy_test_common.h | 15 ++++++ c/driver/postgresql/copy/reader.h | 10 ++-- 3 files changed, 76 insertions(+), 3 deletions(-) diff --git a/c/driver/postgresql/copy/postgres_copy_reader_test.cc b/c/driver/postgresql/copy/postgres_copy_reader_test.cc index ccc52bce99..0d85c256ec 100644 --- a/c/driver/postgresql/copy/postgres_copy_reader_test.cc +++ b/c/driver/postgresql/copy/postgres_copy_reader_test.cc @@ -373,6 +373,60 @@ TEST(PostgresCopyUtilsTest, PostgresCopyReadNumeric) { EXPECT_EQ(std::string(item.data, item.size_bytes), "inf"); } +TEST(PostgresCopyUtilsTest, PostgresCopyReadNumeric16_10) { + ArrowBufferView data; + data.data.as_uint8 = kTestPgCopyNumeric16_10; + data.size_bytes = sizeof(kTestPgCopyNumeric16_10); + + auto col_type = PostgresType(PostgresTypeId::kNumeric); + PostgresType input_type(PostgresTypeId::kRecord); + input_type.AppendChild("col", col_type); + + PostgresCopyStreamTester tester; + ASSERT_EQ(tester.Init(input_type), NANOARROW_OK); + ASSERT_EQ(tester.ReadAll(&data), ENODATA); + ASSERT_EQ(data.data.as_uint8 - kTestPgCopyNumeric16_10, + sizeof(kTestPgCopyNumeric16_10)); + ASSERT_EQ(data.size_bytes, 0); + + nanoarrow::UniqueArray array; + ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK); + ASSERT_EQ(array->length, 7); + ASSERT_EQ(array->n_children, 1); + + nanoarrow::UniqueSchema schema; + tester.GetSchema(schema.get()); + + nanoarrow::UniqueArrayView array_view; + ASSERT_EQ(ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), nullptr), + NANOARROW_OK); + ASSERT_EQ(array_view->children[0]->storage_type, NANOARROW_TYPE_STRING); + ASSERT_EQ(ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr), NANOARROW_OK); + + auto validity = array_view->children[0]->buffer_views[0].data.as_uint8; + ASSERT_TRUE(ArrowBitGet(validity, 0)); + ASSERT_TRUE(ArrowBitGet(validity, 1)); + ASSERT_TRUE(ArrowBitGet(validity, 2)); + ASSERT_TRUE(ArrowBitGet(validity, 3)); + ASSERT_TRUE(ArrowBitGet(validity, 4)); + ASSERT_TRUE(ArrowBitGet(validity, 5)); + ASSERT_FALSE(ArrowBitGet(validity, 6)); + + struct ArrowStringView item; + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 0); + EXPECT_EQ(std::string(item.data, item.size_bytes), "0.0000000000"); + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 1); + EXPECT_EQ(std::string(item.data, item.size_bytes), "1.0123400000"); + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 2); + EXPECT_EQ(std::string(item.data, item.size_bytes), "1.0123456789"); + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 3); + EXPECT_EQ(std::string(item.data, item.size_bytes), "-1.0123400000"); + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 4); + EXPECT_EQ(std::string(item.data, item.size_bytes), "-1.0123456789"); + item = ArrowArrayViewGetStringUnsafe(array_view->children[0], 5); + EXPECT_EQ(std::string(item.data, item.size_bytes), "nan"); +} + TEST(PostgresCopyUtilsTest, PostgresCopyReadTimestamp) { ArrowBufferView data; data.data.as_uint8 = kTestPgCopyTimestamp; diff --git a/c/driver/postgresql/copy/postgres_copy_test_common.h b/c/driver/postgresql/copy/postgres_copy_test_common.h index 55a96004fb..d685486626 100644 --- a/c/driver/postgresql/copy/postgres_copy_test_common.h +++ b/c/driver/postgresql/copy/postgres_copy_test_common.h @@ -175,4 +175,19 @@ static const uint8_t kTestPgCopyNumeric[] = { 0x00, 0xf0, 0x00, 0x00, 0x20, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0xd0, 0x00, 0x00, 0x20, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; +// COPY (SELECT CAST(col AS NUMERIC(16, 10)) AS col FROM ( VALUES ('0'), ('1.01234'), +// ('1.0123456789'), ('-1.01234'), ('-1.0123456789'), ('nan'), (NULL)) AS +// drvd(col)) TO '/tmp/pgdata.out' WITH (FORMAT binary); +static const uint8_t kTestPgCopyNumeric16_10[] = { + 0x50, 0x47, 0x43, 0x4F, 0x50, 0x59, 0x0A, 0xFF, 0x0D, 0x0A, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x01, 0x00, 0x7B, 0x0F, 0xA0, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x01, 0x00, + 0x7B, 0x11, 0xD7, 0x22, 0xC4, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, + 0x00, 0x40, 0x00, 0x00, 0x0A, 0x00, 0x01, 0x00, 0x7B, 0x0F, 0xA0, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x00, 0x40, 0x00, 0x00, 0x0A, 0x00, 0x01, 0x00, + 0x7B, 0x11, 0xD7, 0x22, 0xC4, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; + } // namespace adbcpq diff --git a/c/driver/postgresql/copy/reader.h b/c/driver/postgresql/copy/reader.h index 49eb308937..a64219cff2 100644 --- a/c/driver/postgresql/copy/reader.h +++ b/c/driver/postgresql/copy/reader.h @@ -352,7 +352,7 @@ class PostgresCopyNumericFieldReader : public PostgresCopyFieldReader { // To strip leading zeroes int append = (d > 0); - for (const auto pow10 : {1000, 100, 10, 1}) { + for (const auto pow10 : {1000, 100, 10}) { d1 = dig / pow10; dig -= d1 * pow10; append |= (d1 > 0); @@ -360,6 +360,8 @@ class PostgresCopyNumericFieldReader : public PostgresCopyFieldReader { *out++ = d1 + '0'; } } + + *out++ = dig + '0'; } } @@ -372,18 +374,20 @@ class PostgresCopyNumericFieldReader : public PostgresCopyFieldReader { *out++ = '.'; actual_chars_required += dscale + 1; - for (int i = 0; i < dscale; i++, d++, i += kDecDigits) { + for (int i = 0; i < dscale; d++, i += kDecDigits) { if (d >= 0 && d < ndigits) { dig = digits_[d]; } else { dig = 0; } - for (const auto pow10 : {1000, 100, 10, 1}) { + for (const auto pow10 : {1000, 100, 10}) { d1 = dig / pow10; dig -= d1 * pow10; *out++ = d1 + '0'; } + + *out++ = dig + '0'; } }