Skip to content

Commit 1ecc529

Browse files
committed
Map Oracle NUMBER with scale 0 or no scale to decimal
1 parent 38f93e2 commit 1ecc529

File tree

2 files changed

+47
-34
lines changed

2 files changed

+47
-34
lines changed

dlt/sources/sql_database/schema_types.py

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -113,24 +113,23 @@ def sqla_col_to_column_schema(
113113
# we represent UUID as text by default, see default_table_adapter
114114
col["data_type"] = "text"
115115
elif isinstance(sql_t, sqltypes.Numeric):
116-
# special handling for oracle NUMBER types
117-
if isinstance(sql_t, NUMBER) and sql_t.scale is None or sql_t.scale == 0:
118-
col["data_type"] = "bigint"
119-
if add_precision and sql_t.precision is not None:
120-
col["precision"] = sql_t.precision
121-
# all Numeric types that are returned as floats will assume "double" type
122-
# and those that are returned as decimals will assume "decimal" type
123-
elif sql_t.asdecimal is False:
116+
# Oracle NUMBER(p, 0) or NUMBER(p) represents integers but uses decimal arithmetic
117+
is_oracle_integer = isinstance(sql_t, NUMBER) and (sql_t.scale is None or sql_t.scale == 0)
118+
# "double" for float-returning types, except Oracle integers
119+
# "decimal" for all decimal-returning types and Oracle integers
120+
if sql_t.asdecimal is False and not is_oracle_integer:
124121
col["data_type"] = "double"
125122
else:
126123
col["data_type"] = "decimal"
127124
if sql_t.precision is not None:
128125
col["precision"] = sql_t.precision
129-
# must have a precision for any meaningful scale
130-
if sql_t.scale is not None:
131-
col["scale"] = sql_t.scale
132-
elif sql_t.decimal_return_scale is not None:
133-
col["scale"] = sql_t.decimal_return_scale
126+
# Scale is explicitly 0 for Oracle integers, otherwise from database type
127+
if is_oracle_integer:
128+
col["scale"] = 0
129+
elif sql_t.scale is not None:
130+
col["scale"] = sql_t.scale
131+
elif sql_t.decimal_return_scale is not None:
132+
col["scale"] = sql_t.decimal_return_scale
134133
elif isinstance(sql_t, sqltypes.SmallInteger):
135134
col["data_type"] = "bigint"
136135
if add_precision:

tests/sources/sql_database/test_schema_types.py

Lines changed: 35 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
from dlt.common.data_types import TDataType
1010
from dlt.sources.sql_database.schema_types import get_table_references, sqla_col_to_column_schema
1111

12+
from dlt.common.libs.pyarrow import get_py_arrow_datatype
13+
from dlt.common.destination.capabilities import DestinationCapabilitiesContext
14+
from decimal import Decimal
15+
1216

1317
def test_get_table_references() -> None:
1418
# Test converting foreign keys to reference hints
@@ -128,9 +132,9 @@ def test_get_table_references() -> None:
128132
@pytest.mark.parametrize(
129133
"oracle_type,expected_type,expected_precision,expected_scale,test_value",
130134
[
131-
(NUMBER(), "bigint", None, None, 123456789),
132-
(NUMBER(precision=17), "bigint", 17, None, 9309935020231023),
133-
(NUMBER(precision=17, scale=0), "bigint", 17, None, 9309935020231023),
135+
(NUMBER(), "decimal", None, 0, 123456789),
136+
(NUMBER(precision=17), "decimal", 17, 0, 9309935020231023),
137+
(NUMBER(precision=17, scale=0), "decimal", 17, 0, 9309935020231023),
134138
(NUMBER(precision=10, scale=2), "decimal", 10, 2, 12345.67),
135139
(NUMBER(precision=17, scale=2, asdecimal=False), "double", None, None, 12345.67),
136140
],
@@ -146,27 +150,37 @@ def test_oracle_number_type_inference(
146150
"""Test Oracle NUMBER type inference to prevent PyArrow conversion errors.
147151
148152
Oracle NUMBER types can represent both integers and decimals based on their scale:
149-
- NUMBER with scale=0 or no scale → should be inferred as 'bigint'
150-
- NUMBER with scale>0 → should be inferred as 'decimal'
153+
- NUMBER with scale=0 or no scale → should be inferred as 'decimal' with scale=0
154+
- NUMBER with scale>0 → should be inferred as 'decimal' with appropriate scale
155+
- NUMBER with asdecimal=False → should be inferred as 'double'
151156
152-
Previously, all Oracle NUMBER types were incorrectly inferred as 'double',
153-
causing PyArrow conversion failures for large integers like 9309935020231023
154-
that cannot be precisely represented as float64.
157+
Previously, all Oracle NUMBER types were incorrectly inferred as 'double', which cannot exactly represent large integers such as 9309935020231023.
155158
"""
156159
sql_col = sa.Column("test_col", oracle_type, nullable=True)
157160
column_schema = sqla_col_to_column_schema(sql_col, reflection_level="full_with_precision")
158161

159-
assert column_schema["data_type"] == expected_type
160-
if expected_precision is not None:
161-
assert column_schema.get("precision") == expected_precision
162-
if expected_scale is not None and expected_type == "decimal":
163-
assert column_schema.get("scale") == expected_scale
164-
165-
# the inferred bigint type should work with PyArrow
166-
if expected_type == "bigint" and test_value == 9309935020231023:
167-
pa_array = pa.array([test_value], type="int64")
162+
assert column_schema.get("data_type") == expected_type
163+
assert column_schema.get("precision") == expected_precision
164+
assert column_schema.get("scale") == expected_scale
165+
166+
# Use dlt's actual PyArrow type mapping
167+
caps = DestinationCapabilitiesContext.generic_capabilities()
168+
pa_type = get_py_arrow_datatype(column_schema, caps, tz="UTC")
169+
170+
if expected_type == "decimal":
171+
decimal_value = Decimal(str(test_value))
172+
assert pa.types.is_decimal(pa_type)
173+
pa_array = pa.array([decimal_value], type=pa_type)
174+
assert pa_array[0].as_py() == decimal_value
175+
176+
# Original bug: float64 cannot precisely represent large integers
177+
# This is why mapping Oracle NUMBER(p,0) to 'double' was incorrect and caused
178+
# PyArrow conversion errors. Using 'decimal' preserves exact integer values.
179+
if test_value == 9309935020231023:
180+
float_val = float(test_value)
181+
assert float_val != test_value
182+
183+
elif expected_type == "double":
184+
pa_array = pa.array([test_value], type=pa_type)
185+
assert isinstance(pa_array, pa.FloatingPointArray)
168186
assert pa_array[0].as_py() == test_value
169-
with pytest.raises(
170-
pa.ArrowInvalid, match="Integer value 9309935020231023 is outside of the range"
171-
):
172-
pa.array([test_value], type="float64")

0 commit comments

Comments
 (0)