@@ -255,6 +255,9 @@ class ParquetFile:
255
255
it will be parsed as a URI to determine the filesystem.
256
256
page_checksum_verification : bool, default False
257
257
If True, verify the checksum for each page read from the file.
258
+ smallest_decimal_enabled : bool, default False
259
+ If True, always convert to the smallest Arrow decimal type based
260
+ on precision.
258
261
259
262
Examples
260
263
--------
@@ -303,7 +306,7 @@ def __init__(self, source, *, metadata=None, common_metadata=None,
303
306
pre_buffer = False , coerce_int96_timestamp_unit = None ,
304
307
decryption_properties = None , thrift_string_size_limit = None ,
305
308
thrift_container_size_limit = None , filesystem = None ,
306
- page_checksum_verification = False ):
309
+ page_checksum_verification = False , smallest_decimal_enabled = False ):
307
310
308
311
self ._close_source = getattr (source , 'closed' , True )
309
312
@@ -323,6 +326,7 @@ def __init__(self, source, *, metadata=None, common_metadata=None,
323
326
thrift_string_size_limit = thrift_string_size_limit ,
324
327
thrift_container_size_limit = thrift_container_size_limit ,
325
328
page_checksum_verification = page_checksum_verification ,
329
+ smallest_decimal_enabled = smallest_decimal_enabled ,
326
330
)
327
331
self .common_metadata = common_metadata
328
332
self ._nested_paths_by_prefix = self ._build_nested_paths ()
@@ -1267,6 +1271,9 @@ class ParquetDataset:
1267
1271
If True, verify the page checksum for each page read from the file.
1268
1272
use_legacy_dataset : bool, optional
1269
1273
Deprecated and has no effect from PyArrow version 15.0.0.
1274
+ smallest_decimal_enabled : bool, default False
1275
+ If True, always convert to the smallest Arrow decimal type based
1276
+ on precision.
1270
1277
1271
1278
Examples
1272
1279
--------
@@ -1280,7 +1287,8 @@ def __init__(self, path_or_paths, filesystem=None, schema=None, *, filters=None,
1280
1287
decryption_properties = None , thrift_string_size_limit = None ,
1281
1288
thrift_container_size_limit = None ,
1282
1289
page_checksum_verification = False ,
1283
- use_legacy_dataset = None ):
1290
+ use_legacy_dataset = None ,
1291
+ smallest_decimal_enabled = False ):
1284
1292
1285
1293
if use_legacy_dataset is not None :
1286
1294
warnings .warn (
@@ -1297,6 +1305,7 @@ def __init__(self, path_or_paths, filesystem=None, schema=None, *, filters=None,
1297
1305
"thrift_string_size_limit" : thrift_string_size_limit ,
1298
1306
"thrift_container_size_limit" : thrift_container_size_limit ,
1299
1307
"page_checksum_verification" : page_checksum_verification ,
1308
+ "smallest_decimal_enabled" : smallest_decimal_enabled ,
1300
1309
}
1301
1310
if buffer_size :
1302
1311
read_options .update (use_buffered_stream = True ,
@@ -1686,6 +1695,9 @@ def partitioning(self):
1686
1695
sufficient for most Parquet files.
1687
1696
page_checksum_verification : bool, default False
1688
1697
If True, verify the checksum for each page read from the file.
1698
+ smallest_decimal_enabled : bool, default False
1699
+ If True, always convert to the smallest Arrow decimal type based
1700
+ on precision.
1689
1701
1690
1702
Returns
1691
1703
-------
@@ -1781,7 +1793,8 @@ def read_table(source, *, columns=None, use_threads=True,
1781
1793
coerce_int96_timestamp_unit = None ,
1782
1794
decryption_properties = None , thrift_string_size_limit = None ,
1783
1795
thrift_container_size_limit = None ,
1784
- page_checksum_verification = False ):
1796
+ page_checksum_verification = False ,
1797
+ smallest_decimal_enabled = False ):
1785
1798
1786
1799
if use_legacy_dataset is not None :
1787
1800
warnings .warn (
@@ -1806,6 +1819,7 @@ def read_table(source, *, columns=None, use_threads=True,
1806
1819
thrift_string_size_limit = thrift_string_size_limit ,
1807
1820
thrift_container_size_limit = thrift_container_size_limit ,
1808
1821
page_checksum_verification = page_checksum_verification ,
1822
+ smallest_decimal_enabled = smallest_decimal_enabled ,
1809
1823
)
1810
1824
except ImportError :
1811
1825
# fall back on ParquetFile for simple cases when pyarrow.dataset
@@ -1838,6 +1852,7 @@ def read_table(source, *, columns=None, use_threads=True,
1838
1852
thrift_string_size_limit = thrift_string_size_limit ,
1839
1853
thrift_container_size_limit = thrift_container_size_limit ,
1840
1854
page_checksum_verification = page_checksum_verification ,
1855
+ smallest_decimal_enabled = smallest_decimal_enabled ,
1841
1856
)
1842
1857
1843
1858
return dataset .read (columns = columns , use_threads = use_threads ,
0 commit comments