From a2c19f3e23daa73a5b5e34bb9f5b5721042d49e9 Mon Sep 17 00:00:00 2001
From: Jonatan Martens <jonatan.m@pecan.ai>
Date: Sat, 29 Nov 2025 14:13:15 +0200
Subject: [PATCH 1/5] Add rename_columns method

---
 python/pyarrow/_dataset.pyx          | 54 ++++++++++++++++++++++++++++
 python/pyarrow/tests/test_dataset.py | 29 +++++++++++++++
 2 files changed, 83 insertions(+)

diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 666fd2c1cc5..1bf9764cfb8 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -412,6 +412,10 @@ cdef class Dataset(_Weakrefable):
         n_legs: [[2,4,4,100]]
         animal: [["Parrot","Dog","Horse","Centipede"]]
         """
+        # Apply column projection from rename_columns() if present
+        if columns is None and 'columns' in self._scan_options:
+            columns = self._scan_options['columns']
+
         return Scanner.from_dataset(
             self,
             columns=columns,
@@ -990,6 +994,56 @@ cdef class Dataset(_Weakrefable):
                                          right_dataset, right_on, right_by,
                                          tolerance, output_type=InMemoryDataset)
 
+    def rename_columns(self, names):
+        """
+        Apply a logical column renaming to this Dataset, in place.
+
+        The renaming is stored on this object as a scan-time projection,
+        replacing any earlier one; file contents are not changed. Passing
+        an explicit ``columns`` when scanning bypasses the renaming.
+
+        Parameters
+        ----------
+        names : list, tuple, or dict
+            If a list or tuple, the new names for all columns (must match the
+            number of columns). If a dict, maps old column names to new names.
+
+        Returns
+        -------
+        Dataset
+            The existing dataset with column projection applied.
+
+        Examples
+        --------
+        Rename all columns by position:
+
+        >>> dataset.rename_columns(['name', 'age', 'city']).to_table()
+
+        Rename specific columns:
+
+        >>> dataset.rename_columns({'old_name': 'new_name'}).to_table()
+        """
+        import pyarrow.dataset as ds
+
+        schema = self.schema
+
+        if isinstance(names, (list, tuple)):
+            if len(names) != len(schema):
+                raise ValueError(
+                    f"Expected {len(schema)} names, got {len(names)}")
+            name_mapping = {schema.field(i).name: names[i]
+                            for i in range(len(names))}
+        elif isinstance(names, dict):
+            name_mapping = names
+        else:
+            raise TypeError(f"names must be list, tuple, or dict, not {type(names)!r}")
+
+        projection = {new_name: ds.field(old_name)
+                      for old_name, new_name in name_mapping.items()}
+
+        self._scan_options['columns'] = projection
+
+        return self
 
 cdef class InMemoryDataset(Dataset):
     """
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 32bcebb28de..13f76040195 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -5932,3 +5932,32 @@ def test_scanner_from_substrait(dataset):
         filter=ps.BoundExpressions.from_substrait(filtering)
     ).to_table()
     assert result.to_pydict() == {'str': ['4', '4']}
+
+
+@pytest.mark.parametrize("names", [
+    ["new-index", "new-color"],
+    ("new-index", "new-color"),
+    {"index": "new-index", "color": "new-color"}
+]
+)
+def test_rename_columns(names):
+    original_schema = pa.schema([
+        pa.field('index', pa.int64()),
+        pa.field('color', pa.string()),
+    ]
+    )
+
+    dataset = ds.InMemoryDataset(
+        pa.RecordBatch.from_pylist(
+            [{"index": 1, "color": "green"}, {"index": 2, "color": "blue"}]),
+        schema=original_schema
+    )
+
+    dataset.rename_columns(names)
+
+    expected_schema = pa.schema([
+        pa.field("new-index", pa.int64()),
+        pa.field("new-color", pa.string())
+    ])
+
+    assert dataset.to_table().schema.equals(expected_schema)

From 04969a5a2ad47eed57f14fda30a539d101d97cb1 Mon Sep 17 00:00:00 2001
From: Jonatan Martens <jonatan.m@pecan.ai>
Date: Thu, 4 Dec 2025 09:56:06 +0200
Subject: [PATCH 2/5] Remove examples from docstring

---
 python/pyarrow/_dataset.pyx | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 1bf9764cfb8..4dad8567815 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -1012,16 +1012,6 @@ cdef class Dataset(_Weakrefable):
         -------
         Dataset
             The existing dataset with column projection applied.
-
-        Examples
-        --------
-        Rename all columns by position:
-
-        >>> dataset.rename_columns(['name', 'age', 'city']).to_table()
-
-        Rename specific columns:
-
-        >>> dataset.rename_columns({'old_name': 'new_name'}).to_table()
         """
         import pyarrow.dataset as ds
 

From f48a7384a4dd74c029a9627bc9c6731703852fc9 Mon Sep 17 00:00:00 2001
From: Jonatan Martens <jonatan.m@pecan.ai>
Date: Thu, 4 Dec 2025 14:31:17 +0200
Subject: [PATCH 3/5] Add docstring with proper imports

---
 python/pyarrow/_dataset.pyx | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 4dad8567815..8a2040c0d23 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -1012,6 +1012,24 @@ cdef class Dataset(_Weakrefable):
         -------
         Dataset
             The existing dataset with column projection applied.
+
+        Examples
+        --------
+        Rename all columns by position:
+
+        >>> import pyarrow as pa
+        >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021],
+        ...                   'n_legs': [2, 2, 4, 4, 5, 100],
+        ...                   'animal': ["Flamingo", "Parrot", "Dog", "Horse",
+        ...                              "Brittle stars", "Centipede"]})
+
+        >>> import pyarrow.dataset as ds
+        >>> dataset = ds.InMemoryDataset([table])
+        >>> dataset.rename_columns(['time', 'number_of_legs', 'name']).to_table()
+
+        Rename specific columns:
+
+        >>> dataset.rename_columns({'n_legs': 'number_of_legs'}).to_table()
         """
         import pyarrow.dataset as ds
 

From 70e520d9c0fb6b3c3192dfcf43be2e7ae9e99a0d Mon Sep 17 00:00:00 2001
From: Jonatan Martens <jonatan.m@pecan.ai>
Date: Thu, 4 Dec 2025 19:52:09 +0200
Subject: [PATCH 4/5] Add outputs to examples

---
 python/pyarrow/_dataset.pyx | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 8a2040c0d23..9ce43757d0f 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -1026,10 +1026,26 @@ cdef class Dataset(_Weakrefable):
         >>> import pyarrow.dataset as ds
         >>> dataset = ds.InMemoryDataset([table])
         >>> dataset.rename_columns(['time', 'number_of_legs', 'name']).to_table()
+        pyarrow.Table
+        time: int64
+        number_of_legs: int64
+        name: string
+        ----
+        time: [[2020,2022,2021,2022,2019,2021]]
+        number_of_legs: [[2,2,4,4,5,100]]
+        name: [["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]]
 
         Rename specific columns:
 
         >>> dataset.rename_columns({'n_legs': 'number_of_legs'}).to_table()
+        pyarrow.Table
+        year: int64
+        number_of_legs: int64
+        animal: string
+        ----
+        year: [[2020,2022,2021,2022,2019,2021]]
+        number_of_legs: [[2,2,4,4,5,100]]
+        animal: [["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]]
         """
         import pyarrow.dataset as ds
 

From 5e6abfd705ac19fdd5ffb422852f170ea94faa79 Mon Sep 17 00:00:00 2001
From: Jonatan Martens <jonatan.m@pecan.ai>
Date: Thu, 4 Dec 2025 20:36:17 +0200
Subject: [PATCH 5/5] Keep columns that are absent from the rename dict

---
 python/pyarrow/_dataset.pyx          | 10 +++++++++-
 python/pyarrow/tests/test_dataset.py | 20 ++++++++++----------
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 9ce43757d0f..af495ae4c2f 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -1058,7 +1058,15 @@ cdef class Dataset(_Weakrefable):
             name_mapping = {schema.field(i).name: names[i]
                             for i in range(len(names))}
         elif isinstance(names, dict):
-            name_mapping = names
+            # Fail early on keys that match no column; otherwise a typo in
+            # the mapping would silently leave the column un-renamed.
+            existing = set(schema.names)
+            for name in names:
+                if name not in existing:
+                    raise KeyError(f"Column {name!r} not found")
+            # Columns missing from the mapping keep their current name.
+            name_mapping = {field.name: names.get(field.name, field.name)
+                            for field in schema}
         else:
             raise TypeError(f"names must be list, tuple, or dict, not {type(names)!r}")
 
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 13f76040195..f60fcd2c7d9 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -5934,13 +5934,18 @@ def test_scanner_from_substrait(dataset):
     assert result.to_pydict() == {'str': ['4', '4']}
 
 
-@pytest.mark.parametrize("names", [
-    ["new-index", "new-color"],
-    ("new-index", "new-color"),
-    {"index": "new-index", "color": "new-color"}
+@pytest.mark.parametrize("names, expected_schema", [
+    (["new-index", "new-color"],
+     pa.schema([pa.field("new-index", pa.int64()), pa.field("new-color", pa.string())])),
+    (("new-index", "new-color"),
+     pa.schema([pa.field("new-index", pa.int64()), pa.field("new-color", pa.string())])),
+    ({"index": "new-index", "color": "new-color"},
+     pa.schema([pa.field("new-index", pa.int64()), pa.field("new-color", pa.string())])),
+    ({"index": "new-index"},
+     pa.schema([pa.field("new-index", pa.int64()), pa.field("color", pa.string())])),
 ]
 )
-def test_rename_columns(names):
+def test_rename_columns(names, expected_schema):
     original_schema = pa.schema([
         pa.field('index', pa.int64()),
         pa.field('color', pa.string()),
@@ -5955,9 +5960,4 @@ def test_rename_columns(names):
 
     dataset.rename_columns(names)
 
-    expected_schema = pa.schema([
-        pa.field("new-index", pa.int64()),
-        pa.field("new-color", pa.string())
-    ])
-
     assert dataset.to_table().schema.equals(expected_schema)