From 5f70c1621ecb618d51d654febfe43aefea765be9 Mon Sep 17 00:00:00 2001 From: Justine Kosinski Date: Mon, 13 Oct 2025 10:50:01 +0200 Subject: [PATCH 1/2] Fix Excel header NaN --- pandas/io/formats/excel.py | 47 ++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index d4d47253a5f82..bb063a3503ff6 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -116,8 +116,8 @@ class CSSToExcelConverter: focusing on font styling, backgrounds, borders and alignment. Operates by first computing CSS styles in a fairly generic - way (see :meth:`compute_css`) then determining Excel style - properties from CSS properties (see :meth:`build_xlstyle`). + way (see :meth: `compute_css`) then determining Excel style + properties from CSS properties (see :meth: `build_xlstyle`). Parameters ---------- @@ -587,14 +587,15 @@ def __init__( def _format_value(self, val): if is_scalar(val) and missing.isna(val): - val = self.na_rep + return self.na_rep elif is_float(val): if missing.isposinf_scalar(val): - val = self.inf_rep + return self.inf_rep elif missing.isneginf_scalar(val): - val = f"-{self.inf_rep}" + return f"-{self.inf_rep}" elif self.float_format is not None: - val = float(self.float_format % val) + return float(self.float_format % val) + if getattr(val, "tzinfo", None) is not None: raise ValueError( "Excel does not support datetimes with " @@ -616,7 +617,17 @@ def _format_header_mi(self) -> Iterable[ExcelCell]: columns = self.columns merge_columns = self.merge_cells in {True, "columns"} - level_strs = columns._format_multi(sparsify=merge_columns, include_names=False) + NBSP = "\u00a0" + + fixed_levels = [] + for lvl in range(columns.nlevels): + vals = columns.get_level_values(lvl) + fixed_levels.append(vals.fillna(NBSP)) + fixed_columns = MultiIndex.from_arrays(fixed_levels, names=columns.names) + + level_strs = fixed_columns._format_multi( + sparsify=merge_columns, include_names=False + ) level_lengths = get_level_lengths(level_strs) coloffset = 0 lnum = 0 @@ -624,7 +635,7 @@ def _format_header_mi(self) -> Iterable[ExcelCell]: if self.index and isinstance(self.df.index, MultiIndex): coloffset = self.df.index.nlevels - 1 - for lnum, name in enumerate(columns.names): + for lnum, name in enumerate(fixed_columns.names): yield ExcelCell( row=lnum, col=coloffset, @@ -633,7 +644,7 @@ def _format_header_mi(self) -> Iterable[ExcelCell]: ) for lnum, (spans, levels, level_codes) in enumerate( - zip(level_lengths, columns.levels, columns.codes, strict=True) + zip(level_lengths, fixed_columns.levels, fixed_columns.codes, strict=True) ): values = levels.take(level_codes) for i, span_val in spans.items(): @@ -657,7 +668,6 @@ def _format_header_mi(self) -> Iterable[ExcelCell]: def _format_header_regular(self) -> Iterable[ExcelCell]: if self._has_aliases or self.header: coloffset = 0 - if self.index: coloffset = 1 if isinstance(self.df.index, MultiIndex): @@ -673,7 +683,10 @@ def _format_header_regular(self) -> Iterable[ExcelCell]: ) colnames = self.header - for colindex, colname in enumerate(colnames): + NBSP = "\u00a0" + output_colnames = colnames.fillna(NBSP) + + for colindex, colname in enumerate(output_colnames): yield CssExcelCell( row=self.rowcounter, col=colindex + coloffset, @@ -687,7 +700,6 @@ def _format_header_regular(self) -> Iterable[ExcelCell]: def _format_header(self) -> Iterable[ExcelCell]: gen: Iterable[ExcelCell] - if isinstance(self.columns, MultiIndex): gen = self._format_header_mi() else: @@ -695,7 +707,7 @@ def _format_header(self) -> Iterable[ExcelCell]: gen2: Iterable[ExcelCell] = () - if self.df.index.names: + if self.df.index.names and self.header is not False: row = [x if x is not None else "" for x in self.df.index.names] + [ "" ] * len(self.columns) @@ -762,12 +774,11 @@ def _format_regular_rows(self) -> Iterable[ExcelCell]: def _format_hierarchical_rows(self) -> Iterable[ExcelCell]: if self._has_aliases or self.header: self.rowcounter += 1 - gcolidx = 0 if self.index: - index_labels = self.df.index.names # check for aliases + index_labels = self.df.index.names if self.index_label and isinstance( self.index_label, (list, tuple, np.ndarray, Index) ): @@ -802,10 +813,8 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]: allow_fill=levels._can_hold_na, fill_value=levels._na_value, ) - # GH#60099 - if isinstance(values[0], Period): + if values.size > 0 and isinstance(values[0], Period): values = values.to_timestamp() - for i, span_val in spans.items(): mergestart, mergeend = None, None if span_val > 1: @@ -901,9 +910,7 @@ def write( write engine to use if writer is a path - you can also set this via the options ``io.excel.xlsx.writer``, or ``io.excel.xlsm.writer``. - {storage_options} - engine_kwargs: dict, optional Arbitrary keyword arguments passed to excel engine. """ From 1b5fe97353d4ae15999b85106865f489c1fd65ff Mon Sep 17 00:00:00 2001 From: Mathis Date: Wed, 15 Oct 2025 13:49:12 +0200 Subject: [PATCH 2/2] add test and update whatsnews --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/tests/io/excel/test_writers.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 448ceffdaa1eb..5e1429755e15e 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1095,6 +1095,7 @@ I/O - Bug in :meth:`set_option` where setting the pandas option ``display.html.use_mathjax`` to ``False`` has no effect (:issue:`59884`) - Bug in :meth:`to_csv` where ``quotechar``` is not escaped when ``escapechar`` is not None (:issue:`61407`) - Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`) +- Bug in :meth:`~pandas.DataFrame.to_excel` with a :class:`MultiIndex` in the columns containing ``NaN`` causing ``NaN`` to be replaced by the last valid value (:issue:`62340`). Period ^^^^^^ diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index ced4feb9e7eb9..b6575d2070a9c 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1583,6 +1583,29 @@ def assert_called_and_reset(cls): df.to_excel(filepath2, engine="dummy") DummyClass.assert_called_and_reset() + @td.skip_if_no("openpyxl") + def test_to_excel_multiindex_nan_in_columns(self, merge_cells): + # GH 62340 + df = ( + DataFrame({"a": list("ABBAAAB"), "b": [-1, 1, 1, -2, float("nan"), 3, -4]}) + .assign(b_bin=lambda x: pd.cut(x.b, bins=[-float("inf"), 0, float("inf")])) + .groupby(["b_bin", "a"], as_index=False, observed=True, dropna=False) + .agg(b_sum=("b", "sum"), b_prod=("b", "prod")) + .pivot(index="a", columns="b_bin", values=["b_sum", "b_prod"]) + ) + + with ExcelWriter("test.xlsx", engine="openpyxl") as writer: + df.to_excel(writer, sheet_name="Sheet1", merge_cells=merge_cells) + + reader = ExcelFile("test.xlsx") + result = pd.read_excel(reader, index_col=0, header=[0, 1]) + + original_values = df.to_numpy() + result_values = result.to_numpy() + tm.assert_numpy_array_equal(original_values, result_values) + + assert result.columns[1][1] != result.columns[2][1] + @td.skip_if_no("xlrd") @td.skip_if_no("openpyxl")