diff --git a/src/Tables/utils/file_reader.py b/src/Tables/utils/file_reader.py
index b7adb74..64bd599 100644
--- a/src/Tables/utils/file_reader.py
+++ b/src/Tables/utils/file_reader.py
@@ -230,7 +230,41 @@ def read_excel(self, path: Path, **kwargs) -> DataFrame:
 
     def read_parquet(self, path: Path) -> DataFrame:
-        """ """
-        return pd.read_parquet(path)
+        """Read a parquet file, rendering timestamp columns as ISO-8601 strings.
+
+        Falls back to the dataframe exactly as pandas loaded it when the
+        timestamp conversion raises.
+        """
+        df: DataFrame = pd.read_parquet(path)
+
+        # Best effort: the helper only touches ``df`` after every column has
+        # converted, so on failure ``df`` is still the untouched parquet data
+        # and the file does not need to be read a second time.
+        try:
+            return self._parquet_transform_to_iso_timeformat(df)
+        except Exception:
+            return df
+
+    def _parquet_transform_to_iso_timeformat(self, df: DataFrame) -> DataFrame:
+        """Replace datetime columns of ``df`` with ``YYYY-MM-DDTHH:MM:SS.mmmZ`` strings.
+
+        The columns are rewritten in place and ``df`` is returned. If any
+        column fails to convert, the exception propagates and ``df`` is left
+        unmodified.
+        """
+        ts_cols = df.select_dtypes(include=["datetime64[ns]", "datetime64[ns, UTC]"]).columns
+        # Convert everything first and assign afterwards, so a failure on a
+        # later column cannot leave earlier columns already rewritten.
+        # ``%f`` yields six digits; dropping the last three BEFORE appending
+        # "Z" leaves exactly milliseconds.
+        # NOTE(review): naive datetime64[ns] columns also get the "Z" suffix --
+        # confirm they are really UTC.
+        converted = {
+            col: df[col].dt.strftime("%Y-%m-%dT%H:%M:%S.%f").str[:-3] + "Z"
+            for col in ts_cols
+        }
+        for col, values in converted.items():
+            df[col] = values
+        return df
 
     def read_table_file(self, path: Path) -> DataFrame:
         """
diff --git a/tests/atest/test_reader.robot b/tests/atest/test_reader.robot
index 52da056..38a1b3c 100644
--- a/tests/atest/test_reader.robot
+++ b/tests/atest/test_reader.robot
@@ -206,6 +206,13 @@ Read Parquet File - Without Header
     ${result} =    BuiltIn.Evaluate    "_time" not in "${content}"
     BuiltIn.Should Be True    ${result}
 
+Open Table - Parquet
+    Tables.Configure Ignore Header    False
+    ${alias} =    Tables.Open Table    ${CURDIR}${/}testdata${/}example_03.parquet
+    @{data} =    Tables.Get Table
+    Should Contain    ${data}[1][0]    2025
+    Tables.Count Table    ${alias}    Rows    ==    ${1001}
+
 Get Table Cell - Parquet
     Tables.Configure Ignore Header    False
     Tables.Open Table    ${CURDIR}${/}testdata${/}example_03.parquet