diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 3a941deb2c68d..c710b7d34e9c5 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -58,9 +58,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then MSG='Python and Cython Doctests' ; echo "$MSG" python -c 'import pandas as pd; pd.test(run_doctests=True)' - # TEMP don't let doctests fail the build until all string dtype changes are fixed - # RET=$(($RET + $?)) ; echo "$MSG" "DONE" - echo "$MSG" "DONE" + RET=$(($RET + $?)) ; echo "$MSG" "DONE" fi diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index a6a6a03486800..533b9b689af0b 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -391,11 +391,11 @@ def unique(values): >>> pd.unique(pd.Series(pd.Categorical(list("baabc")))) ['b', 'a', 'c'] - Categories (3, object): ['a', 'b', 'c'] + Categories (3, str): ['a', 'b', 'c'] >>> pd.unique(pd.Series(pd.Categorical(list("baabc"), categories=list("abc")))) ['b', 'a', 'c'] - Categories (3, object): ['a', 'b', 'c'] + Categories (3, str): ['a', 'b', 'c'] An ordered Categorical preserves the category ordering. @@ -405,7 +405,7 @@ def unique(values): ... ) ... ) ['b', 'a', 'c'] - Categories (3, object): ['a' < 'b' < 'c'] + Categories (3, str): ['a' < 'b' < 'c'] An array of tuples @@ -751,7 +751,7 @@ def factorize( array([0, 0, 1]) >>> uniques ['a', 'c'] - Categories (3, str): [a, b, c] + Categories (3, str): ['a', 'b', 'c'] Notice that ``'b'`` is in ``uniques.categories``, despite not being present in ``cat.values``. 
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index d11e2271f9574..bfa2309bb023a 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1688,13 +1688,13 @@ def factorize( >>> cat = pd.Categorical(['a', 'b', 'c']) >>> cat ['a', 'b', 'c'] - Categories (3, object): ['a', 'b', 'c'] + Categories (3, str): ['a', 'b', 'c'] >>> cat.repeat(2) ['a', 'a', 'b', 'b', 'c', 'c'] - Categories (3, object): ['a', 'b', 'c'] + Categories (3, str): ['a', 'b', 'c'] >>> cat.repeat([1, 2, 3]) ['a', 'b', 'b', 'c', 'c', 'c'] - Categories (3, object): ['a', 'b', 'c'] + Categories (3, str): ['a', 'b', 'c'] """ @Substitution(klass="ExtensionArray") diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 768477d55e883..e4420d07675ba 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -332,7 +332,7 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi >>> pd.Categorical(["a", "b", "c", "a", "b", "c"]) ['a', 'b', 'c', 'a', 'b', 'c'] - Categories (3, object): ['a', 'b', 'c'] + Categories (3, str): ['a', 'b', 'c'] Missing values are not included as a category. @@ -355,7 +355,7 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi ... 
) >>> c ['a', 'b', 'c', 'a', 'b', 'c'] - Categories (3, object): ['c' < 'b' < 'a'] + Categories (3, str): ['c' < 'b' < 'a'] >>> c.min() 'c' """ @@ -510,9 +510,9 @@ def dtype(self) -> CategoricalDtype: >>> cat = pd.Categorical(["a", "b"], ordered=True) >>> cat ['a', 'b'] - Categories (2, object): ['a' < 'b'] + Categories (2, str): ['a' < 'b'] >>> cat.dtype - CategoricalDtype(categories=['a', 'b'], ordered=True, categories_dtype=object) + CategoricalDtype(categories=['a', 'b'], ordered=True, categories_dtype=str) """ return self._dtype @@ -740,7 +740,7 @@ def from_codes( >>> dtype = pd.CategoricalDtype(["a", "b"], ordered=True) >>> pd.Categorical.from_codes(codes=[0, 1, 0, 1], dtype=dtype) ['a', 'b', 'a', 'b'] - Categories (2, object): ['a' < 'b'] + Categories (2, str): ['a' < 'b'] """ dtype = CategoricalDtype._from_values_or_dtype( categories=categories, ordered=ordered, dtype=dtype @@ -922,12 +922,12 @@ def _set_categories(self, categories, fastpath: bool = False) -> None: >>> c = pd.Categorical(["a", "b"]) >>> c ['a', 'b'] - Categories (2, object): ['a', 'b'] + Categories (2, str): ['a', 'b'] >>> c._set_categories(pd.Index(["a", "c"])) >>> c ['a', 'c'] - Categories (2, object): ['a', 'c'] + Categories (2, str): ['a', 'c'] """ if fastpath: new_dtype = CategoricalDtype._from_fastpath(categories, self.ordered) @@ -1111,7 +1111,7 @@ def set_categories( 2 c 3 NaN dtype: category - Categories (3, object): ['a' < 'b' < 'c'] + Categories (3, str): ['a' < 'b' < 'c'] >>> ser.cat.set_categories(["A", "B", "C"], rename=True) 0 A @@ -1119,7 +1119,7 @@ def set_categories( 2 C 3 NaN dtype: category - Categories (3, object): ['A' < 'B' < 'C'] + Categories (3, str): ['A' < 'B' < 'C'] For :class:`pandas.CategoricalIndex`: @@ -1215,13 +1215,13 @@ def rename_categories(self, new_categories) -> Self: >>> c.rename_categories({"a": "A", "c": "C"}) ['A', 'A', 'b'] - Categories (2, object): ['A', 'b'] + Categories (2, str): ['A', 'b'] You may also provide a callable to create the new 
categories >>> c.rename_categories(lambda x: x.upper()) ['A', 'A', 'B'] - Categories (2, object): ['A', 'B'] + Categories (2, str): ['A', 'B'] """ if is_dict_like(new_categories): @@ -1281,7 +1281,7 @@ def reorder_categories(self, new_categories, ordered=None) -> Self: 2 c 3 a dtype: category - Categories (3, object): ['c' < 'b' < 'a'] + Categories (3, str): ['c' < 'b' < 'a'] >>> ser.sort_values() 2 c @@ -1289,7 +1289,7 @@ def reorder_categories(self, new_categories, ordered=None) -> Self: 0 a 3 a dtype: category - Categories (3, object): ['c' < 'b' < 'a'] + Categories (3, str): ['c' < 'b' < 'a'] For :class:`pandas.CategoricalIndex`: @@ -1346,11 +1346,11 @@ def add_categories(self, new_categories) -> Self: >>> c = pd.Categorical(["c", "b", "c"]) >>> c ['c', 'b', 'c'] - Categories (2, object): ['b', 'c'] + Categories (2, str): ['b', 'c'] >>> c.add_categories(["d", "a"]) ['c', 'b', 'c'] - Categories (4, object): ['b', 'c', 'd', 'a'] + Categories (4, str): ['b', 'c', 'd', 'a'] """ if not is_list_like(new_categories): @@ -1414,11 +1414,11 @@ def remove_categories(self, removals) -> Self: >>> c = pd.Categorical(["a", "c", "b", "c", "d"]) >>> c ['a', 'c', 'b', 'c', 'd'] - Categories (4, object): ['a', 'b', 'c', 'd'] + Categories (4, str): ['a', 'b', 'c', 'd'] >>> c.remove_categories(["d", "a"]) [NaN, 'c', 'b', 'c', NaN] - Categories (2, object): ['b', 'c'] + Categories (2, str): ['b', 'c'] """ from pandas import Index @@ -1465,17 +1465,17 @@ def remove_unused_categories(self) -> Self: >>> c = pd.Categorical(["a", "c", "b", "c", "d"]) >>> c ['a', 'c', 'b', 'c', 'd'] - Categories (4, object): ['a', 'b', 'c', 'd'] + Categories (4, str): ['a', 'b', 'c', 'd'] >>> c[2] = "a" >>> c[4] = "c" >>> c ['a', 'c', 'a', 'c', 'c'] - Categories (4, object): ['a', 'b', 'c', 'd'] + Categories (4, str): ['a', 'b', 'c', 'd'] >>> c.remove_unused_categories() ['a', 'c', 'a', 'c', 'c'] - Categories (2, object): ['a', 'c'] + Categories (2, str): ['a', 'c'] """ idx, inv = np.unique(self._codes, 
return_inverse=True) @@ -1540,13 +1540,13 @@ def map( >>> cat = pd.Categorical(["a", "b", "c"]) >>> cat ['a', 'b', 'c'] - Categories (3, object): ['a', 'b', 'c'] + Categories (3, str): ['a', 'b', 'c'] >>> cat.map(lambda x: x.upper(), na_action=None) ['A', 'B', 'C'] - Categories (3, object): ['A', 'B', 'C'] + Categories (3, str): ['A', 'B', 'C'] >>> cat.map({"a": "first", "b": "second", "c": "third"}, na_action=None) ['first', 'second', 'third'] - Categories (3, object): ['first', 'second', 'third'] + Categories (3, str): ['first', 'second', 'third'] If the mapping is one-to-one the ordering of the categories is preserved: @@ -1554,7 +1554,7 @@ def map( >>> cat = pd.Categorical(["a", "b", "c"], ordered=True) >>> cat ['a', 'b', 'c'] - Categories (3, object): ['a' < 'b' < 'c'] + Categories (3, str): ['a' < 'b' < 'c'] >>> cat.map({"a": 3, "b": 2, "c": 1}, na_action=None) [3, 2, 1] Categories (3, int64): [3 < 2 < 1] @@ -1562,13 +1562,13 @@ def map( If the mapping is not one-to-one an :class:`~pandas.Index` is returned: >>> cat.map({"a": "first", "b": "second", "c": "first"}, na_action=None) - Index(['first', 'second', 'first'], dtype='object') + Index(['first', 'second', 'first'], dtype='str') If a `dict` is used, all unmapped categories are mapped to `NaN` and the result is an :class:`~pandas.Index`: >>> cat.map({"a": "first", "b": "second"}, na_action=None) - Index(['first', 'second', nan], dtype='object') + Index(['first', 'second', nan], dtype='str') """ assert callable(mapper) or is_dict_like(mapper) @@ -2383,9 +2383,9 @@ def _reverse_indexer(self) -> dict[Hashable, npt.NDArray[np.intp]]: >>> c = pd.Categorical(list("aabca")) >>> c ['a', 'a', 'b', 'c', 'a'] - Categories (3, object): ['a', 'b', 'c'] + Categories (3, str): ['a', 'b', 'c'] >>> c.categories - Index(['a', 'b', 'c'], dtype='object') + Index(['a', 'b', 'c'], dtype='str') >>> c.codes array([0, 0, 1, 2, 0], dtype=int8) >>> c._reverse_indexer() @@ -2517,10 +2517,10 @@ def unique(self) -> Self: -------- >>> 
pd.Categorical(list("baabc")).unique() ['b', 'a', 'c'] - Categories (3, object): ['a', 'b', 'c'] + Categories (3, str): ['a', 'b', 'c'] >>> pd.Categorical(list("baab"), categories=list("abc"), ordered=True).unique() ['b', 'a'] - Categories (3, object): ['a' < 'b' < 'c'] + Categories (3, str): ['a' < 'b' < 'c'] """ return super().unique() @@ -2845,10 +2845,10 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (3, object): ['a', 'b', 'c'] + Categories (3, str): ['a', 'b', 'c'] >>> s.cat.categories - Index(['a', 'b', 'c'], dtype='object') + Index(['a', 'b', 'c'], dtype='str') >>> s.cat.rename_categories(list("cba")) 0 c @@ -2858,7 +2858,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 a 5 a dtype: category - Categories (3, object): ['c', 'b', 'a'] + Categories (3, str): ['c', 'b', 'a'] >>> s.cat.reorder_categories(list("cba")) 0 a @@ -2868,7 +2868,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (3, object): ['c', 'b', 'a'] + Categories (3, str): ['c', 'b', 'a'] >>> s.cat.add_categories(["d", "e"]) 0 a @@ -2878,7 +2878,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (5, object): ['a', 'b', 'c', 'd', 'e'] + Categories (5, str): ['a', 'b', 'c', 'd', 'e'] >>> s.cat.remove_categories(["a", "c"]) 0 NaN @@ -2888,7 +2888,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 NaN 5 NaN dtype: category - Categories (1, object): ['b'] + Categories (1, str): ['b'] >>> s1 = s.cat.add_categories(["d", "e"]) >>> s1.cat.remove_unused_categories() @@ -2899,7 +2899,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (3, object): ['a', 'b', 'c'] + Categories (3, str): ['a', 'b', 'c'] >>> s.cat.set_categories(list("abcde")) 0 a @@ -2909,7 
+2909,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (5, object): ['a', 'b', 'c', 'd', 'e'] + Categories (5, str): ['a', 'b', 'c', 'd', 'e'] >>> s.cat.as_ordered() 0 a @@ -2919,7 +2919,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (3, object): ['a' < 'b' < 'c'] + Categories (3, str): ['a' < 'b' < 'c'] >>> s.cat.as_unordered() 0 a @@ -2929,7 +2929,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (3, object): ['a', 'b', 'c'] + Categories (3, str): ['a', 'b', 'c'] """ def __init__(self, data) -> None: diff --git a/pandas/core/base.py b/pandas/core/base.py index a1a041f13878a..2e2bf487562a8 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -567,7 +567,7 @@ def array(self) -> ExtensionArray: >>> ser = pd.Series(pd.Categorical(["a", "b", "a"])) >>> ser.array ['a', 'b', 'a'] - Categories (2, str): [a, b] + Categories (2, str): ['a', 'b'] """ raise AbstractMethodError(self) @@ -1386,7 +1386,7 @@ def factorize( ... ) >>> ser ['apple', 'bread', 'bread', 'cheese', 'milk'] - Categories (4, str): [apple < bread < cheese < milk] + Categories (4, str): ['apple' < 'bread' < 'cheese' < 'milk'] >>> ser.searchsorted('bread') np.int64(1) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index ada492787a179..46e3e47afb2ac 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -176,9 +176,9 @@ def array( NumPy array. >>> pd.array(["a", "b"], dtype=str) - <NumpyExtensionArray> + <ArrowStringArray> ['a', 'b'] - Length: 2, dtype: str32 + Length: 2, dtype: str This would instead return the new ExtensionArray dedicated for string data. 
If you really need the new array to be backed by a NumPy array, @@ -250,7 +250,7 @@ def array( >>> pd.array(["a", "b", "a"], dtype="category") ['a', 'b', 'a'] - Categories (2, object): ['a', 'b'] + Categories (2, str): ['a', 'b'] Or specify the actual dtype @@ -258,7 +258,7 @@ def array( ... ["a", "b", "a"], dtype=pd.CategoricalDtype(["a", "b", "c"], ordered=True) ... ) ['a', 'b', 'a'] - Categories (3, object): ['a' < 'b' < 'c'] + Categories (3, str): ['a' < 'b' < 'c'] If pandas does not infer a dedicated extension type a :class:`arrays.NumpyExtensionArray` is returned. @@ -454,7 +454,7 @@ def extract_array( -------- >>> extract_array(pd.Series(["a", "b", "c"], dtype="category")) ['a', 'b', 'c'] - Categories (3, object): ['a', 'b', 'c'] + Categories (3, str): ['a', 'b', 'c'] Other objects like lists, arrays, and DataFrames are just passed through. diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index dcf8cb5c78536..0d94df65a1d6c 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -223,7 +223,7 @@ def union_categoricals( >>> b = pd.Categorical(["a", "b"]) >>> pd.api.types.union_categoricals([a, b]) ['b', 'c', 'a', 'b'] - Categories (3, object): ['b', 'c', 'a'] + Categories (3, str): ['b', 'c', 'a'] By default, the resulting categories will be ordered as they appear in the `categories` of the data. If you want the categories to be @@ -231,7 +231,7 @@ def union_categoricals( >>> pd.api.types.union_categoricals([a, b], sort_categories=True) ['b', 'c', 'a', 'b'] - Categories (3, object): ['a', 'b', 'c'] + Categories (3, str): ['a', 'b', 'c'] `union_categoricals` also works with the case of combining two categoricals of the same categories and order information (e.g. 
what @@ -241,7 +241,7 @@ def union_categoricals( >>> b = pd.Categorical(["a", "b", "a"], ordered=True) >>> pd.api.types.union_categoricals([a, b]) ['a', 'b', 'a', 'b', 'a'] - Categories (2, object): ['a' < 'b'] + Categories (2, str): ['a' < 'b'] Raises `TypeError` because the categories are ordered and not identical. @@ -259,7 +259,7 @@ def union_categoricals( >>> b = pd.Categorical(["c", "b", "a"], ordered=True) >>> pd.api.types.union_categoricals([a, b], ignore_order=True) ['a', 'b', 'c', 'c', 'b', 'a'] - Categories (3, object): ['a', 'b', 'c'] + Categories (3, str): ['a', 'b', 'c'] `union_categoricals` also works with a `CategoricalIndex`, or `Series` containing categorical data, but note that the resulting array will @@ -269,7 +269,7 @@ def union_categoricals( >>> b = pd.Series(["a", "b"], dtype="category") >>> pd.api.types.union_categoricals([a, b]) ['b', 'c', 'a', 'b'] - Categories (3, object): ['b', 'c', 'a'] + Categories (3, str): ['b', 'c', 'a'] """ from pandas import Categorical from pandas.core.arrays.categorical import recode_for_categories diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 912421dff1026..79976cfd51352 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -209,7 +209,7 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): 2 a 3 NaN dtype: category - Categories (2, object): ['b' < 'a'] + Categories (2, str): ['b' < 'a'] An empty CategoricalDtype with a specific dtype can be created by providing an empty index. As follows, @@ -302,7 +302,7 @@ def _from_values_or_dtype( >>> pd.CategoricalDtype._from_values_or_dtype( ... categories=["a", "b"], ordered=True ... 
) - CategoricalDtype(categories=['a', 'b'], ordered=True, categories_dtype=object) + CategoricalDtype(categories=['a', 'b'], ordered=True, categories_dtype=str) >>> dtype1 = pd.CategoricalDtype(["a", "b"], ordered=True) >>> dtype2 = pd.CategoricalDtype(["x", "y"], ordered=False) >>> c = pd.Categorical([0, 1], dtype=dtype1) @@ -317,7 +317,7 @@ def _from_values_or_dtype( The supplied dtype takes precedence over values' dtype: >>> pd.CategoricalDtype._from_values_or_dtype(c, dtype=dtype2) - CategoricalDtype(categories=['x', 'y'], ordered=False, categories_dtype=object) + CategoricalDtype(categories=['x', 'y'], ordered=False, categories_dtype=str) """ if dtype is not None: diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 034b861a83f43..44b5df6a4f1b2 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -175,14 +175,14 @@ def cut( >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, labels=["bad", "medium", "good"]) ['bad', 'good', 'medium', 'medium', 'good', 'bad'] - Categories (3, object): ['bad' < 'medium' < 'good'] + Categories (3, str): ['bad' < 'medium' < 'good'] ``ordered=False`` will result in unordered categories when labels are passed. This parameter can be used to allow non-unique labels: >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, labels=["B", "A", "B"], ordered=False) ['B', 'B', 'A', 'A', 'B', 'B'] - Categories (2, object): ['A', 'B'] + Categories (2, str): ['A', 'B'] ``labels=False`` implies you just want the bins back. @@ -349,7 +349,7 @@ def qcut( >>> pd.qcut(range(5), 3, labels=["good", "medium", "bad"]) ... 
# doctest: +SKIP [good, good, medium, bad, bad] - Categories (3, object): [good < medium < bad] + Categories (3, str): [good < medium < bad] >>> pd.qcut(range(5), 4, labels=False) array([0, 0, 1, 2, 3]) diff --git a/pandas/core/series.py b/pandas/core/series.py index ce5b2e5ed8de5..63ef68f202a6e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -764,11 +764,13 @@ def values(self): array([1, 2, 3]) >>> pd.Series(list("aabc")).values - array(['a', 'a', 'b', 'c'], dtype=object) + <ArrowStringArray> + ['a', 'a', 'b', 'c'] + Length: 4, dtype: str >>> pd.Series(list("aabc")).astype("category").values ['a', 'a', 'b', 'c'] - Categories (3, object): ['a', 'b', 'c'] + Categories (3, str): ['a', 'b', 'c'] Timezone aware datetime data is converted to UTC: @@ -2144,12 +2146,12 @@ def unique(self) -> ArrayLike: >>> pd.Series(pd.Categorical(list("baabc"))).unique() ['b', 'a', 'c'] - Categories (3, object): ['a', 'b', 'c'] + Categories (3, str): ['a', 'b', 'c'] >>> pd.Series( ... pd.Categorical(list("baabc"), categories=list("abc"), ordered=True) ... 
).unique() ['b', 'a', 'c'] - Categories (3, object): ['a' < 'b' < 'c'] + Categories (3, str): ['a' < 'b' < 'c'] """ return super().unique() diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 6b4f6c05c3123..59911a57acc02 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -746,9 +746,9 @@ def read_json( >>> df.to_json(orient='table') '\ {{"schema":{{"fields":[\ -{{"name":"index","type":"string"}},\ -{{"name":"col 1","type":"string"}},\ -{{"name":"col 2","type":"string"}}],\ +{{"name":"index","type":"string","extDtype":"str"}},\ +{{"name":"col 1","type":"string","extDtype":"str"}},\ +{{"name":"col 2","type":"string","extDtype":"str"}}],\ "primaryKey":["index"],\ "pandas_version":"1.4.0"}},\ "data":[\ diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 3a98189ac1a96..feca60c6e28a2 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -183,7 +183,7 @@ def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype: ... "ordered": True, ... } ... ) - CategoricalDtype(categories=['a', 'b', 'c'], ordered=True, categories_dtype=object) + CategoricalDtype(categories=['a', 'b', 'c'], ordered=True, categories_dtype=str) >>> convert_json_field_to_pandas_type({"name": "a_datetime", "type": "datetime"}) 'datetime64[ns]' @@ -292,7 +292,7 @@ def build_table_schema( {'fields': \ [{'name': 'idx', 'type': 'integer'}, \ {'name': 'A', 'type': 'integer'}, \ -{'name': 'B', 'type': 'string'}, \ +{'name': 'B', 'type': 'string', 'extDtype': 'str'}, \ {'name': 'C', 'type': 'datetime'}], \ 'primaryKey': ['idx'], \ 'pandas_version': '1.4.0'}