Quantco · borchero · May 15, 2025 · May 5, 2025 · May 5, 2025 · May 14, 2025
@@ -0,0 +1,39 @@
+# Copyright (c) QuantCo 2025-2025
+# SPDX-License-Identifier: BSD-3-Clause
+
+import os
+import warnings
+from collections.abc import Callable
+from functools import wraps
+
+TRUTHY_VALUES = ["1", "true"]
+
+
+def skip_if(env: str) -> Callable:
+    """Build a decorator that disables a function via an environment variable.
+
+    The environment variable is read on every call of the wrapped function, not
+    at decoration time. If its lower-cased value is one of TRUTHY_VALUES, the call
+    is a no-op and returns ``None``. Otherwise, the wrapped function is called
+    with the given arguments and its result is returned.
+
+    Args:
+        env: Name of the environment variable to check.
+
+    Returns:
+        A decorator that can be applied to the function that should be skippable.
+    """
+
+    def decorator(fun: Callable) -> Callable:
+        # Keep this a single wrapper frame: callers that emit warnings with a
+        # fixed ``stacklevel`` depend on the number of frames added here.
+        @wraps(fun)
+        def wrapper(*args: object, **kwargs: object) -> object:
+            # An unset variable falls back to "", which is never truthy.
+            if os.getenv(env, "").lower() in TRUTHY_VALUES:
+                return None
+            return fun(*args, **kwargs)
+
+        return wrapper
+
+    return decorator
+
+
+@skip_if(env="DATAFRAMELY_NO_FUTURE_WARNINGS")
+def warn_nullable_default_change() -> None:
+    """Emit a FutureWarning about the upcoming change of the ``nullable`` default.
+
+    The call is skipped entirely when ``DATAFRAMELY_NO_FUTURE_WARNINGS`` is set to
+    a truthy value (handled by the ``skip_if`` decorator).
+    """
+    message = (
+        "The 'nullable' argument was not explicitly set. In a future release, "
+        "'nullable=False' will be the default if 'nullable' is not specified. "
+        "Explicitly set 'nullable=True' if you want your column to be nullable."
+    )
+    # NOTE(review): the stack level counts frames upwards from this function,
+    # including the ``skip_if`` wrapper; presumably chosen so that the warning is
+    # attributed to the code constructing the column -- confirm.
+    warnings.warn(message, category=FutureWarning, stacklevel=4)
@@ -10,6 +10,7 @@
 import polars as pl
 
 from dataframely._compat import pa, sa, sa_TypeEngine
+from dataframely._deprecation import warn_nullable_default_change
 from dataframely._polars import PolarsDataType
 from dataframely.random import Generator
 
@@ -28,7 +29,7 @@ class Column(ABC):
     def __init__(
         self,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         check: Callable[[pl.Expr], pl.Expr] | None = None,
         alias: str | None = None,
@@ -37,6 +38,9 @@ def __init__(
         """
         Args:
             nullable: Whether this column may contain null values.
+                Explicitly set `nullable=True` if you want your column to be nullable.
+                In a future release, `nullable=False` will be the default if `nullable`
+                is not specified.
             primary_key: Whether this column is part of the primary key of the schema.
                 If ``True``, ``nullable`` is automatically set to ``False``.
             check: A custom check to run for this column. Must return a non-aggregated
@@ -48,6 +52,10 @@ def __init__(
                 internally sets the alias to the column's name in the parent schema.
             metadata: A dictionary of metadata to attach to the column.
         """
+        if nullable is None:
+            warn_nullable_default_change()
+            nullable = True
+
         self.nullable = nullable and not primary_key
         self.primary_key = primary_key
         self.check = check

@@ -32,7 +32,7 @@ class Date(OrdinalMixin[dt.date], Column):
     def __init__(
         self,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         min: dt.date | None = None,
         min_exclusive: dt.date | None = None,
@@ -46,6 +46,9 @@ def __init__(
         """
         Args:
             nullable: Whether this column may contain null values.
+                Explicitly set `nullable=True` if you want your column to be nullable.
+                In a future release, `nullable=False` will be the default if `nullable`
+                is not specified.
             primary_key: Whether this column is part of the primary key of the schema.
                 If ``True``, ``nullable`` is automatically set to ``False``.
             min: The minimum date for dates in this column (inclusive).
@@ -142,7 +145,7 @@ class Time(OrdinalMixin[dt.time], Column):
     def __init__(
         self,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         min: dt.time | None = None,
         min_exclusive: dt.time | None = None,
@@ -156,6 +159,9 @@ def __init__(
         """
         Args:
             nullable: Whether this column may contain null values.
+                Explicitly set `nullable=True` if you want your column to be nullable.
+                In a future release, `nullable=False` will be the default if `nullable`
+                is not specified.
             primary_key: Whether this column is part of the primary key of the schema.
                 If ``True``, ``nullable`` is automatically set to ``False``.
             min: The minimum time for times in this column (inclusive).
@@ -258,7 +264,7 @@ class Datetime(OrdinalMixin[dt.datetime], Column):
     def __init__(
         self,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         min: dt.datetime | None = None,
         min_exclusive: dt.datetime | None = None,
@@ -272,6 +278,9 @@ def __init__(
         """
         Args:
             nullable: Whether this column may contain null values.
+                Explicitly set `nullable=True` if you want your column to be nullable.
+                In a future release, `nullable=False` will be the default if `nullable`
+                is not specified.
             primary_key: Whether this column is part of the primary key of the schema.
                 If ``True``, ``nullable`` is automatically set to ``False``.
             min: The minimum datetime for datetimes in this column (inclusive).
@@ -364,7 +373,7 @@ class Duration(OrdinalMixin[dt.timedelta], Column):
     def __init__(
         self,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         min: dt.timedelta | None = None,
         min_exclusive: dt.timedelta | None = None,
@@ -378,6 +387,9 @@ def __init__(
         """
         Args:
             nullable: Whether this column may contain null values.
+                Explicitly set `nullable=True` if you want your column to be nullable.
+                In a future release, `nullable=False` will be the default if `nullable`
+                is not specified.
             primary_key: Whether this column is part of the primary key of the schema.
                 If ``True``, ``nullable`` is automatically set to ``False``.
             min: The minimum duration for durations in this column (inclusive).

@@ -27,7 +27,7 @@ def __init__(
         precision: int | None = None,
         scale: int = 0,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         min: decimal.Decimal | None = None,
         min_exclusive: decimal.Decimal | None = None,
@@ -42,6 +42,9 @@ def __init__(
             precision: Maximum number of digits in each number.
             scale: Number of digits to the right of the decimal point in each number.
             nullable: Whether this column may contain null values.
+                Explicitly set `nullable=True` if you want your column to be nullable.
+                In a future release, `nullable=False` will be the default if `nullable`
+                is not specified.
             primary_key: Whether this column is part of the primary key of the schema.
                 If ``True``, ``nullable`` is automatically set to ``False``.
             min: The minimum value for decimals in this column (inclusive).

@@ -22,7 +22,7 @@ def __init__(
         self,
         categories: Sequence[str],
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         check: Callable[[pl.Expr], pl.Expr] | None = None,
         alias: str | None = None,
@@ -32,6 +32,9 @@ def __init__(
         Args:
             categories: The list of valid categories for the enum.
             nullable: Whether this column may contain null values.
+                Explicitly set `nullable=True` if you want your column to be nullable.
+                In a future release, `nullable=False` will be the default if `nullable`
+                is not specified.
             primary_key: Whether this column is part of the primary key of the schema.
                 If ``True``, ``nullable`` is automatically set to ``False``.
             check: A custom check to run for this column. Must return a non-aggregated

@@ -26,7 +26,7 @@ class _BaseFloat(OrdinalMixin[float], Column):
     def __init__(
         self,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         allow_inf_nan: bool = False,
         min: float | None = None,
@@ -40,6 +40,9 @@ def __init__(
         """
         Args:
             nullable: Whether this column may contain null values.
+                Explicitly set `nullable=True` if you want your column to be nullable.
+                In a future release, `nullable=False` will be the default if `nullable`
+                is not specified.
             primary_key: Whether this column is part of the primary key of the schema.
                 If ``True``, ``nullable`` is automatically set to ``False``.
             allow_inf_nan: Whether this column may contain NaN and infinity values.

@@ -23,7 +23,7 @@ class _BaseInteger(IsInMixin[int], OrdinalMixin[int], Column):
     def __init__(
         self,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         min: int | None = None,
         min_exclusive: int | None = None,
@@ -37,6 +37,9 @@ def __init__(
         """
         Args:
             nullable: Whether this column may contain null values.
+                Explicitly set `nullable=True` if you want your column to be nullable.
+                In a future release, `nullable=False` will be the default if `nullable`
+                is not specified.
             primary_key: Whether this column is part of the primary key of the schema.
                 If ``True``, ``nullable`` is automatically set to ``False``.
             min: The minimum value for integers in this column (inclusive).

@@ -24,7 +24,7 @@ def __init__(
         self,
         inner: Column,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         check: Callable[[pl.Expr], pl.Expr] | None = None,
         alias: str | None = None,
@@ -40,6 +40,9 @@ def __init__(
                 must be unique across all list items. Note that if the struct itself has
                 ``primary_key=True`` set, the fields' settings do not take effect.
             nullable: Whether this column may contain null values.
+                Explicitly set `nullable=True` if you want your column to be nullable.
+                In a future release, `nullable=False` will be the default if `nullable`
+                is not specified.
             primary_key: Whether this column is part of the primary key of the schema.
             check: A custom check to run for this column. Must return a non-aggregated
                 boolean expression.

@@ -23,7 +23,7 @@ class String(Column):
     def __init__(
         self,
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         min_length: int | None = None,
         max_length: int | None = None,
@@ -35,6 +35,9 @@ def __init__(
         """
         Args:
             nullable: Whether this column may contain null values.
+                Explicitly set `nullable=True` if you want your column to be nullable.
+                In a future release, `nullable=False` will be the default if `nullable`
+                is not specified.
             primary_key: Whether this column is part of the primary key of the schema.
             min_length: The minimum byte-length of string values in this column.
             max_length: The maximum byte-length of string values in this column.

@@ -22,7 +22,7 @@ def __init__(
         self,
         inner: dict[str, Column],
         *,
-        nullable: bool = True,
+        nullable: bool | None = None,
         primary_key: bool = False,
         check: Callable[[pl.Expr], pl.Expr] | None = None,
         alias: str | None = None,
@@ -35,6 +35,9 @@ def __init__(
                 struct is nested inside a list. In this case, the list items must be
                 unique wrt. the struct fields that have ``primary_key=True`` set.
             nullable: Whether this column may contain null values.
+                Explicitly set `nullable=True` if you want your column to be nullable.
+                In a future release, `nullable=False` will be the default if `nullable`
+                is not specified.
             primary_key: Whether this column is part of the primary key of the schema.
             check: A custom check to run for this column. Must return a non-aggregated
                 boolean expression.

@@ -22,8 +22,8 @@ requires-python = ">=3.11"
 version = "0.0.0"
 
 [project.urls]
-Repository = "https://github.com/quantco/dataframely"
 Documentation = "https://dataframely.readthedocs.io/"
+Repository = "https://github.com/quantco/dataframely"
 
 [tool.maturin]
 module-name = "dataframely._extre"
@@ -82,7 +82,8 @@ module = ["pyarrow.*"]
 [tool.pytest.ini_options]
 addopts = "--import-mode=importlib"
 filterwarnings = [
-  "ignore:datetime.datetime.utcfromtimestamp\\(\\) is deprecated.*:DeprecationWarning",
+  # Almost all tests are oblivious to the value of `nullable`. Let's ignore the warning as long as it exists.
+  "ignore:The 'nullable' argument was not explicitly set:FutureWarning",
 ]
 testpaths = ["tests"]
 

@@ -0,0 +1,28 @@
+# Copyright (c) QuantCo 2025-2025
+# SPDX-License-Identifier: BSD-3-Clause
+
+import warnings
+
+import pytest
+
+import dataframely as dy
+
+
+def test_column_constructor_warns_about_nullable(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Omitting ``nullable`` emits the FutureWarning when the opt-out is empty."""
+    # Force the opt-out variable to an empty value for the duration of the test.
+    monkeypatch.setenv("DATAFRAMELY_NO_FUTURE_WARNINGS", "")
+    expected_message = "The 'nullable' argument was not explicitly set"
+    with pytest.warns(FutureWarning, match=expected_message):
+        dy.Integer()
+
+
+@pytest.mark.parametrize("env_var", ["1", "True", "true"])
+def test_future_warning_skip(monkeypatch: pytest.MonkeyPatch, env_var: str) -> None:
+    """Constructing a column must not emit a FutureWarning for these values."""
+    monkeypatch.setenv("DATAFRAMELY_NO_FUTURE_WARNINGS", env_var)
+
+    # Inside this block every FutureWarning is turned into an exception, so an
+    # emitted warning fails the test.
+    with warnings.catch_warnings():
+        warnings.simplefilter(action="error", category=FutureWarning)
+        dy.Integer()