Skip to content

Commit 800fa26

Browse files
authored
Merge pull request #17 from StreetEasy/bug/datetime-dtype-generation
Bug/datetime dtype generation
2 parents 61b33ae + 77107ce commit 800fa26

13 files changed

+44
-27
lines changed

.flake8

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[flake8]
22
max-line-length = 88
3-
ignore = E501, E203, W503, E265
3+
ignore = E501, E203, W503, E265, E231
44
per-file-ignores = __init__.py:F401
55
exclude =
66
.git

.gitignore

+3-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,6 @@
22
**/__pycache__/*
33
*.pyc
44
.DS_Store
5-
**/.DS_Store
5+
**/.DS_Store
6+
7+
.hypothesis/*

changelog.md

+7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# Changelog
22

3+
4+
5+
6+
v0.0.5:
7+
- fix column dtype generation/validation bug for datetime (`datetime64[ns]`) columns
8+
9+
## Pre-Publication
310
v1.3.0
411
- renamed strict_column_set to additionalColumns
512
- renamed strict_column_order to exactColumnOrder

dfschema/core/column.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ def _validate_dtype(self, series: pd.Series) -> None:
249249
if not self._dtype_test_func[_dtype](series):
250250
txt = _tmplt.format(self.name, series.dtype, _dtype, self.dtype)
251251
raise DataFrameValidationError(txt)
252-
elif series.dtype != self._dtype:
252+
elif series.dtype != _dtype:
253253
txt = _tmplt.format(self.name, series.dtype, _dtype, self.dtype)
254254
raise DataFrameValidationError(txt)
255255

dfschema/core/dtype.py

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
"int32": "int",
3232
"int16": "int",
3333
# time
34+
"datetime64[ns]": "datetime64[ns]",
3435
"datetime": "datetime64[ns]",
3536
"date": "datetime64[ns]",
3637
"timedelta": "timedelta64[ns]",

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "dfschema"
3-
version = "0.0.4" # set via gitlab-ci
3+
version = "0.0.5" # set via gitlab-ci
44
description = "lightweight pandas.DataFrame schema"
55
authors = ["Philipp <[email protected]>"]
66
readme = "README.md"

scripts/generate_jsonschema.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
if __name__ == "__main__":
2-
from dfs.core.core import DfSchema
2+
from dfschema.core.core import DfSchema
33

44
with open("./jsonschemas/schema.json", "w") as f:
55
f.write(DfSchema.schema_json(indent=2))

scripts/generate_v2.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from pathlib import Path
2-
from dfs.core.core import DfSchema
2+
from dfschema.core.core import DfSchema
33

44

55
def get_files(path: Path) -> list:

tests/conftest.py

+9
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,15 @@ def df3():
3030
return pd.DataFrame({"x": [np.nan] * 4, "y": ["foo", "bar", "baz", np.nan]})
3131

3232

33+
@pytest.fixture()
34+
def df4():
35+
df = pd.DataFrame(
36+
{"x": [1, 2, 3, 4], "y": ["foo", "bar", "baz", None], "z": ["2022-10-23",] * 4}
37+
)
38+
df["z"] = pd.to_datetime(df["z"])
39+
return df
40+
41+
3342
# This section for `test_jsonvalidate.py`
3443

3544

tests/test_generate.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# import pytest
22

33

4-
def test_generate(df1):
4+
def test_generate_df1(df1):
55
from dfschema.core import DfSchema
66

77
print(df1.dtypes)
@@ -13,3 +13,17 @@ def test_generate(df1):
1313
raise Exception(sd, e)
1414

1515
S.validate_df(df1) # type: ignore
16+
17+
18+
def test_generate_df4(df4):
19+
from dfschema.core import DfSchema
20+
21+
print(df4.dtypes)
22+
23+
try:
24+
S = DfSchema.from_df(df4)
25+
except Exception as e: # for debugging
26+
sd = DfSchema.from_df(df4, return_dict=True)
27+
raise Exception(sd, e)
28+
29+
S.validate_df(df4) # type: ignore

tests/test_numeric.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,7 @@
4444
],
4545
"df2": [ # protocol 1.0
4646
{
47-
"columns": {
48-
"x": {"min_value": 1},
49-
"y": {"dtype": "string"},
50-
},
47+
"columns": {"x": {"min_value": 1}, "y": {"dtype": "string"},},
5148
"strict_cols": True,
5249
},
5350
{

tests/test_subsets.py

+2-10
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,7 @@ def test_subset_dict(df_subset):
2121
{
2222
"predicate": {"y": "baz"},
2323
"columns": [
24-
{
25-
"name": "x",
26-
"dtype": "int",
27-
"value_limits": {"min": 3, "max": 3},
28-
}
24+
{"name": "x", "dtype": "int", "value_limits": {"min": 3, "max": 3},}
2925
],
3026
},
3127
],
@@ -49,11 +45,7 @@ def test_subset_query(df_subset):
4945
"predicate": "x >= 3",
5046
"shape": {"rows": 2},
5147
"columns": [
52-
{
53-
"name": "x",
54-
"dtype": "int",
55-
"value_limits": {"max": 4, "min": 3},
56-
}
48+
{"name": "x", "dtype": "int", "value_limits": {"max": 4, "min": 3},}
5749
],
5850
},
5951
],

tests/test_validate.py

+1-6
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,7 @@ def test_validate_df(df1, schema):
4040
{"shape": {"min_cols": 3}},
4141
{"columns": ["x", "y", "z"]},
4242
{"columns": {"x": {"dtype": "floating"}, "y": {"dtype": "floating"}}},
43-
{
44-
"columns": {
45-
"x": {"dtype": "int"},
46-
"y": {"dtype": "character", "na_limit": 0.2},
47-
}
48-
},
43+
{"columns": {"x": {"dtype": "int"}, "y": {"dtype": "character", "na_limit": 0.2},}},
4944
]
5045

5146

0 commit comments

Comments
 (0)