From 9d882f2565abc6d76a997e606f5cbe8aa507dc67 Mon Sep 17 00:00:00 2001 From: Simon Shanks Date: Tue, 4 Oct 2022 12:22:00 +0100 Subject: [PATCH 1/5] get_daz and get_ftz for inspecting settings --- daz/__init__.py | 2 ++ daz/_core.c | 18 ++++++++++++++++++ tests/test_daz.py | 6 ++++++ 3 files changed, 26 insertions(+) diff --git a/daz/__init__.py b/daz/__init__.py index b76dc1f..d23fb78 100644 --- a/daz/__init__.py +++ b/daz/__init__.py @@ -1,5 +1,7 @@ from daz._core import set_daz # NOQA from daz._core import set_ftz # NOQA +from daz._core import get_daz # NOQA +from daz._core import get_ftz # NOQA from daz._core import unset_daz # NOQA from daz._core import unset_ftz # NOQA from daz._version import __version__ # NOQA diff --git a/daz/_core.c b/daz/_core.c index a0ce206..c3b7e8d 100644 --- a/daz/_core.c +++ b/daz/_core.c @@ -19,6 +19,22 @@ static PyObject* set_ftz(void) return Py_None; } +static PyObject* get_daz(void) +{ + unsigned int mxcsr = _mm_getcsr(); + if((1<<6)&mxcsr) + Py_RETURN_TRUE; + Py_RETURN_FALSE; +} + +static PyObject* get_ftz(void) +{ + unsigned int mxcsr = _mm_getcsr(); + if((1<<15)&mxcsr) + Py_RETURN_TRUE; + Py_RETURN_FALSE; +} + static PyObject* unset_daz(void) { unsigned int mxcsr = _mm_getcsr(); @@ -40,6 +56,8 @@ static PyObject* unset_ftz(void) static PyMethodDef methods[] = { {"set_ftz", (PyCFunction)set_ftz, METH_NOARGS, 0}, {"set_daz", (PyCFunction)set_daz, METH_NOARGS, 0}, + {"get_ftz", (PyCFunction)get_ftz, METH_NOARGS, 0}, + {"get_daz", (PyCFunction)get_daz, METH_NOARGS, 0}, {"unset_ftz", (PyCFunction)unset_ftz, METH_NOARGS, 0}, {"unset_daz", (PyCFunction)unset_daz, METH_NOARGS, 0}, {NULL, NULL, 0, NULL} diff --git a/tests/test_daz.py b/tests/test_daz.py index c770a55..54f93cb 100644 --- a/tests/test_daz.py +++ b/tests/test_daz.py @@ -16,12 +16,16 @@ def setUp(self): def check_normal(self): assert self.normal == self.denormal * self.scale assert self.normal / self.scale == self.denormal + assert not daz.get_daz() + assert not 
daz.get_ftz() def test_normal(self): self.check_normal() def test_daz(self): daz.set_daz() + assert daz.get_daz() + assert not daz.get_ftz() assert self.normal / self.scale == 0 assert self.denormal * self.scale == 0 assert self.denormal == 0 @@ -30,6 +34,8 @@ def test_daz(self): def test_ftz(self): daz.set_ftz() + assert daz.get_ftz() + assert not daz.get_daz() assert self.normal / self.scale == 0 assert self.denormal * self.scale == self.normal assert self.denormal != 0 From 442d894c8f2f3febf52723c044b2417d3b8945b7 Mon Sep 17 00:00:00 2001 From: Mark S Date: Wed, 17 Aug 2022 14:03:09 -0400 Subject: [PATCH 2/5] feat: Enable compilation on/for `arm64` targets --- daz/_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/daz/_core.c b/daz/_core.c index c3b7e8d..375ec95 100644 --- a/daz/_core.c +++ b/daz/_core.c @@ -1,5 +1,8 @@ #include + +#if defined(__SSE__) #include +#endif static PyObject* set_daz(void) { From 0d08e7706fb9a514800c8afa1836f864ca13c1b3 Mon Sep 17 00:00:00 2001 From: hayati ayguen Date: Sun, 13 Aug 2023 00:24:28 +0200 Subject: [PATCH 3/5] fix compilation on non-SSE like ARM - finally doing nothing! 
Signed-off-by: hayati ayguen --- daz/_core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/daz/_core.c b/daz/_core.c index 375ec95..fe2d0b4 100644 --- a/daz/_core.c +++ b/daz/_core.c @@ -6,52 +6,64 @@ static PyObject* set_daz(void) { +#if defined(__SSE__) unsigned int mxcsr = _mm_getcsr(); mxcsr |= (1<<6); _mm_setcsr(mxcsr); +#endif Py_INCREF(Py_None); return Py_None; } static PyObject* set_ftz(void) { +#if defined(__SSE__) unsigned int mxcsr = _mm_getcsr(); mxcsr |= (1<<15); _mm_setcsr(mxcsr); +#endif Py_INCREF(Py_None); return Py_None; } static PyObject* get_daz(void) { +#if defined(__SSE__) unsigned int mxcsr = _mm_getcsr(); if((1<<6)&mxcsr) Py_RETURN_TRUE; +#endif Py_RETURN_FALSE; } static PyObject* get_ftz(void) { +#if defined(__SSE__) unsigned int mxcsr = _mm_getcsr(); if((1<<15)&mxcsr) Py_RETURN_TRUE; +#endif Py_RETURN_FALSE; } static PyObject* unset_daz(void) { +#if defined(__SSE__) unsigned int mxcsr = _mm_getcsr(); mxcsr &= ~(1<<6); _mm_setcsr(mxcsr); +#endif Py_INCREF(Py_None); return Py_None; } static PyObject* unset_ftz(void) { +#if defined(__SSE__) unsigned int mxcsr = _mm_getcsr(); mxcsr &= ~(1<<15); _mm_setcsr(mxcsr); +#endif Py_INCREF(Py_None); return Py_None; } From d03a277b9ead8bfe4e7d0f5f18fefe9d8889322e Mon Sep 17 00:00:00 2001 From: hayati ayguen Date: Sat, 12 Aug 2023 22:47:17 +0200 Subject: [PATCH 4/5] updated docs, added type hints, added 'DAZ' class-API and context manager Signed-off-by: hayati ayguen --- README.md | 44 ++++++++++++++++++++++++++++++++++++++++++++ daz/__init__.py | 11 ++++------- daz/_version.py | 2 +- daz/daz.py | 40 ++++++++++++++++++++++++++++++++++++++++ daz/daz.pyi | 15 +++++++++++++++ daz/py.typed | 0 setup.py | 1 + 7 files changed, 105 insertions(+), 8 deletions(-) create mode 100755 daz/daz.py create mode 100644 daz/daz.pyi create mode 100644 daz/py.typed diff --git a/README.md b/README.md index c0709b6..8b15d4f 100644 --- a/README.md +++ b/README.md @@ -5,11 +5,55 @@ The tool to change the CPU 
flag about denormals number. [![GitHub license](https://img.shields.io/github/license/chainer/daz.svg)](https://github.com/chainer/daz) [![travis](https://img.shields.io/travis/chainer/daz/master.svg)](https://travis-ci.org/chainer/daz) + * **DAZ** (Denormals-Are-Zero) treats denormal inputs as zero + * **FTZ** (Flush-To-Zero) writes zero for denormal outputs + + +# Application + +On x86-64 CPUs with 64-bit programs, the CPUs SSE unit performs the floating point operations. +When it comes to calculate with denormal (aka. subnormal) numbers, there are performance penalties. + +If your specific use-case doesn't require highest precision with small numbers, +these can be treated as - or rounded to - zero. +This is achieved by setting the CPU-flags. +When doing so, the calculations won't be slowed down by factors! + +In python, especially [NumPy](https://numpy.org/) functions show a measurable benefit. + # Usage +basic functional use: ```python import daz daz.set_ftz() daz.set_daz() +daz.unset_ftz() +daz.unset_daz() +daz.get_ftz() +daz.get_daz() +``` + +alternative 1: +```python +from daz import DAZ +# prev_daz: bool = DAZ.set_daz(daz: bool | None = True) +# prev_ftz: bool = DAZ.set_ftz(ftz: bool | None = True) +prev_daz = DAZ.set_daz(True) +prev_ftz = DAZ.set_ftz() +``` + +alternative 2: +```python +from daz import DAZ + +# DAZ(daz: bool = True, ftz: bool = True) +with DAZ(): + # daz and ftz set True + pass + +with DAZ(False, True): + # daz unset, but ftz set True + pass ``` diff --git a/daz/__init__.py b/daz/__init__.py index d23fb78..7dc7ee4 100644 --- a/daz/__init__.py +++ b/daz/__init__.py @@ -1,7 +1,4 @@ -from daz._core import set_daz # NOQA -from daz._core import set_ftz # NOQA -from daz._core import get_daz # NOQA -from daz._core import get_ftz # NOQA -from daz._core import unset_daz # NOQA -from daz._core import unset_ftz # NOQA -from daz._version import __version__ # NOQA +from daz._core import get_daz, set_daz, unset_daz +from daz._core import get_ftz, 
set_ftz, unset_ftz +from daz._version import __version__ +from .daz import DAZ diff --git a/daz/_version.py b/daz/_version.py index b8023d8..3b93d0b 100644 --- a/daz/_version.py +++ b/daz/_version.py @@ -1 +1 @@ -__version__ = '0.0.1' +__version__ = "0.0.2" diff --git a/daz/daz.py b/daz/daz.py new file mode 100755 index 0000000..8fc51cd --- /dev/null +++ b/daz/daz.py @@ -0,0 +1,40 @@ + +import daz._core as daz_core + +class DAZ: + def __init__(self, daz: bool = True, ftz: bool = True) -> None: + self.daz: bool = daz + self.ftz: bool = ftz + self._prev_daz: bool = False + self._prev_ftz: bool = False + + @staticmethod + def set_daz(daz: bool | None = True) -> bool: + """sets 'Denormals-Are-Zero'-flag and return previous value""" + prev = daz_core.get_daz() + if daz is not None: + if daz is True: + daz_core.set_daz() + else: + daz_core.unset_daz() + return prev + + @staticmethod + def set_ftz(ftz: bool | None = True) -> bool: + """sets 'Flush-To-Zero'-flag and return previous value""" + prev = daz_core.get_ftz() + if ftz is not None: + if ftz is True: + daz_core.set_ftz() + else: + daz_core.unset_ftz() + return prev + + def __enter__(self) -> tuple[bool, bool]: + self._prev_daz = DAZ.set_daz(self.daz) + self._prev_ftz = DAZ.set_ftz(self.ftz) + return self.daz, self.ftz + + def __exit__(self, exc_type, exc_val, exc_tb) -> None: + DAZ.set_daz(self._prev_daz) + DAZ.set_ftz(self._prev_ftz) diff --git a/daz/daz.pyi b/daz/daz.pyi new file mode 100644 index 0000000..b42c7b0 --- /dev/null +++ b/daz/daz.pyi @@ -0,0 +1,15 @@ +def set_ftz() -> None: ... +def set_daz() -> None: ... +def unset_ftz() -> None: ... +def unset_daz() -> None: ... +def get_ftz() -> bool: ... +def get_daz() -> bool: ... + +class DAZ: + def __init__(self, daz: bool = True, ftz: bool = True) -> None: ... + @staticmethod + def set_daz(daz: bool | None = True) -> bool: ... + @staticmethod + def set_ftz(ftz: bool | None = True) -> bool: ... + def __enter__(self) -> tuple[bool, bool]: ... 
+ def __exit__(self, exc_type, exc_val, exc_tb) -> None: ... diff --git a/daz/py.typed b/daz/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/setup.py b/setup.py index f7b9d8c..8bb94bd 100644 --- a/setup.py +++ b/setup.py @@ -59,6 +59,7 @@ def reduce_requirements(reqs): packages=[ 'daz', ], + package_data={"daz": ["py.typed", "daz.pyi"]}, ext_modules=[setuptools.Extension('daz._core', ['daz/_core.c'])], zip_safe=False, extras_require=extras_require, From 61a6a6d8ad582d2eda267f0ef61abc88e2b00709 Mon Sep 17 00:00:00 2001 From: hayati ayguen Date: Mon, 14 Aug 2023 21:39:23 +0200 Subject: [PATCH 5/5] Update README.md --- README.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 8b15d4f..7402acc 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ -# daz : Denormals are zeros -The tool to change the CPU flag about denormals number. - [![pypi](https://img.shields.io/pypi/v/daz.svg)](https://pypi.python.org/pypi/daz) [![GitHub license](https://img.shields.io/github/license/chainer/daz.svg)](https://github.com/chainer/daz) [![travis](https://img.shields.io/travis/chainer/daz/master.svg)](https://travis-ci.org/chainer/daz) +# daz : Denormals are zeros +The tool to change the CPU flag about denormals number. + * **DAZ** (Denormals-Are-Zero) treats denormal inputs as zero * **FTZ** (Flush-To-Zero) writes zero for denormal outputs @@ -14,11 +14,13 @@ The tool to change the CPU flag about denormals number. On x86-64 CPUs with 64-bit programs, the CPUs SSE unit performs the floating point operations. When it comes to calculate with denormal (aka. subnormal) numbers, there are performance penalties. -If your specific use-case doesn't require highest precision with small numbers, +If your specific use-case doesn't require highest accuracy with small numbers, these can be treated as - or rounded to - zero. This is achieved by setting the CPU-flags. 
When doing so, the calculations won't be slowed down by factors! + +See [Wikipedia](https://en.wikipedia.org/wiki/Subnormal_number) for additional information on denormal numbers. + In python, especially [NumPy](https://numpy.org/) functions show a measurable benefit.