diff --git a/README.md b/README.md index c0709b6..7402acc 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,61 @@ -# daz : Denormals are zeros -The tool to change the CPU flag about denormals number. - [![pypi](https://img.shields.io/pypi/v/daz.svg)](https://pypi.python.org/pypi/daz) [![GitHub license](https://img.shields.io/github/license/chainer/daz.svg)](https://github.com/chainer/daz) [![travis](https://img.shields.io/travis/chainer/daz/master.svg)](https://travis-ci.org/chainer/daz) +# daz : Denormals are zeros +A tool to change the CPU flags that control the handling of denormal numbers. + + * **DAZ** (Denormals-Are-Zero) treats denormal inputs as zero + * **FTZ** (Flush-To-Zero) writes zero for denormal outputs + + +# Application + +On x86-64 CPUs with 64-bit programs, the CPU's SSE unit performs the floating-point operations. +Calculating with denormal (a.k.a. subnormal) numbers incurs performance penalties. + +If your use case doesn't require the highest accuracy for very small numbers, +they can be treated as - or rounded to - zero. +This is achieved by setting the corresponding CPU flags. +With the flags set, these calculations are no longer slowed down by large factors. + +See [Wikipedia](https://en.wikipedia.org/wiki/Subnormal_number) for more information on denormal numbers. + +In Python, [NumPy](https://numpy.org/) functions in particular show a measurable benefit. 
+ # Usage +basic functional use: ```python import daz daz.set_ftz() daz.set_daz() +daz.unset_ftz() +daz.unset_daz() +daz.get_ftz() +daz.get_daz() +``` + +alternative 1: +```python +from daz import DAZ +# prev_daz: bool = DAZ.set_daz(daz: bool | None = True) +# prev_ftz: bool = DAZ.set_ftz(ftz: bool | None = True) +prev_daz = DAZ.set_daz(True) +prev_ftz = DAZ.set_ftz() +``` + +alternative 2: +```python +from daz import DAZ + +# DAZ(daz: bool = True, ftz: bool = True) +with DAZ(): + # daz and ftz set True + pass + +with DAZ(False, True): + # daz unset, but ftz set True + pass ``` diff --git a/daz/__init__.py b/daz/__init__.py index b76dc1f..7dc7ee4 100644 --- a/daz/__init__.py +++ b/daz/__init__.py @@ -1,5 +1,4 @@ -from daz._core import set_daz # NOQA -from daz._core import set_ftz # NOQA -from daz._core import unset_daz # NOQA -from daz._core import unset_ftz # NOQA -from daz._version import __version__ # NOQA +from daz._core import get_daz, set_daz, unset_daz +from daz._core import get_ftz, set_ftz, unset_ftz +from daz._version import __version__ +from .daz import DAZ diff --git a/daz/_core.c b/daz/_core.c index a0ce206..fe2d0b4 100644 --- a/daz/_core.c +++ b/daz/_core.c @@ -1,38 +1,69 @@ #include + +#if defined(__SSE__) #include +#endif static PyObject* set_daz(void) { +#if defined(__SSE__) unsigned int mxcsr = _mm_getcsr(); mxcsr |= (1<<6); _mm_setcsr(mxcsr); +#endif Py_INCREF(Py_None); return Py_None; } static PyObject* set_ftz(void) { +#if defined(__SSE__) unsigned int mxcsr = _mm_getcsr(); mxcsr |= (1<<15); _mm_setcsr(mxcsr); +#endif Py_INCREF(Py_None); return Py_None; } +static PyObject* get_daz(void) +{ +#if defined(__SSE__) + unsigned int mxcsr = _mm_getcsr(); + if((1<<6)&mxcsr) + Py_RETURN_TRUE; +#endif + Py_RETURN_FALSE; +} + +static PyObject* get_ftz(void) +{ +#if defined(__SSE__) + unsigned int mxcsr = _mm_getcsr(); + if((1<<15)&mxcsr) + Py_RETURN_TRUE; +#endif + Py_RETURN_FALSE; +} + static PyObject* unset_daz(void) { +#if defined(__SSE__) unsigned int 
mxcsr = _mm_getcsr(); mxcsr &= ~(1<<6); _mm_setcsr(mxcsr); +#endif Py_INCREF(Py_None); return Py_None; } static PyObject* unset_ftz(void) { +#if defined(__SSE__) unsigned int mxcsr = _mm_getcsr(); mxcsr &= ~(1<<15); _mm_setcsr(mxcsr); +#endif Py_INCREF(Py_None); return Py_None; } @@ -40,6 +71,8 @@ static PyObject* unset_ftz(void) static PyMethodDef methods[] = { {"set_ftz", (PyCFunction)set_ftz, METH_NOARGS, 0}, {"set_daz", (PyCFunction)set_daz, METH_NOARGS, 0}, + {"get_ftz", (PyCFunction)get_ftz, METH_NOARGS, 0}, + {"get_daz", (PyCFunction)get_daz, METH_NOARGS, 0}, {"unset_ftz", (PyCFunction)unset_ftz, METH_NOARGS, 0}, {"unset_daz", (PyCFunction)unset_daz, METH_NOARGS, 0}, {NULL, NULL, 0, NULL} diff --git a/daz/_version.py b/daz/_version.py index b8023d8..3b93d0b 100644 --- a/daz/_version.py +++ b/daz/_version.py @@ -1 +1 @@ -__version__ = '0.0.1' +__version__ = "0.0.2" diff --git a/daz/daz.py b/daz/daz.py new file mode 100755 index 0000000..8fc51cd --- /dev/null +++ b/daz/daz.py @@ -0,0 +1,40 @@ + +import daz._core as daz_core + +class DAZ: + def __init__(self, daz: bool = True, ftz: bool = True) -> None: + self.daz: bool = daz + self.ftz: bool = ftz + self._prev_daz: bool = False + self._prev_ftz: bool = False + + @staticmethod + def set_daz(daz: bool | None = True) -> bool: + """sets 'Denormals-Are-Zero'-flag and return previous value""" + prev = daz_core.get_daz() + if daz is not None: + if daz is True: + daz_core.set_daz() + else: + daz_core.unset_daz() + return prev + + @staticmethod + def set_ftz(ftz: bool | None = True) -> bool: + """sets 'Flush-To-Zero'-flag and return previous value""" + prev = daz_core.get_ftz() + if ftz is not None: + if ftz is True: + daz_core.set_ftz() + else: + daz_core.unset_ftz() + return prev + + def __enter__(self) -> tuple[bool, bool]: + self._prev_daz = DAZ.set_daz(self.daz) + self._prev_ftz = DAZ.set_ftz(self.ftz) + return self.daz, self.ftz + + def __exit__(self, exc_type, exc_val, exc_tb) -> None: + 
DAZ.set_daz(self._prev_daz) + DAZ.set_ftz(self._prev_ftz) diff --git a/daz/daz.pyi b/daz/daz.pyi new file mode 100644 index 0000000..b42c7b0 --- /dev/null +++ b/daz/daz.pyi @@ -0,0 +1,15 @@ +def set_ftz() -> None: ... +def set_daz() -> None: ... +def unset_ftz() -> None: ... +def unset_daz() -> None: ... +def get_ftz() -> bool: ... +def get_daz() -> bool: ... + +class DAZ: + def __init__(self, daz: bool = True, ftz: bool = True) -> None: ... + @staticmethod + def set_daz(daz: bool | None = True) -> bool: ... + @staticmethod + def set_ftz(ftz: bool | None = True) -> bool: ... + def __enter__(self) -> tuple[bool, bool]: ... + def __exit__(self, exc_type, exc_val, exc_tb) -> None: ... diff --git a/daz/py.typed b/daz/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/setup.py b/setup.py index f7b9d8c..8bb94bd 100644 --- a/setup.py +++ b/setup.py @@ -59,6 +59,7 @@ def reduce_requirements(reqs): packages=[ 'daz', ], + package_data={"daz": ["py.typed", "daz.pyi"]}, ext_modules=[setuptools.Extension('daz._core', ['daz/_core.c'])], zip_safe=False, extras_require=extras_require, diff --git a/tests/test_daz.py b/tests/test_daz.py index c770a55..54f93cb 100644 --- a/tests/test_daz.py +++ b/tests/test_daz.py @@ -16,12 +16,16 @@ def setUp(self): def check_normal(self): assert self.normal == self.denormal * self.scale assert self.normal / self.scale == self.denormal + assert not daz.get_daz() + assert not daz.get_ftz() def test_normal(self): self.check_normal() def test_daz(self): daz.set_daz() + assert daz.get_daz() + assert not daz.get_ftz() assert self.normal / self.scale == 0 assert self.denormal * self.scale == 0 assert self.denormal == 0 @@ -30,6 +34,8 @@ def test_daz(self): def test_ftz(self): daz.set_ftz() + assert daz.get_ftz() + assert not daz.get_daz() assert self.normal / self.scale == 0 assert self.denormal * self.scale == self.normal assert self.denormal != 0