diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl new file mode 100644 index 0000000000..d3b3543a28 --- /dev/null +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -0,0 +1,148 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_BXDF_FRESNEL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_BXDF_FRESNEL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/numbers.hlsl" +#include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" +#include "nbl/builtin/hlsl/spirv_intrinsics/core.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +namespace bxdf +{ + +namespace impl +{ +template<typename T> +struct orientedEtas; + +template<> +struct orientedEtas<float> +{ + static bool __call(NBL_REF_ARG(float) orientedEta, NBL_REF_ARG(float) rcpOrientedEta, float NdotI, float eta) + { + const bool backside = NdotI < 0.0; + const float rcpEta = 1.0 / eta; + orientedEta = backside ? rcpEta : eta; + rcpOrientedEta = backside ? eta : rcpEta; + return backside; + } +}; + +template<> +struct orientedEtas<float32_t3> +{ + static bool __call(NBL_REF_ARG(float32_t3) orientedEta, NBL_REF_ARG(float32_t3) rcpOrientedEta, float NdotI, float32_t3 eta) + { + const bool backside = NdotI < 0.0; + const float32_t3 rcpEta = (float32_t3)1.0 / eta; + orientedEta = backside ? rcpEta:eta; + rcpOrientedEta = backside ? eta:rcpEta; + return backside; + } +}; +} + +template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T> || is_vector_v<T>) +bool getOrientedEtas(NBL_REF_ARG(T) orientedEta, NBL_REF_ARG(T) rcpOrientedEta, scalar_type_t<T> NdotI, T eta) +{ + return impl::orientedEtas<T>::__call(orientedEta, rcpOrientedEta, NdotI, eta); +} + + +template <typename T NBL_FUNC_REQUIRES(concepts::Vectorial<T>) +T reflect(T I, T N, typename vector_traits<T>::scalar_type NdotI) +{ + return N * 2.0f * NdotI - I; +} + +template<typename T NBL_PRIMARY_REQUIRES(vector_traits<T>::Dimension == 3) +struct refract +{ + using this_t = refract<T>; + using vector_type = T; + using scalar_type = typename vector_traits<T>::scalar_type; + + static this_t create(NBL_CONST_REF_ARG(vector_type) I, NBL_CONST_REF_ARG(vector_type) N, bool backside, scalar_type NdotI, scalar_type NdotI2, scalar_type rcpOrientedEta, scalar_type rcpOrientedEta2) + { + this_t retval; + retval.I = I; + retval.N = N; + retval.backside = backside; + retval.NdotI = NdotI; + retval.NdotI2 = NdotI2; + retval.rcpOrientedEta = rcpOrientedEta; + retval.rcpOrientedEta2 = rcpOrientedEta2; + return retval; + } + + static this_t create(NBL_CONST_REF_ARG(vector_type) I, NBL_CONST_REF_ARG(vector_type) N, scalar_type NdotI, scalar_type eta) + { + this_t retval; + retval.I = I; + retval.N = N; + scalar_type orientedEta; + retval.backside = getOrientedEtas<scalar_type>(orientedEta, retval.rcpOrientedEta, NdotI, eta); + retval.NdotI = NdotI; + retval.NdotI2 = NdotI * NdotI; + retval.rcpOrientedEta2 = retval.rcpOrientedEta * retval.rcpOrientedEta; + return retval; + } + + static this_t create(NBL_CONST_REF_ARG(vector_type) I, NBL_CONST_REF_ARG(vector_type) N, scalar_type eta) + { + this_t retval; + retval.I = I; + retval.N = N; + retval.NdotI = dot<vector_type>(N, I); + scalar_type orientedEta; + retval.backside = getOrientedEtas<scalar_type>(orientedEta, retval.rcpOrientedEta, retval.NdotI, eta); + retval.NdotI2 = retval.NdotI * retval.NdotI; + retval.rcpOrientedEta2 = retval.rcpOrientedEta * retval.rcpOrientedEta; + return retval; + } + + static scalar_type computeNdotT(bool backside, scalar_type NdotI2, scalar_type rcpOrientedEta2) + { + scalar_type NdotT2 = rcpOrientedEta2 * NdotI2 + 1.0 - rcpOrientedEta2; + scalar_type absNdotT = sqrt<scalar_type>(NdotT2); + return backside ? absNdotT : -(absNdotT); + } + + vector_type doRefract() + { + return N * (NdotI * rcpOrientedEta + computeNdotT(backside, NdotI2, rcpOrientedEta2)) - rcpOrientedEta * I; + } + + static vector_type doReflectRefract(bool _refract, NBL_CONST_REF_ARG(vector_type) _I, NBL_CONST_REF_ARG(vector_type) _N, scalar_type _NdotI, scalar_type _NdotTorR, scalar_type _rcpOrientedEta) + { + return _N * (_NdotI * (_refract ? _rcpOrientedEta : 1.0f) + _NdotTorR) - _I * (_refract ? _rcpOrientedEta : 1.0f); + } + + vector_type doReflectRefract(bool r) + { + const T NdotTorR = r ? computeNdotT(backside, NdotI2, rcpOrientedEta2) : NdotI; + return doReflectRefract(r, I, N, NdotI, NdotTorR, rcpOrientedEta); + } + + vector_type I; + vector_type N; + bool backside; + scalar_type NdotI; + scalar_type NdotI2; + scalar_type rcpOrientedEta; + scalar_type rcpOrientedEta2; +}; + +} + +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/concepts/core.hlsl b/include/nbl/builtin/hlsl/concepts/core.hlsl index dcbafae8a5..c1bc0277df 100644 --- a/include/nbl/builtin/hlsl/concepts/core.hlsl +++ b/include/nbl/builtin/hlsl/concepts/core.hlsl @@ -47,7 +47,7 @@ template<typename T> NBL_BOOL_CONCEPT UnsignedIntegralScalar = !nbl::hlsl::is_signed_v<T> && ::nbl::hlsl::is_integral_v<T> && nbl::hlsl::is_scalar_v<T>; template<typename T> -NBL_BOOL_CONCEPT FloatingPointScalar = nbl::hlsl::is_floating_point_v<T> && nbl::hlsl::is_scalar_v<T>; +NBL_BOOL_CONCEPT FloatingPointScalar = (nbl::hlsl::is_floating_point_v<T> && nbl::hlsl::is_scalar_v<T>); template<typename T> NBL_BOOL_CONCEPT BooleanScalar = concepts::Boolean<T> && nbl::hlsl::is_scalar_v<T>; diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 8d9c78a9f0..4b281c2111 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -148,7 +148,7 @@ NBL_CONSTEXPR_INLINE_FUNC FloatingPoint flipSign(FloatingPoint val, bool flip = using AsFloat = typename float_of_size<sizeof(FloatingPoint)>::type; using AsUint = typename unsigned_integer_of_size<sizeof(FloatingPoint)>::type; const AsUint asUint = ieee754::impl::bitCastToUintType(val); - return bit_cast<FloatingPoint>(asUint ^ (flip ? ieee754::traits<AsFloat>::signMask : 0ull)); + return bit_cast<FloatingPoint>(asUint ^ (flip ? ieee754::traits<AsFloat>::signMask : AsUint(0ull))); } } diff --git a/include/nbl/builtin/hlsl/math/functions.hlsl b/include/nbl/builtin/hlsl/math/functions.hlsl index c6705e9a52..be341b6a12 100644 --- a/include/nbl/builtin/hlsl/math/functions.hlsl +++ b/include/nbl/builtin/hlsl/math/functions.hlsl @@ -6,6 +6,8 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/numbers.hlsl" +#include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" +#include "nbl/builtin/hlsl/concepts/vector.hlsl" #include "nbl/builtin/hlsl/spirv_intrinsics/core.hlsl" #include "nbl/builtin/hlsl/ieee754.hlsl" @@ -38,7 +40,6 @@ struct lp_norm<T,0,false NBL_PARTIAL_REQ_BOT(concepts::FloatingPointLikeVectoria } }; -// TOOD: is this doing what it should be? template<typename T> NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeVectorial<T> || concepts::IntVectorial<T>) struct lp_norm<T,1,true NBL_PARTIAL_REQ_BOT(concepts::FloatingPointLikeVectorial<T> || concepts::IntVectorial<T>) > { @@ -75,213 +76,20 @@ struct lp_norm<T,2,false NBL_PARTIAL_REQ_BOT(concepts::FloatingPointLikeVectoria return hlsl::sqrt<scalar_type>(__sum(v)); } }; - -// TODO: even/odd cases } -template<typename T, uint32_t LP NBL_FUNC_REQUIRES(LP>0) +template<typename T, uint32_t LP NBL_FUNC_REQUIRES((concepts::FloatingPointVector<T> || concepts::FloatingPointVectorial<T>) && LP>0) scalar_type_t<T> lpNormPreroot(NBL_CONST_REF_ARG(T) v) { return impl::lp_norm<T,LP>::__sum(v); } -template<typename T, uint32_t LP> +template<typename T, uint32_t LP NBL_FUNC_REQUIRES(concepts::FloatingPointVector<T> || concepts::FloatingPointVectorial<T>) scalar_type_t<T> lpNorm(NBL_CONST_REF_ARG(T) v) { return impl::lp_norm<T,LP>::__call(v); } -template <typename T NBL_FUNC_REQUIRES(concepts::Vectorial<T>) -T reflect(T I, T N, typename vector_traits<T>::scalar_type NdotI) -{ - return N * 2.0f * NdotI - I; -} - -namespace impl -{ -template<typename T> -struct orientedEtas; - -template<> -struct orientedEtas<float> -{ - static bool __call(NBL_REF_ARG(float) orientedEta, NBL_REF_ARG(float) rcpOrientedEta, float NdotI, float eta) - { - const bool backside = NdotI < 0.0; - const float rcpEta = 1.0 / eta; - orientedEta = backside ? rcpEta : eta; - rcpOrientedEta = backside ? eta : rcpEta; - return backside; - } -}; - -template<> -struct orientedEtas<float32_t3> -{ - static bool __call(NBL_REF_ARG(float32_t3) orientedEta, NBL_REF_ARG(float32_t3) rcpOrientedEta, float NdotI, float32_t3 eta) - { - const bool backside = NdotI < 0.0; - const float32_t3 rcpEta = (float32_t3)1.0 / eta; - orientedEta = backside ? rcpEta:eta; - rcpOrientedEta = backside ? eta:rcpEta; - return backside; - } -}; -} - -template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T> || is_vector_v<T>) -bool getOrientedEtas(NBL_REF_ARG(T) orientedEta, NBL_REF_ARG(T) rcpOrientedEta, scalar_type_t<T> NdotI, T eta) -{ - return impl::orientedEtas<T>::__call(orientedEta, rcpOrientedEta, NdotI, eta); -} - - -namespace impl -{ - -template<typename T NBL_STRUCT_CONSTRAINABLE> -struct refract; - -template<typename T> NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeVectorial<T>) -struct refract<T NBL_PARTIAL_REQ_BOT(concepts::FloatingPointLikeVectorial<T>) > -{ - using this_t = refract<T>; - using vector_type = T; - using scalar_type = typename vector_traits<T>::scalar_type; - - static this_t create(vector_type I, vector_type N, bool backside, scalar_type NdotI, scalar_type NdotI2, scalar_type rcpOrientedEta, scalar_type rcpOrientedEta2) - { - this_t retval; - retval.I = I; - retval.N = N; - retval.backside = backside; - retval.NdotI = NdotI; - retval.NdotI2 = NdotI2; - retval.rcpOrientedEta = rcpOrientedEta; - retval.rcpOrientedEta2 = rcpOrientedEta2; - return retval; - } - - static this_t create(vector_type I, vector_type N, scalar_type NdotI, scalar_type eta) - { - this_t retval; - retval.I = I; - retval.N = N; - T orientedEta; - retval.backside = getOrientedEtas<T>(orientedEta, retval.rcpOrientedEta, NdotI, eta); - retval.NdotI = NdotI; - retval.NdotI2 = NdotI * NdotI; - retval.rcpOrientedEta2 = retval.rcpOrientedEta * retval.rcpOrientedEta; - return retval; - } - - static this_t create(vector_type I, vector_type N, scalar_type eta) - { - this_t retval; - retval.I = I; - retval.N = N; - retval.NdotI = dot<vector_type>(N, I); - scalar_type orientedEta; - retval.backside = getOrientedEtas<scalar_type>(orientedEta, retval.rcpOrientedEta, retval.NdotI, eta); - retval.NdotI2 = retval.NdotI * retval.NdotI; - retval.rcpOrientedEta2 = retval.rcpOrientedEta * retval.rcpOrientedEta; - return retval; - } - - scalar_type computeNdotT() - { - scalar_type NdotT2 = rcpOrientedEta2 * NdotI2 + 1.0 - rcpOrientedEta2; - scalar_type absNdotT = sqrt<scalar_type>(NdotT2); - return backside ? absNdotT : -(absNdotT); - } - - vector_type doRefract() - { - return N * (NdotI * rcpOrientedEta + computeNdotT()) - rcpOrientedEta * I; - } - - static vector_type doReflectRefract(bool _refract, vector_type _I, vector_type _N, scalar_type _NdotI, scalar_type _NdotTorR, scalar_type _rcpOrientedEta) - { - return _N * (_NdotI * (_refract ? _rcpOrientedEta : 1.0f) + _NdotTorR) - _I * (_refract ? _rcpOrientedEta : 1.0f); - } - - vector_type doReflectRefract(bool r) - { - const scalar_type NdotTorR = r ? computeNdotT() : NdotI; - return doReflectRefract(r, I, N, NdotI, NdotTorR, rcpOrientedEta); - } - - vector_type I; - vector_type N; - bool backside; - scalar_type NdotI; - scalar_type NdotI2; - scalar_type rcpOrientedEta; - scalar_type rcpOrientedEta2; -}; -} - -template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<T>) -T refract(T I, T N, bool backside, - typename vector_traits<T>::scalar_type NdotI, - typename vector_traits<T>::scalar_type NdotI2, - typename vector_traits<T>::scalar_type rcpOrientedEta, - typename vector_traits<T>::scalar_type rcpOrientedEta2) -{ - impl::refract<T> r = impl::refract<T>::create(I, N, backside, NdotI, NdotI2, rcpOrientedEta, rcpOrientedEta2); - return r.doRefract(); -} - -template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<T>) -T refract(T I, T N, typename vector_traits<T>::scalar_type NdotI, typename vector_traits<T>::scalar_type eta) -{ - impl::refract<T> r = impl::refract<T>::create(I, N, NdotI, eta); - return r.doRefract(); -} - -template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<T>) -T refract(T I, T N, typename vector_traits<T>::scalar_type eta) -{ - impl::refract<T> r = impl::refract<T>::create(I, N, eta); - return r.doRefract(); -} - -template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<T>) -typename vector_traits<T>::scalar_type reflectRefract_computeNdotT(bool backside, typename vector_traits<T>::scalar_type NdotI2, typename vector_traits<T>::scalar_type rcpOrientedEta2) -{ - impl::refract<T> r; - r.NdotI2 = NdotI2; - r.rcpOrientedEta2 = rcpOrientedEta2; - r.backside = backside; - return r.computeNdotT(); -} - -template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<T>) -T reflectRefract_impl(bool _refract, T _I, T _N, - typename vector_traits<T>::scalar_type _NdotI, - typename vector_traits<T>::scalar_type _NdotTorR, - typename vector_traits<T>::scalar_type _rcpOrientedEta) -{ - return impl::refract<T>::doReflectRefract(_refract, _I, _N, _NdotI, _NdotTorR, _rcpOrientedEta); -} - -template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<T>) -T reflectRefract(bool _refract, T I, T N, bool backside, - typename vector_traits<T>::scalar_type NdotI, - typename vector_traits<T>::scalar_type NdotI2, - typename vector_traits<T>::scalar_type rcpOrientedEta, - typename vector_traits<T>::scalar_type rcpOrientedEta2) -{ - impl::refract<T> r = impl::refract<T>::create(I, N, backside, NdotI, NdotI2, rcpOrientedEta, rcpOrientedEta2); - return r.doReflectRefract(_refract); -} - -template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<T>) -T reflectRefract(bool _refract, T I, T N, typename vector_traits<T>::scalar_type NdotI, typename vector_traits<T>::scalar_type eta) -{ - impl::refract<T> r = impl::refract<T>::create(I, N, NdotI, eta); - return r.doReflectRefract(_refract); -} // valid only for `theta` in [-PI,PI] template <typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<T>) @@ -292,13 +100,24 @@ void sincos(T theta, NBL_REF_ARG(T) s, NBL_REF_ARG(T) c) s = ieee754::flipSign(s, theta < T(NBL_FP64_LITERAL(0.0))); } -template <typename T NBL_FUNC_REQUIRES(is_scalar_v<T>) -matrix<T, 2, 3> frisvad(vector<T, 3> n) +template <typename T NBL_FUNC_REQUIRES(vector_traits<T>::Dimension == 3) +void frisvad(NBL_CONST_REF_ARG(T) normal, NBL_REF_ARG(T) tangent, NBL_REF_ARG(T) bitangent) { - const T a = T(NBL_FP64_LITERAL(1.0)) / (T(NBL_FP64_LITERAL(1.0)) + n.z); - const T b = -n.x * n.y * a; - return (n.z < -T(NBL_FP64_LITERAL(0.9999999))) ? matrix<T, 2, 3>(vector<T, 3>(0.0,-1.0,0.0), vector<T, 3>(-1.0,0.0,0.0)) : - matrix<T, 2, 3>(vector<T, 3>(T(NBL_FP64_LITERAL(1.0))-n.x*n.x*a, b, -n.x), vector<T, 3>(b, T(NBL_FP64_LITERAL(1.0))-n.y*n.y*a, -n.y)); + using scalar_t = typename vector_traits<T>::scalar_type; + const scalar_t unit = _static_cast<scalar_t>(1); + + const scalar_t a = unit / (unit + normal.z); + const scalar_t b = -normal.x * normal.y * a; + if (normal.z < -_static_cast<scalar_t>(0.9999999)) + { + tangent = T(0.0,-1.0,0.0); + bitangent = T(-1.0,0.0,0.0); + } + else + { + tangent = T(unit - normal.x * normal.x * a, b, -normal.x); + bitangent = T(b, unit - normal.y * normal.y * a, -normal.y); + } } bool partitionRandVariable(float leftProb, NBL_REF_ARG(float) xi, NBL_REF_ARG(float) rcpChoiceProb) @@ -319,6 +138,7 @@ bool partitionRandVariable(float leftProb, NBL_REF_ARG(float) xi, NBL_REF_ARG(fl return pickRight; } + namespace impl { template <typename T NBL_STRUCT_CONSTRAINABLE> @@ -394,7 +214,7 @@ struct trigonometry const bool ABltC = cosSumAB < tmp2; // apply triple angle formula const float absArccosSumABC = acos<float>(clamp<float>(cosSumAB * tmp2 - (tmp0 * tmp4 + tmp3 * tmp1) * tmp5, -1.f, 1.f)); - return ((AltminusB ? ABltC : ABltminusC) ? (-absArccosSumABC) : absArccosSumABC) + (AltminusB | ABltminusC ? numbers::pi<float> : (-numbers::pi<float>)); + return ((AltminusB ? ABltC : ABltminusC) ? (-absArccosSumABC) : absArccosSumABC) + ((AltminusB || ABltminusC) ? numbers::pi<float> : (-numbers::pi<float>)); } static void combineCosForSumOfAcos(float cosA, float cosB, float biasA, float biasB, NBL_REF_ARG(float) out0, NBL_REF_ARG(float) out1) diff --git a/include/nbl/builtin/hlsl/tgmath/impl.hlsl b/include/nbl/builtin/hlsl/tgmath/impl.hlsl index 6e80ef2fd6..c73ad500c4 100644 --- a/include/nbl/builtin/hlsl/tgmath/impl.hlsl +++ b/include/nbl/builtin/hlsl/tgmath/impl.hlsl @@ -50,26 +50,26 @@ template<typename T NBL_STRUCT_CONSTRAINABLE> struct sin_helper; template<typename T NBL_STRUCT_CONSTRAINABLE> struct acos_helper; -template<typename T NBL_STRUCT_CONSTRAINABLE> -struct tan_helper; -template<typename T NBL_STRUCT_CONSTRAINABLE> -struct asin_helper; -template<typename T NBL_STRUCT_CONSTRAINABLE> -struct atan_helper; -template<typename T NBL_STRUCT_CONSTRAINABLE> -struct sinh_helper; -template<typename T NBL_STRUCT_CONSTRAINABLE> -struct cosh_helper; -template<typename T NBL_STRUCT_CONSTRAINABLE> -struct tanh_helper; -template<typename T NBL_STRUCT_CONSTRAINABLE> -struct asinh_helper; -template<typename T NBL_STRUCT_CONSTRAINABLE> -struct acosh_helper; -template<typename T NBL_STRUCT_CONSTRAINABLE> -struct atanh_helper; -template<typename T NBL_STRUCT_CONSTRAINABLE> -struct atan2_helper; +template<typename T NBL_STRUCT_CONSTRAINABLE> +struct tan_helper; +template<typename T NBL_STRUCT_CONSTRAINABLE> +struct asin_helper; +template<typename T NBL_STRUCT_CONSTRAINABLE> +struct atan_helper; +template<typename T NBL_STRUCT_CONSTRAINABLE> +struct sinh_helper; +template<typename T NBL_STRUCT_CONSTRAINABLE> +struct cosh_helper; +template<typename T NBL_STRUCT_CONSTRAINABLE> +struct tanh_helper; +template<typename T NBL_STRUCT_CONSTRAINABLE> +struct asinh_helper; +template<typename T NBL_STRUCT_CONSTRAINABLE> +struct acosh_helper; +template<typename T NBL_STRUCT_CONSTRAINABLE> +struct atanh_helper; +template<typename T NBL_STRUCT_CONSTRAINABLE> +struct atan2_helper; template<typename T NBL_STRUCT_CONSTRAINABLE> struct sqrt_helper; @@ -115,15 +115,15 @@ struct HELPER_NAME<BOOST_PP_SEQ_FOR_EACH_I(WRAP, _, ARG_TYPE_LIST) NBL_PARTIAL_R template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sin_helper, sin, (T), (T), T) template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(cos_helper, cos, (T), (T), T) template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acos_helper, acos, (T), (T), T) -template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tan_helper, tan, (T), (T), T) -template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asin_helper, asin, (T), (T), T) -template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atan_helper, atan, (T), (T), T) -template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sinh_helper, sinh, (T), (T), T) -template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(cosh_helper, cosh, (T), (T), T) -template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tanh_helper, tanh, (T), (T), T) -template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asinh_helper, asinh, (T), (T), T) -template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acosh_helper, acosh, (T), (T), T) -template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atanh_helper, atanh, (T), (T), T) +template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tan_helper, tan, (T), (T), T) +template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asin_helper, asin, (T), (T), T) +template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atan_helper, atan, (T), (T), T) +template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sinh_helper, sinh, (T), (T), T) +template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(cosh_helper, cosh, (T), (T), T) +template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tanh_helper, tanh, (T), (T), T) +template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asinh_helper, asinh, (T), (T), T) +template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acosh_helper, acosh, (T), (T), T) +template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atanh_helper, atanh, (T), (T), T) template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atan2_helper, atan2, (T), (T)(T), T) template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(abs_helper, sAbs, (T), (T), T) template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(abs_helper, fAbs, (T), (T), T) @@ -146,7 +146,7 @@ template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(frexpStruct_helper, fre #define ISINF_AND_ISNAN_RETURN_TYPE conditional_t<is_vector_v<T>, vector<bool, vector_traits<T>::Dimension>, bool> template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(isinf_helper, isInf, (T), (T), ISINF_AND_ISNAN_RETURN_TYPE) template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(isnan_helper, isNan, (T), (T), ISINF_AND_ISNAN_RETURN_TYPE) -#undef ISINF_AND_ISNAN_RETURN_TYPE +#undef ISINF_AND_ISNAN_RETURN_TYPE #undef DECLVAL #undef DECL_ARG @@ -206,6 +206,7 @@ struct erf_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointScala } }; + #else // C++ only specializations #define DECL_ARG(r,data,i,_T) BOOST_PP_COMMA_IF(BOOST_PP_NOT_EQUAL(i,0)) const _T arg##i @@ -226,16 +227,16 @@ struct HELPER_NAME<BOOST_PP_SEQ_FOR_EACH_I(WRAP, _, ARG_TYPE_LIST)>\ template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(cos_helper, cos, concepts::FloatingPointScalar<T>, (T), (T), T) template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sin_helper, sin, concepts::FloatingPointScalar<T>, (T), (T), T) -template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tan_helper, tan, concepts::FloatingPointScalar<T>, (T), (T), T) -template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asin_helper, asin, concepts::FloatingPointScalar<T>, (T), (T), T) -template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acos_helper, acos, concepts::FloatingPointScalar<T>, (T), (T), T) -template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atan_helper, atan, concepts::FloatingPointScalar<T>, (T), (T), T) -template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sinh_helper, sinh, concepts::FloatingPointScalar<T>, (T), (T), T) -template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(cosh_helper, cosh, concepts::FloatingPointScalar<T>, (T), (T), T) -template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tanh_helper, tanh, concepts::FloatingPointScalar<T>, (T), (T), T) -template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asinh_helper, asinh, concepts::FloatingPointScalar<T>, (T), (T), T) -template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acosh_helper, acosh, concepts::FloatingPointScalar<T>, (T), (T), T) -template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atanh_helper, atanh, concepts::FloatingPointScalar<T>, (T), (T), T) +template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tan_helper, tan, concepts::FloatingPointScalar<T>, (T), (T), T) +template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asin_helper, asin, concepts::FloatingPointScalar<T>, (T), (T), T) +template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acos_helper, acos, concepts::FloatingPointScalar<T>, (T), (T), T) +template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atan_helper, atan, concepts::FloatingPointScalar<T>, (T), (T), T) +template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sinh_helper, sinh, concepts::FloatingPointScalar<T>, (T), (T), T) +template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(cosh_helper, cosh, concepts::FloatingPointScalar<T>, (T), (T), T) +template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tanh_helper, tanh, concepts::FloatingPointScalar<T>, (T), (T), T) +template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asinh_helper, asinh, concepts::FloatingPointScalar<T>, (T), (T), T) +template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acosh_helper, acosh, concepts::FloatingPointScalar<T>, (T), (T), T) +template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atanh_helper, atanh, concepts::FloatingPointScalar<T>, (T), (T), T) template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atan2_helper, atan2, concepts::FloatingPointScalar<T>, (T), (T)(T), T) template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sqrt_helper, sqrt, concepts::FloatingPointScalar<T>, (T), (T), T) template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(abs_helper, abs, concepts::Scalar<T>, (T), (T), T) @@ -283,11 +284,11 @@ requires concepts::FloatingPointScalar<T> struct isinf_helper<T> { using return_t = bool; - static inline return_t __call(const T arg) + static inline return_t __call(const T arg) { - // GCC and Clang will always return false with call to std::isinf when fast math is enabled, - // this implementation will always return appropriate output regardless is fast math is enabled or not - using AsUint = typename unsigned_integer_of_size<sizeof(T)>::type; + // GCC and Clang will always return false with call to std::isinf when fast math is enabled, + // this implementation will always return appropriate output regardless is fast math is enabled or not + using AsUint = typename unsigned_integer_of_size<sizeof(T)>::type; return cpp_compat_intrinsics_impl::isinf_uint_impl(reinterpret_cast<const AsUint&>(arg)); } }; @@ -297,7 +298,7 @@ requires concepts::FloatingPointScalar<T> struct isnan_helper<T> { using return_t = bool; - static inline return_t __call(const T arg) + static inline return_t __call(const T arg) { // GCC and Clang will always return false with call to std::isnan when fast math is enabled, // this implementation will always return appropriate output regardless is fast math is enabled or not @@ -324,13 +325,13 @@ struct roundEven_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPoin static FloatingPoint __call(NBL_CONST_REF_ARG(FloatingPoint) x) { // TODO: no way this is optimal, find a better implementation - float tmp; - if (std::abs(std::modf(x, &tmp)) == 0.5f) - { - int32_t result = static_cast<int32_t>(x); - if (result % 2 != 0) - result >= 0 ? ++result : --result; - return result; + float tmp; + if (std::abs(std::modf(x, &tmp)) == 0.5f) + { + int32_t result = static_cast<int32_t>(x); + if (result % 2 != 0) + result >= 0 ? ++result : --result; + return result; } return std::round(x); @@ -389,6 +390,27 @@ struct frexpStruct_helper<T> // C++ and HLSL specializations +template<> +struct erf_helper<float16_t> +{ + static float16_t __call(float16_t _x) + { + // A&S approximation to 2.5x10-5 + const float16_t a1 = float16_t(0.3480242f); + const float16_t a2 = float16_t(-0.0958798f); + const float16_t a3 = float16_t(0.7478556f); + const float16_t p = float16_t(0.47047f); + + float16_t _sign = float16_t(sign<float16_t>(_x)); + float16_t x = abs_helper<float16_t>::__call(_x); + + float16_t t = float16_t(1.f) / (float16_t(1.f) + p * x); + float16_t y = float16_t(1.f) - (((a3 * t + a2) * t) + a1) * t * exp_helper<float16_t>::__call(-x * x); + + return _sign * y; + } +}; + template<typename FloatingPoint> NBL_PARTIAL_REQ_TOP(concepts::FloatingPointScalar<FloatingPoint>) struct erfInv_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointScalar<FloatingPoint>) > @@ -429,6 +451,92 @@ struct erfInv_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointSc } }; +// log doesn't accept float64_t +// template<> +// struct erfInv_helper<float64_t> +// { +// static float64_t __call(NBL_CONST_REF_ARG(float64_t) _x) +// { +// float64_t x = clamp<float64_t>(_x, NBL_FP64_LITERAL(-0.99999), NBL_FP64_LITERAL(0.99999)); + +// float64_t w = -log_helper<float64_t>::__call((NBL_FP64_LITERAL(1.0) - x) * (NBL_FP64_LITERAL(1.0) + x)); +// float64_t p; +// if (w < 6.250000) +// { +// w -= NBL_FP64_LITERAL(3.125000); +// p = NBL_FP64_LITERAL(-3.6444120640178196996e-21); +// p = NBL_FP64_LITERAL(-1.685059138182016589e-19) + p * w; +// p = NBL_FP64_LITERAL(1.2858480715256400167e-18) + p * w; +// p = NBL_FP64_LITERAL(1.115787767802518096e-17) + p * w; +// p = NBL_FP64_LITERAL(-1.333171662854620906e-16) + p * w; +// p = NBL_FP64_LITERAL(2.0972767875968561637e-17) + p * w; +// p = NBL_FP64_LITERAL(6.6376381343583238325e-15) + p * w; +// p = NBL_FP64_LITERAL(-4.0545662729752068639e-14) + p * w; +// p = NBL_FP64_LITERAL(-8.1519341976054721522e-14) + p * w; +// p = NBL_FP64_LITERAL(2.6335093153082322977e-12) + p * w; +// p = NBL_FP64_LITERAL(-1.2975133253453532498e-11) + p * w; +// p = NBL_FP64_LITERAL(-5.4154120542946279317e-11) + p * w; +// p = NBL_FP64_LITERAL(1.051212273321532285e-09) + p * w; +// p = NBL_FP64_LITERAL(-4.1126339803469836976e-09) + p * w; +// p = NBL_FP64_LITERAL(-2.9070369957882005086e-08) + p * w; +// p = NBL_FP64_LITERAL(4.2347877827932403518e-07) + p * w; +// p = NBL_FP64_LITERAL(-1.3654692000834678645e-06) + p * w; +// p = NBL_FP64_LITERAL(-1.3882523362786468719e-05) + p * w; +// p = NBL_FP64_LITERAL(0.0001867342080340571352) + p * w; +// p = NBL_FP64_LITERAL(-0.00074070253416626697512) + p * w; +// p = NBL_FP64_LITERAL(-0.0060336708714301490533) + p * w; +// p = NBL_FP64_LITERAL(0.24015818242558961693) + p * w; +// p = NBL_FP64_LITERAL(1.6536545626831027356) + p * w; +// } +// else if (w < 16.000000) +// { +// w = sqrt_helper<float64_t>::__call(w) - NBL_FP64_LITERAL(3.250000); +// p = NBL_FP64_LITERAL(2.2137376921775787049e-09); +// p = NBL_FP64_LITERAL(9.0756561938885390979e-08) + p * w; +// p = NBL_FP64_LITERAL(-2.7517406297064545428e-07) + p * w; +// p = NBL_FP64_LITERAL(1.8239629214389227755e-08) + p * w; +// p = NBL_FP64_LITERAL(1.5027403968909827627e-06) + p * w; +// p = NBL_FP64_LITERAL(-4.013867526981545969e-06) + p * w; +// p = NBL_FP64_LITERAL(2.9234449089955446044e-06) + p * w; +// p = NBL_FP64_LITERAL(1.2475304481671778723e-05) + p * w; +// p = NBL_FP64_LITERAL(-4.7318229009055733981e-05) + p * w; +// p = NBL_FP64_LITERAL(6.8284851459573175448e-05) + p * w; +// p = NBL_FP64_LITERAL(2.4031110387097893999e-05) + p * w; +// p = NBL_FP64_LITERAL(-0.0003550375203628474796) + p * w; +// p = NBL_FP64_LITERAL(0.00095328937973738049703) + p * w; +// p = NBL_FP64_LITERAL(-0.0016882755560235047313) + p * w; +// p = NBL_FP64_LITERAL(0.0024914420961078508066) + p * w; +// p = NBL_FP64_LITERAL(-0.0037512085075692412107) + p * w; +// p = NBL_FP64_LITERAL(0.005370914553590063617) + p * w; +// p = NBL_FP64_LITERAL(1.0052589676941592334) + p * w; +// p = NBL_FP64_LITERAL(3.0838856104922207635) + p * w; +// } +// else +// { +// w = sqrt_helper<float64_t>::__call(w) - NBL_FP64_LITERAL(5.000000); +// p = NBL_FP64_LITERAL(-2.7109920616438573243e-11); +// p = NBL_FP64_LITERAL(-2.5556418169965252055e-10) + p * w; +// p = NBL_FP64_LITERAL(1.5076572693500548083e-09) + p * w; +// p = NBL_FP64_LITERAL(-3.7894654401267369937e-09) + p * w; +// p = NBL_FP64_LITERAL(7.6157012080783393804e-09) + p * w; +// p = NBL_FP64_LITERAL(-1.4960026627149240478e-08) + p * w; +// p = NBL_FP64_LITERAL(2.9147953450901080826e-08) + p * w; +// p = NBL_FP64_LITERAL(-6.7711997758452339498e-08) + p * w; +// p = NBL_FP64_LITERAL(2.2900482228026654717e-07) + p * w; +// p = NBL_FP64_LITERAL(-9.9298272942317002539e-07) + p * w; +// p = NBL_FP64_LITERAL(4.5260625972231537039e-06) + p * w; +// p = NBL_FP64_LITERAL(-1.9681778105531670567e-05) + p * w; +// p = NBL_FP64_LITERAL(7.5995277030017761139e-05) + p * w; +// p = NBL_FP64_LITERAL(-0.00021503011930044477347) + p * w; +// p = NBL_FP64_LITERAL(-0.00013871931833623122026) + p * w; +// p = NBL_FP64_LITERAL(1.0103004648645343977) + p * w; +// p = NBL_FP64_LITERAL(4.8499064014085844221) + p * w; +// } + +// return p * x; +// } +// }; + #ifdef __HLSL_VERSION // SPIR-V already defines specializations for builtin vector types #define VECTOR_SPECIALIZATION_CONCEPT concepts::Vectorial<T> && !is_vector_v<T> @@ -471,14 +579,14 @@ AUTO_SPECIALIZE_HELPER_FOR_VECTOR(isnan_helper, VECTOR_SPECIALIZATION_CONCEPT, I AUTO_SPECIALIZE_HELPER_FOR_VECTOR(cos_helper, VECTOR_SPECIALIZATION_CONCEPT, T) AUTO_SPECIALIZE_HELPER_FOR_VECTOR(sin_helper, VECTOR_SPECIALIZATION_CONCEPT, T) AUTO_SPECIALIZE_HELPER_FOR_VECTOR(acos_helper, VECTOR_SPECIALIZATION_CONCEPT, T) -AUTO_SPECIALIZE_HELPER_FOR_VECTOR(tan_helper, VECTOR_SPECIALIZATION_CONCEPT, T) -AUTO_SPECIALIZE_HELPER_FOR_VECTOR(asin_helper, VECTOR_SPECIALIZATION_CONCEPT, T) -AUTO_SPECIALIZE_HELPER_FOR_VECTOR(atan_helper, VECTOR_SPECIALIZATION_CONCEPT, T) -AUTO_SPECIALIZE_HELPER_FOR_VECTOR(sinh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) -AUTO_SPECIALIZE_HELPER_FOR_VECTOR(cosh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) -AUTO_SPECIALIZE_HELPER_FOR_VECTOR(tanh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) -AUTO_SPECIALIZE_HELPER_FOR_VECTOR(asinh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) -AUTO_SPECIALIZE_HELPER_FOR_VECTOR(acosh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(tan_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(asin_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(atan_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(sinh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(cosh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(tanh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(asinh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(acosh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) AUTO_SPECIALIZE_HELPER_FOR_VECTOR(atanh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) AUTO_SPECIALIZE_HELPER_FOR_VECTOR(modf_helper, VECTOR_SPECIALIZATION_CONCEPT, T) AUTO_SPECIALIZE_HELPER_FOR_VECTOR(round_helper, VECTOR_SPECIALIZATION_CONCEPT, T) @@ -501,11 +609,11 @@ struct pow_helper<T NBL_PARTIAL_REQ_BOT(VECTOR_SPECIALIZATION_CONCEPT) > using traits = hlsl::vector_traits<T>; array_get<T, typename traits::scalar_type> getter; array_set<T, typename traits::scalar_type> setter; - + return_t output; for (uint32_t i = 0; i < traits::Dimension; ++i) setter(output, i, pow_helper<typename traits::scalar_type>::__call(getter(x, i), getter(y, i))); - + return output; } }; @@ -636,4 +744,4 @@ struct atan2_helper<T NBL_PARTIAL_REQ_BOT(VECTOR_SPECIALIZATION_CONCEPT) > } } -#endif \ No newline at end of file +#endif