From b2af579ff8fc696e0844913aec78f6a6e44f4b0a Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 27 Feb 2025 14:42:11 +0700 Subject: [PATCH 01/11] split out fresnel stuff, functions.hlsl fixes --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 155 +++++++++++ include/nbl/builtin/hlsl/math/functions.hlsl | 278 +++---------------- 2 files changed, 200 insertions(+), 233 deletions(-) create mode 100644 include/nbl/builtin/hlsl/bxdf/fresnel.hlsl diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl new file mode 100644 index 0000000000..5d54c6c261 --- /dev/null +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -0,0 +1,155 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_BXDF_FRESNEL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_BXDF_FRESNEL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/numbers.hlsl" +#include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" +#include "nbl/builtin/hlsl/spirv_intrinsics/core.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +namespace bxdf +{ + +namespace impl +{ +template +struct orientedEtas; + +template<> +struct orientedEtas +{ + static bool __call(NBL_REF_ARG(float) orientedEta, NBL_REF_ARG(float) rcpOrientedEta, float NdotI, float eta) + { + const bool backside = NdotI < 0.0; + const float rcpEta = 1.0 / eta; + orientedEta = backside ? rcpEta : eta; + rcpOrientedEta = backside ? eta : rcpEta; + return backside; + } +}; + +template<> +struct orientedEtas +{ + static bool __call(NBL_REF_ARG(float32_t3) orientedEta, NBL_REF_ARG(float32_t3) rcpOrientedEta, float NdotI, float32_t3 eta) + { + const bool backside = NdotI < 0.0; + const float32_t3 rcpEta = (float32_t3)1.0 / eta; + orientedEta = backside ? rcpEta:eta; + rcpOrientedEta = backside ? eta:rcpEta; + return backside; + } +}; +} + +template || is_vector_v) +bool getOrientedEtas(NBL_REF_ARG(T) orientedEta, NBL_REF_ARG(T) rcpOrientedEta, scalar_type_t NdotI, T eta) +{ + return impl::orientedEtas::__call(orientedEta, rcpOrientedEta, NdotI, eta); +} + +} + + +template ::Dimensions == 3) +T reflect(NBL_CONST_REF_ARG(T) I, NBL_CONST_REF_ARG(T) N, typename vector_traits::scalar_type NdotI) +{ + return N * 2.0f * NdotI - I; +} + +template ::Dimensions == 3) +T reflect(NBL_CONST_REF_ARG(T) I, NBL_CONST_REF_ARG(T) N) +{ + typename vector_traits::scalar_type NdotI = nbl::hlsl::dot(N, I); + return reflect(I, N, NdotI); +} + +template::Dimensions == 3) +struct refract +{ + using this_t = refract; + using scalar_type = typename vector_traits::scalar_type; + using vector_type = T; + + static this_t create(NBL_CONST_REF_ARG(vector_type) I, NBL_CONST_REF_ARG(vector_type) N, bool backside, scalar_type NdotI, scalar_type NdotI2, scalar_type rcpOrientedEta, scalar_type rcpOrientedEta2) + { + this_t retval; + retval.I = I; + retval.N = N; + retval.backside = backside; + retval.NdotI = NdotI; + retval.NdotI2 = NdotI2; + retval.rcpOrientedEta = rcpOrientedEta; + retval.rcpOrientedEta2 = rcpOrientedEta2; + return retval; + } + + static this_t create(NBL_CONST_REF_ARG(vector_type) I, NBL_CONST_REF_ARG(vector_type) N, scalar_type NdotI, scalar_type eta) + { + this_t retval; + retval.I = I; + retval.N = N; + T orientedEta; + retval.backside = bxdf::getOrientedEtas(orientedEta, retval.rcpOrientedEta, NdotI, eta); + retval.NdotI = NdotI; + retval.NdotI2 = NdotI * NdotI; + retval.rcpOrientedEta2 = retval.rcpOrientedEta * retval.rcpOrientedEta; + return retval; + } + + static this_t create(NBL_CONST_REF_ARG(vector_type) I, NBL_CONST_REF_ARG(vector_type) N, scalar_type eta) + { + this_t retval; + retval.I = I; + retval.N = N; + retval.NdotI = nbl::hlsl::dot(N, I); + scalar_type orientedEta; + retval.backside = bxdf::getOrientedEtas(orientedEta, retval.rcpOrientedEta, retval.NdotI, eta); + retval.NdotI2 = retval.NdotI * retval.NdotI; + retval.rcpOrientedEta2 = retval.rcpOrientedEta * retval.rcpOrientedEta; + return retval; + } + + static scalar_type computeNdotT(bool backside, scalar_type NdotI2, scalar_type rcpOrientedEta2) + { + scalar_type NdotT2 = rcpOrientedEta2 * NdotI2 + 1.0 - rcpOrientedEta2; + scalar_type absNdotT = nbl::hlsl::sqrt(NdotT2); + return backside ? absNdotT : -(absNdotT); + } + + vector_type doRefract() + { + return N * (NdotI * rcpOrientedEta + computeNdotT(backside, NdotI2, rcpOrientedEta2)) - rcpOrientedEta * I; + } + + static vector_type doReflectRefract(bool _refract, NBL_CONST_REF_ARG(vector_type) _I, NBL_CONST_REF_ARG(vector_type) _N, scalar_type _NdotI, scalar_type _NdotTorR, scalar_type _rcpOrientedEta) + { + return _N * (_NdotI * (_refract ? _rcpOrientedEta : 1.0f) + _NdotTorR) - _I * (_refract ? _rcpOrientedEta : 1.0f); + } + + vector_type doReflectRefract(bool r) + { + const T NdotTorR = r ? computeNdotT(backside, NdotI2, rcpOrientedEta2) : NdotI; + return doReflectRefract(r, I, N, NdotI, NdotTorR, rcpOrientedEta); + } + + vector_type I; + vector_type N; + bool backside; + T NdotI; + T NdotI2; + T rcpOrientedEta; + T rcpOrientedEta2; +}; + +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/math/functions.hlsl b/include/nbl/builtin/hlsl/math/functions.hlsl index a36c2027f8..41e1f376a1 100644 --- a/include/nbl/builtin/hlsl/math/functions.hlsl +++ b/include/nbl/builtin/hlsl/math/functions.hlsl @@ -6,6 +6,8 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/numbers.hlsl" +#include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" +#include "nbl/builtin/hlsl/concepts/vector.hlsl" #include "nbl/builtin/hlsl/spirv_intrinsics/core.hlsl" namespace nbl @@ -24,28 +26,31 @@ struct lp_norm; template struct lp_norm { - static scalar_type_t __call(const T v) + using scalar_type = typename vector_traits::scalar_type; + + static scalar_type __call(const T v) { - scalar_type_t retval = abs(v[0]); + scalar_type retval = nbl::hlsl::abs(v[0]); for (int i = 1; i < extent::value; i++) - retval = max(abs(v[i]),retval); + retval = nbl::hlsl::max(nbl::hlsl::abs(v[i]),retval); return retval; } }; -// TOOD: is this doing what it should be? template -struct lp_norm +struct lp_norm { - static scalar_type_t __sum(const T v) + using scalar_type = typename vector_traits::scalar_type; + + static scalar_type __sum(const T v) { - scalar_type_t retval = abs(v[0]); + scalar_type retval = nbl::hlsl::abs(v[0]); for (int i = 1; i < extent::value; i++) - retval += abs(v[i]); + retval += nbl::hlsl::abs(v[i]); return retval; } - static scalar_type_t __call(const T v) + static scalar_type __call(const T v) { return __sum(v); } @@ -54,218 +59,32 @@ struct lp_norm template struct lp_norm { - static scalar_type_t __sum(const T v) + using scalar_type = typename vector_traits::scalar_type; + + static scalar_type __sum(const T v) { - return dot(v, v); // TODO: wait for overloaded dot? + return nbl::hlsl::dot(v, v); } - static scalar_type_t __call(const T v) + static scalar_type __call(const T v) { - return sqrt(__sum(v)); + return nbl::hlsl::sqrt(__sum(v)); } }; - -// TODO: even/odd cases } -template0) +template || concepts::FloatingPointVectorial) && LP>0) scalar_type_t lpNormPreroot(NBL_CONST_REF_ARG(T) v) { return impl::lp_norm::__sum(v); } -template +template || concepts::FloatingPointVectorial) scalar_type_t lpNorm(NBL_CONST_REF_ARG(T) v) { return impl::lp_norm::__call(v); } - -template ) -vector reflect(vector I, vector N, T NdotI) -{ - return N * 2.0f * NdotI - I; -} - -template ) -vector reflect(vector I, vector N) -{ - T NdotI = dot(N, I); - return reflect(I, N, NdotI); -} - - -namespace impl -{ -template -struct orientedEtas; - -template<> -struct orientedEtas -{ - static bool __call(NBL_REF_ARG(float) orientedEta, NBL_REF_ARG(float) rcpOrientedEta, float NdotI, float eta) - { - const bool backside = NdotI < 0.0; - const float rcpEta = 1.0 / eta; - orientedEta = backside ? rcpEta : eta; - rcpOrientedEta = backside ? eta : rcpEta; - return backside; - } -}; - -template<> -struct orientedEtas -{ - static bool __call(NBL_REF_ARG(float32_t3) orientedEta, NBL_REF_ARG(float32_t3) rcpOrientedEta, float NdotI, float32_t3 eta) - { - const bool backside = NdotI < 0.0; - const float32_t3 rcpEta = (float32_t3)1.0 / eta; - orientedEta = backside ? rcpEta:eta; - rcpOrientedEta = backside ? eta:rcpEta; - return backside; - } -}; -} - -template || is_vector_v) -bool getOrientedEtas(NBL_REF_ARG(T) orientedEta, NBL_REF_ARG(T) rcpOrientedEta, scalar_type_t NdotI, T eta) -{ - return impl::orientedEtas::__call(orientedEta, rcpOrientedEta, NdotI, eta); -} - - -namespace impl -{ -template -struct refract -{ - using this_t = refract; - using vector_type = vector; - - static this_t create(vector_type I, vector_type N, bool backside, T NdotI, T NdotI2, T rcpOrientedEta, T rcpOrientedEta2) - { - this_t retval; - retval.I = I; - retval.N = N; - retval.backside = backside; - retval.NdotI = NdotI; - retval.NdotI2 = NdotI2; - retval.rcpOrientedEta = rcpOrientedEta; - retval.rcpOrientedEta2 = rcpOrientedEta2; - return retval; - } - - static this_t create(vector_type I, vector_type N, T NdotI, T eta) - { - this_t retval; - retval.I = I; - retval.N = N; - T orientedEta; - retval.backside = getOrientedEtas(orientedEta, retval.rcpOrientedEta, NdotI, eta); - retval.NdotI = NdotI; - retval.NdotI2 = NdotI * NdotI; - retval.rcpOrientedEta2 = retval.rcpOrientedEta * retval.rcpOrientedEta; - return retval; - } - - static this_t create(vector_type I, vector_type N, T eta) - { - this_t retval; - retval.I = I; - retval.N = N; - retval.NdotI = dot(N, I); - T orientedEta; - retval.backside = getOrientedEtas(orientedEta, retval.rcpOrientedEta, retval.NdotI, eta); - retval.NdotI2 = retval.NdotI * retval.NdotI; - retval.rcpOrientedEta2 = retval.rcpOrientedEta * retval.rcpOrientedEta; - return retval; - } - - T computeNdotT() - { - T NdotT2 = rcpOrientedEta2 * NdotI2 + 1.0 - rcpOrientedEta2; - T absNdotT = sqrt(NdotT2); - return backside ? absNdotT : -(absNdotT); - } - - vector_type doRefract() - { - return N * (NdotI * rcpOrientedEta + computeNdotT()) - rcpOrientedEta * I; - } - - static vector_type doReflectRefract(bool _refract, vector_type _I, vector_type _N, T _NdotI, T _NdotTorR, T _rcpOrientedEta) - { - return _N * (_NdotI * (_refract ? _rcpOrientedEta : 1.0f) + _NdotTorR) - _I * (_refract ? _rcpOrientedEta : 1.0f); - } - - vector_type doReflectRefract(bool r) - { - const T NdotTorR = r ? computeNdotT() : NdotI; - return doReflectRefract(r, I, N, NdotI, NdotTorR, rcpOrientedEta); - } - - vector_type I; - vector_type N; - bool backside; - T NdotI; - T NdotI2; - T rcpOrientedEta; - T rcpOrientedEta2; -}; -} - -template) -vector refract(vector I, vector N, bool backside, T NdotI, T NdotI2, T rcpOrientedEta, T rcpOrientedEta2) -{ - impl::refract r = impl::refract::create(I, N, backside, NdotI, NdotI2, rcpOrientedEta, rcpOrientedEta2); - return r.doRefract(); -} - -template) -vector refract(vector I, vector N, T NdotI, T eta) -{ - impl::refract r = impl::refract::create(I, N, NdotI, eta); - return r.doRefract(); -} - -template) -vector refract(vector I, vector N, T eta) -{ - impl::refract r = impl::refract::create(I, N, eta); - return r.doRefract(); -} - -template) -T reflectRefract_computeNdotT(bool backside, T NdotI2, T rcpOrientedEta2) -{ - impl::refract r; - r.NdotI2 = NdotI2; - r.rcpOrientedEta2 = rcpOrientedEta2; - r.backside = backside; - return r.computeNdotT(); -} - -template) -vector reflectRefract_impl(bool _refract, vector _I, vector _N, T _NdotI, T _NdotTorR, T _rcpOrientedEta) -{ - return impl::refract::doReflectRefract(_refract, _I, _N, _NdotI, _NdotTorR, _rcpOrientedEta); -} - -template) -vector reflectRefract(bool _refract, vector I, vector N, bool backside, T NdotI, T NdotI2, T rcpOrientedEta, T rcpOrientedEta2) -{ - impl::refract r = impl::refract::create(I, N, backside, NdotI, NdotI2, rcpOrientedEta, rcpOrientedEta2); - return r.doReflectRefract(_refract); -} - -template) -vector reflectRefract(bool _refract, vector I, vector N, T NdotI, T eta) -{ - impl::refract r = impl::refract::create(I, N, NdotI, eta); - return r.doReflectRefract(_refract); -} - - // valid only for `theta` in [-PI,PI] template ) void sincos(T theta, NBL_REF_ARG(T) s, NBL_REF_ARG(T) c) @@ -275,13 +94,21 @@ void sincos(T theta, NBL_REF_ARG(T) s, NBL_REF_ARG(T) c) s = (theta < 0.0) ? -s : s; // TODO: test with XOR } -template ) -matrix frisvad(vector n) +template ::Dimension == 3) +void frisvad(NBL_CONST_REF_ARG(T) normal, NBL_REF_ARG(T) tangent, NBL_REF_ARG(T) bitangent) { - const T a = 1.0 / (1.0 + n.z); - const T b = -n.x * n.y * a; - return (n.z < -0.9999999) ? matrix(vector(0.0,-1.0,0.0), vector(-1.0,0.0,0.0)) : - matrix(vector(1.0-n.x*n.x*a, b, -n.x), vector(b, 1.0-n.y*n.y*a, -n.y)); + const typename vector_traits::scalar_type a = 1.0 / (1.0 + normal.z); + const typename vector_traits::scalar_type b = -normal.x * normal.y * a; + if (normal.z < -0.9999999) + { + tangent = T(0.0,-1.0,0.0); + bitangent = T(-1.0,0.0,0.0); + } + else + { + tangent = T(1.0-normal.x*normal.x*a, b, -normal.x); + bitangent = T(b, 1.0-normal.y*normal.y*a, -normal.y); + } } bool partitionRandVariable(float leftProb, NBL_REF_ARG(float) xi, NBL_REF_ARG(float) rcpChoiceProb) @@ -303,40 +130,25 @@ bool partitionRandVariable(float leftProb, NBL_REF_ARG(float) xi, NBL_REF_ARG(fl } -// TODO: make it work in C++, ignoring problem for now -#ifdef __HLSL_VERSION +// TODO: impl signed integer versions // @ return abs(x) if cond==true, max(x,0.0) otherwise -template || is_vector_v) +template || concepts::FloatingPointVector || concepts::FloatingPointVectorial) T conditionalAbsOrMax(bool cond, T x, T limit); template <> float conditionalAbsOrMax(bool cond, float x, float limit) { - const float condAbs = asfloat(asuint(x) & uint(cond ? 0x7fFFffFFu : 0xffFFffFFu)); - return max(condAbs,limit); + const float condAbs = nbl::hlsl::bit_cast(nbl::hlsl::bit_cast(x) & uint(cond ? 0x7fFFffFFu : 0xffFFffFFu)); + return nbl::hlsl::max(condAbs,limit); } -template <> -float32_t2 conditionalAbsOrMax(bool cond, float32_t2 x, float32_t2 limit) +template +vector conditionalAbsOrMax >(bool cond, NBL_CONST_REF_ARG(vector) x, NBL_CONST_REF_ARG(vector) limit) { - const float32_t2 condAbs = asfloat(asuint(x) & select(cond, (uint32_t2)0x7fFFffFFu, (uint32_t2)0xffFFffFFu)); - return max(condAbs,limit); + const vector condAbs = nbl::hlsl::bit_cast, vector >(nbl::hlsl::bit_cast, vector >(x) & nbl::hlsl::mix((vector)0x7fFFffFFu, (vector)0xffFFffFFu, promote, bool>(cond))); + return nbl::hlsl::max >(condAbs,limit); } -template <> -float32_t3 conditionalAbsOrMax(bool cond, float32_t3 x, float32_t3 limit) -{ - const float32_t3 condAbs = asfloat(asuint(x) & select(cond, (uint32_t3)0x7fFFffFFu, (uint32_t3)0xffFFffFFu)); - return max(condAbs,limit); -} - -template <> -float32_t4 conditionalAbsOrMax(bool cond, float32_t4 x, float32_t4 limit) -{ - const float32_t4 condAbs = asfloat(asuint(x) & select(cond, (uint32_t4)0x7fFFffFFu, (uint32_t4)0xffFFffFFu)); - return max(condAbs,limit); -} -#endif namespace impl { @@ -441,7 +253,7 @@ float getSumofArccosABCD(float cosA, float cosB, float cosC, float cosD) } template) -matrix applyChainRule(matrix dFdG, matrix dGdR) +matrix applyChainRule(NBL_CONST_REF_ARG(matrix) dFdG, NBL_CONST_REF_ARG(matrix) dGdR) { return mul(dFdG,dGdR); } From 8e8c55c8e19ae474e62eedad137068dc884e739e Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 27 Feb 2025 17:40:55 +0700 Subject: [PATCH 02/11] fix conditionalAbsOrMax --- include/nbl/builtin/hlsl/math/functions.hlsl | 36 ++++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/functions.hlsl b/include/nbl/builtin/hlsl/math/functions.hlsl index 41e1f376a1..2972023540 100644 --- a/include/nbl/builtin/hlsl/math/functions.hlsl +++ b/include/nbl/builtin/hlsl/math/functions.hlsl @@ -130,23 +130,39 @@ bool partitionRandVariable(float leftProb, NBL_REF_ARG(float) xi, NBL_REF_ARG(fl } +namespace impl +{ // TODO: impl signed integer versions // @ return abs(x) if cond==true, max(x,0.0) otherwise -template || concepts::FloatingPointVector || concepts::FloatingPointVectorial) -T conditionalAbsOrMax(bool cond, T x, T limit); +template || concepts::FloatingPointVector || concepts::FloatingPointVectorial) +struct ConditionalAbsOrMax; + +template<> +struct ConditionalAbsOrMax +{ + static float absOrMax(bool cond, float x, float limit) + { + const float condAbs = nbl::hlsl::bit_cast(nbl::hlsl::bit_cast(x) & uint32_t(cond ? 0x7fFFffFFu : 0xffFFffFFu)); + return nbl::hlsl::max(condAbs,limit); + } +}; -template <> -float conditionalAbsOrMax(bool cond, float x, float limit) +template +struct ConditionalAbsOrMax > { - const float condAbs = nbl::hlsl::bit_cast(nbl::hlsl::bit_cast(x) & uint(cond ? 0x7fFFffFFu : 0xffFFffFFu)); - return nbl::hlsl::max(condAbs,limit); + static vector absOrMax(bool cond, NBL_CONST_REF_ARG(vector) x, NBL_CONST_REF_ARG(vector) limit) + { + const vector condAbs = nbl::hlsl::bit_cast, vector >(nbl::hlsl::bit_cast, vector >(x) & nbl::hlsl::mix((vector)0x7fFFffFFu, (vector)0xffFFffFFu, promote, bool>(cond))); + return nbl::hlsl::max >(condAbs,limit); + } +}; + } -template -vector conditionalAbsOrMax >(bool cond, NBL_CONST_REF_ARG(vector) x, NBL_CONST_REF_ARG(vector) limit) +template +T conditionalAbsOrMax(bool cond, NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) limit) { - const vector condAbs = nbl::hlsl::bit_cast, vector >(nbl::hlsl::bit_cast, vector >(x) & nbl::hlsl::mix((vector)0x7fFFffFFu, (vector)0xffFFffFFu, promote, bool>(cond))); - return nbl::hlsl::max >(condAbs,limit); + return impl::ConditionalAbsOrMax::absOrMax(cond, x, limit); } From 107ca800645254e95ae64d36ab8336995afcefdc Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 28 Feb 2025 16:58:28 +0700 Subject: [PATCH 03/11] fix getArccosSumofABC_minus_PI incorrect operator --- include/nbl/builtin/hlsl/math/functions.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/math/functions.hlsl b/include/nbl/builtin/hlsl/math/functions.hlsl index 2972023540..d931b311a4 100644 --- a/include/nbl/builtin/hlsl/math/functions.hlsl +++ b/include/nbl/builtin/hlsl/math/functions.hlsl @@ -204,7 +204,7 @@ struct trigonometry const bool ABltC = cosSumAB < tmp2; // apply triple angle formula const float absArccosSumABC = acos(clamp(cosSumAB * tmp2 - (tmp0 * tmp4 + tmp3 * tmp1) * tmp5, -1.f, 1.f)); - return ((AltminusB ? ABltC : ABltminusC) ? (-absArccosSumABC) : absArccosSumABC) + (AltminusB | ABltminusC ? numbers::pi : (-numbers::pi)); + return ((AltminusB ? ABltC : ABltminusC) ? (-absArccosSumABC) : absArccosSumABC) + ((AltminusB || ABltminusC) ? numbers::pi : (-numbers::pi)); } static void combineCosForSumOfAcos(float cosA, float cosB, float biasA, float biasB, NBL_REF_ARG(float) out0, NBL_REF_ARG(float) out1) From 8009dab4ccddb793730821ca7e849fc7c246fe48 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 3 Mar 2025 10:46:34 +0700 Subject: [PATCH 04/11] fix typo --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index e28c1faf44..2e0d6a6fa0 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -87,7 +87,7 @@ struct refract this_t retval; retval.I = I; retval.N = N; - T orientedEta; + scalar_type orientedEta; retval.backside = getOrientedEtas(orientedEta, retval.rcpOrientedEta, NdotI, eta); retval.NdotI = NdotI; retval.NdotI2 = NdotI * NdotI; From 93c051d10fff4c5bdd5dcf886b5ddd4c2b11e4ec Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 3 Mar 2025 11:20:30 +0700 Subject: [PATCH 05/11] fix typo bug --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index 2e0d6a6fa0..d3b3543a28 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -62,7 +62,7 @@ T reflect(T I, T N, typename vector_traits::scalar_type NdotI) return N * 2.0f * NdotI - I; } -template::Dimensions == 3) +template::Dimension == 3) struct refract { using this_t = refract; From 0c2de650d20caa61986f253b598228596bcd3f2c Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 4 Mar 2025 14:43:18 +0700 Subject: [PATCH 06/11] erf for fp16 and fp64 --- include/nbl/builtin/hlsl/tgmath/impl.hlsl | 307 +++++++++++++++++----- 1 file changed, 237 insertions(+), 70 deletions(-) diff --git a/include/nbl/builtin/hlsl/tgmath/impl.hlsl b/include/nbl/builtin/hlsl/tgmath/impl.hlsl index 6e80ef2fd6..faa939459e 100644 --- a/include/nbl/builtin/hlsl/tgmath/impl.hlsl +++ b/include/nbl/builtin/hlsl/tgmath/impl.hlsl @@ -50,26 +50,26 @@ template struct sin_helper; template struct acos_helper; -template -struct tan_helper; -template -struct asin_helper; -template -struct atan_helper; -template -struct sinh_helper; -template -struct cosh_helper; -template -struct tanh_helper; -template -struct asinh_helper; -template -struct acosh_helper; -template -struct atanh_helper; -template -struct atan2_helper; +template +struct tan_helper; +template +struct asin_helper; +template +struct atan_helper; +template +struct sinh_helper; +template +struct cosh_helper; +template +struct tanh_helper; +template +struct asinh_helper; +template +struct acosh_helper; +template +struct atanh_helper; +template +struct atan2_helper; template struct sqrt_helper; @@ -115,15 +115,15 @@ struct HELPER_NAME AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sin_helper, sin, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(cos_helper, cos, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acos_helper, acos, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tan_helper, tan, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asin_helper, asin, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atan_helper, atan, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sinh_helper, sinh, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(cosh_helper, cosh, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tanh_helper, tanh, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asinh_helper, asinh, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acosh_helper, acosh, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atanh_helper, atanh, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tan_helper, tan, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asin_helper, asin, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atan_helper, atan, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sinh_helper, sinh, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(cosh_helper, cosh, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tanh_helper, tanh, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asinh_helper, asinh, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acosh_helper, acosh, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atanh_helper, atanh, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atan2_helper, atan2, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(abs_helper, sAbs, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(abs_helper, fAbs, (T), (T), T) @@ -189,23 +189,169 @@ struct erf_helper(x) >> 32); + ix = hx & 0x7fffffff; + if (ix >= 0x7ff00000) // erf(nan)=nan, erf(+-inf)=+-1 + { + int32_t i = ((uint32_t)hx >> 31) << 1; + return (float64_t)(1.0 - i) + one / x; + } + + float64_t P, Q; + if (ix < 0x3feb0000) // |x| < 0.84375 + { + if (ix < 0x3e300000) // |x| < 2**-28 + { + if (ix < 0x00800000) + { + // avoid underflow + return FloatingPoint(0.0625 * (16.0 * x + (16.0 * efx) * x)); + } + return FloatingPoint(x + efx * x); + } + z = x * x; + r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4))); + s = one + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5)))); + y = r / s; + return FloatingPoint(x + x * y); + } + if (ix < 0x3ff40000) // 0.84375 <= |x| < 1.25 + { + s = abs_helper::__call(x) - one; + P = pa0 + s * (pa1 + s * (pa2 + s * (pa3 + s * (pa4 + s * (pa5 + s * pa6))))); + Q = one + s * (qa1 + s * (qa2 + s * (qa3 + s * (qa4 + s * (qa5 + s * (qa5 + s * qa6)))))); + if (hx >= 0) + return FloatingPoint(erx + P / Q); + else + return FloatingPoint(-erx - P / Q); + } + if (ix >= 0x40180000) // inf > |x| >= 6 + { + if (hx >= 0) + return FloatingPoint(one - tiny); + else + return FloatingPoint(tiny - one); + } + + x = abs_helper::__call(x); + s = one / (x * x); + float64_t R, S; + if (ix < 0x4006DB6E) // |x| < 1/0.35 ~2.85714 + { + R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + s * (ra5 + s * (ra6 + s * ra7)))))); + S = one + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + s * (sa5 + s * (sa6 + s * sa7)))))); + } + else // |x| >= 1/0.35 + { + R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + s * rb5)))); + S = one + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + s * (sb5 + s * (sb6 + s * sb7)))))); + } + z = x; + uint64_t z1 = bit_cast(x); + z1 &= 0xffffffff00000000; + z = bit_cast(z1); + r = exp_helper::__call(-z * z - 0.5625) * exp_helper::__call((z - x) * (z + x) + R / S); + if (hx >= 0) + return FloatingPoint(one - r / x); + else + return FloatingPoint(r / x - one); + } +}; + +template<> +struct erf_helper +{ + static float32_t __call(NBL_CONST_REF_ARG(float32_t) _x) + { + // A&S approximation to 1.5x10-7 + const float32_t a1 = float32_t(NBL_FP64_LITERAL(0.254829592)); + const float32_t a2 = float32_t(NBL_FP64_LITERAL(-0.284496736)); + const float32_t a3 = float32_t(NBL_FP64_LITERAL(1.421413741)); + const float32_t a4 = float32_t(NBL_FP64_LITERAL(-1.453152027)); + const float32_t a5 = float32_t(NBL_FP64_LITERAL(1.061405429)); + const float32_t p = float32_t(NBL_FP64_LITERAL(0.3275911)); - FloatingPoint _sign = FloatingPoint(sign(_x)); - FloatingPoint x = abs(_x); + float32_t _sign = float32_t(sign(_x)); + float32_t x = abs(_x); - FloatingPoint t = FloatingPoint(NBL_FP64_LITERAL(1.0)) / (FloatingPoint(NBL_FP64_LITERAL(1.0)) + p * x); - FloatingPoint y = FloatingPoint(NBL_FP64_LITERAL(1.0)) - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x); + float32_t t = float32_t(NBL_FP64_LITERAL(1.0)) / (float32_t(NBL_FP64_LITERAL(1.0)) + p * x); + float32_t y = float32_t(NBL_FP64_LITERAL(1.0)) - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x); return _sign * y; } }; + #else // C++ only specializations #define DECL_ARG(r,data,i,_T) BOOST_PP_COMMA_IF(BOOST_PP_NOT_EQUAL(i,0)) const _T arg##i @@ -226,16 +372,16 @@ struct HELPER_NAME\ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(cos_helper, cos, concepts::FloatingPointScalar, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sin_helper, sin, concepts::FloatingPointScalar, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tan_helper, tan, concepts::FloatingPointScalar, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asin_helper, asin, concepts::FloatingPointScalar, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acos_helper, acos, concepts::FloatingPointScalar, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atan_helper, atan, concepts::FloatingPointScalar, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sinh_helper, sinh, concepts::FloatingPointScalar, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(cosh_helper, cosh, concepts::FloatingPointScalar, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tanh_helper, tanh, concepts::FloatingPointScalar, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asinh_helper, asinh, concepts::FloatingPointScalar, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acosh_helper, acosh, concepts::FloatingPointScalar, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atanh_helper, atanh, concepts::FloatingPointScalar, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tan_helper, tan, concepts::FloatingPointScalar, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asin_helper, asin, concepts::FloatingPointScalar, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acos_helper, acos, concepts::FloatingPointScalar, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atan_helper, atan, concepts::FloatingPointScalar, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sinh_helper, sinh, concepts::FloatingPointScalar, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(cosh_helper, cosh, concepts::FloatingPointScalar, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tanh_helper, tanh, concepts::FloatingPointScalar, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asinh_helper, asinh, concepts::FloatingPointScalar, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acosh_helper, acosh, concepts::FloatingPointScalar, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atanh_helper, atanh, concepts::FloatingPointScalar, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atan2_helper, atan2, concepts::FloatingPointScalar, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sqrt_helper, sqrt, concepts::FloatingPointScalar, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(abs_helper, abs, concepts::Scalar, (T), (T), T) @@ -283,11 +429,11 @@ requires concepts::FloatingPointScalar struct isinf_helper { using return_t = bool; - static inline return_t __call(const T arg) + static inline return_t __call(const T arg) { - // GCC and Clang will always return false with call to std::isinf when fast math is enabled, - // this implementation will always return appropriate output regardless is fast math is enabled or not - using AsUint = typename unsigned_integer_of_size::type; + // GCC and Clang will always return false with call to std::isinf when fast math is enabled, + // this implementation will always return appropriate output regardless is fast math is enabled or not + using AsUint = typename unsigned_integer_of_size::type; return cpp_compat_intrinsics_impl::isinf_uint_impl(reinterpret_cast(arg)); } }; @@ -297,7 +443,7 @@ requires concepts::FloatingPointScalar struct isnan_helper { using return_t = bool; - static inline return_t __call(const T arg) + static inline return_t __call(const T arg) { // GCC and Clang will always return false with call to std::isnan when fast math is enabled, // this implementation will always return appropriate output regardless is fast math is enabled or not @@ -324,13 +470,13 @@ struct roundEven_helper(x); - if (result % 2 != 0) - result >= 0 ? ++result : --result; - return result; + float tmp; + if (std::abs(std::modf(x, &tmp)) == 0.5f) + { + int32_t result = static_cast(x); + if (result % 2 != 0) + result >= 0 ? ++result : --result; + return result; } return std::round(x); @@ -389,6 +535,27 @@ struct frexpStruct_helper // C++ and HLSL specializations +template<> +struct erf_helper +{ + static float16_t __call(float16_t _x) + { + // A&S approximation to 2.5x10-5 + const float16_t a1 = float16_t(0.3480242f); + const float16_t a2 = float16_t(-0.0958798f); + const float16_t a3 = float16_t(0.7478556f); + const float16_t p = float16_t(0.47047f); + + float16_t _sign = float16_t(sign(_x)); + float16_t x = abs_helper::__call(_x); + + float16_t t = float16_t(1.f) / (float16_t(1.f) + p * x); + float16_t y = float16_t(1.f) - (((a3 * t + a2) * t) + a1) * t * exp(-x * x); + + return _sign * y; + } +}; + template NBL_PARTIAL_REQ_TOP(concepts::FloatingPointScalar) struct erfInv_helper) > @@ -471,14 +638,14 @@ AUTO_SPECIALIZE_HELPER_FOR_VECTOR(isnan_helper, VECTOR_SPECIALIZATION_CONCEPT, I AUTO_SPECIALIZE_HELPER_FOR_VECTOR(cos_helper, VECTOR_SPECIALIZATION_CONCEPT, T) AUTO_SPECIALIZE_HELPER_FOR_VECTOR(sin_helper, VECTOR_SPECIALIZATION_CONCEPT, T) AUTO_SPECIALIZE_HELPER_FOR_VECTOR(acos_helper, VECTOR_SPECIALIZATION_CONCEPT, T) -AUTO_SPECIALIZE_HELPER_FOR_VECTOR(tan_helper, VECTOR_SPECIALIZATION_CONCEPT, T) -AUTO_SPECIALIZE_HELPER_FOR_VECTOR(asin_helper, VECTOR_SPECIALIZATION_CONCEPT, T) -AUTO_SPECIALIZE_HELPER_FOR_VECTOR(atan_helper, VECTOR_SPECIALIZATION_CONCEPT, T) -AUTO_SPECIALIZE_HELPER_FOR_VECTOR(sinh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) -AUTO_SPECIALIZE_HELPER_FOR_VECTOR(cosh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) -AUTO_SPECIALIZE_HELPER_FOR_VECTOR(tanh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) -AUTO_SPECIALIZE_HELPER_FOR_VECTOR(asinh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) -AUTO_SPECIALIZE_HELPER_FOR_VECTOR(acosh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(tan_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(asin_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(atan_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(sinh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(cosh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(tanh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(asinh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) +AUTO_SPECIALIZE_HELPER_FOR_VECTOR(acosh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) AUTO_SPECIALIZE_HELPER_FOR_VECTOR(atanh_helper, VECTOR_SPECIALIZATION_CONCEPT, T) AUTO_SPECIALIZE_HELPER_FOR_VECTOR(modf_helper, VECTOR_SPECIALIZATION_CONCEPT, T) AUTO_SPECIALIZE_HELPER_FOR_VECTOR(round_helper, VECTOR_SPECIALIZATION_CONCEPT, T) @@ -636,4 +803,4 @@ struct atan2_helper } } -#endif \ No newline at end of file +#endif From d1ff526e771c843cf0938293a8d6e411c4e910e0 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 4 Mar 2025 15:40:38 +0700 Subject: [PATCH 07/11] erfinv for fp64 --- include/nbl/builtin/hlsl/tgmath/impl.hlsl | 91 ++++++++++++++++++++++- 1 file changed, 88 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/tgmath/impl.hlsl b/include/nbl/builtin/hlsl/tgmath/impl.hlsl index faa939459e..f46aeabdc1 100644 --- a/include/nbl/builtin/hlsl/tgmath/impl.hlsl +++ b/include/nbl/builtin/hlsl/tgmath/impl.hlsl @@ -146,7 +146,7 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(frexpStruct_helper, fre #define ISINF_AND_ISNAN_RETURN_TYPE conditional_t, vector::Dimension>, bool> template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(isinf_helper, isInf, (T), (T), ISINF_AND_ISNAN_RETURN_TYPE) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(isnan_helper, isNan, (T), (T), ISINF_AND_ISNAN_RETURN_TYPE) -#undef ISINF_AND_ISNAN_RETURN_TYPE +#undef ISINF_AND_ISNAN_RETURN_TYPE #undef DECLVAL #undef DECL_ARG @@ -596,6 +596,91 @@ struct erfInv_helper +struct erfInv_helper +{ + static float64_t __call(NBL_CONST_REF_ARG(float64_t) _x) + { + float64_t x = clamp(_x, NBL_FP64_LITERAL(-0.99999), NBL_FP64_LITERAL(0.99999)); + + float64_t w = -log_helper::__call((NBL_FP64_LITERAL(1.0) - x) * (NBL_FP64_LITERAL(1.0) + x)); + float64_t p; + if (w < 6.250000) + { + w -= NBL_FP64_LITERAL(3.125000); + p = NBL_FP64_LITERAL(-3.6444120640178196996e-21); + p = NBL_FP64_LITERAL(-1.685059138182016589e-19) + p * w; + p = NBL_FP64_LITERAL(1.2858480715256400167e-18) + p * w; + p = NBL_FP64_LITERAL(1.115787767802518096e-17) + p * w; + p = NBL_FP64_LITERAL(-1.333171662854620906e-16) + p * w; + p = NBL_FP64_LITERAL(2.0972767875968561637e-17) + p * w; + p = NBL_FP64_LITERAL(6.6376381343583238325e-15) + p * w; + p = NBL_FP64_LITERAL(-4.0545662729752068639e-14) + p * w; + p = NBL_FP64_LITERAL(-8.1519341976054721522e-14) + p * w; + p = NBL_FP64_LITERAL(2.6335093153082322977e-12) + p * w; + p = NBL_FP64_LITERAL(-1.2975133253453532498e-11) + p * w; + p = NBL_FP64_LITERAL(-5.4154120542946279317e-11) + p * w; + p = NBL_FP64_LITERAL(1.051212273321532285e-09) + p * w; + p = NBL_FP64_LITERAL(-4.1126339803469836976e-09) + p * w; + p = NBL_FP64_LITERAL(-2.9070369957882005086e-08) + p * w; + p = NBL_FP64_LITERAL(4.2347877827932403518e-07) + p * w; + p = NBL_FP64_LITERAL(-1.3654692000834678645e-06) + p * w; + p = NBL_FP64_LITERAL(-1.3882523362786468719e-05) + p * w; + p = NBL_FP64_LITERAL(0.0001867342080340571352) + p * w; + p = NBL_FP64_LITERAL(-0.00074070253416626697512) + p * w; + p = NBL_FP64_LITERAL(-0.0060336708714301490533) + p * w; + p = NBL_FP64_LITERAL(0.24015818242558961693) + p * w; + p = NBL_FP64_LITERAL(1.6536545626831027356) + p * w; + } + else if (w < 16.000000) + { + w = sqrt_helper::__call(w) - NBL_FP64_LITERAL(3.250000); + p = NBL_FP64_LITERAL(2.2137376921775787049e-09); + p = NBL_FP64_LITERAL(9.0756561938885390979e-08) + p * w; + p = NBL_FP64_LITERAL(-2.7517406297064545428e-07) + p * w; + p = NBL_FP64_LITERAL(1.8239629214389227755e-08) + p * w; + p = NBL_FP64_LITERAL(1.5027403968909827627e-06) + p * w; + p = NBL_FP64_LITERAL(-4.013867526981545969e-06) + p * w; + p = NBL_FP64_LITERAL(2.9234449089955446044e-06) + p * w; + p = NBL_FP64_LITERAL(1.2475304481671778723e-05) + p * w; + p = NBL_FP64_LITERAL(-4.7318229009055733981e-05) + p * w; + p = NBL_FP64_LITERAL(6.8284851459573175448e-05) + p * w; + p = NBL_FP64_LITERAL(2.4031110387097893999e-05) + p * w; + p = NBL_FP64_LITERAL(-0.0003550375203628474796) + p * w; + p = NBL_FP64_LITERAL(0.00095328937973738049703) + p * w; + p = NBL_FP64_LITERAL(-0.0016882755560235047313) + p * w; + p = NBL_FP64_LITERAL(0.0024914420961078508066) + p * w; + p = NBL_FP64_LITERAL(-0.0037512085075692412107) + p * w; + p = NBL_FP64_LITERAL(0.005370914553590063617) + p * w; + p = NBL_FP64_LITERAL(1.0052589676941592334) + p * w; + p = NBL_FP64_LITERAL(3.0838856104922207635) + p * w; + } + else + { + w = sqrt_helper::__call(w) - NBL_FP64_LITERAL(5.000000); + p = NBL_FP64_LITERAL(-2.7109920616438573243e-11); + p = NBL_FP64_LITERAL(-2.5556418169965252055e-10) + p * w; + p = NBL_FP64_LITERAL(1.5076572693500548083e-09) + p * w; + p = NBL_FP64_LITERAL(-3.7894654401267369937e-09) + p * w; + p = NBL_FP64_LITERAL(7.6157012080783393804e-09) + p * w; + p = NBL_FP64_LITERAL(-1.4960026627149240478e-08) + p * w; + p = NBL_FP64_LITERAL(2.9147953450901080826e-08) + p * w; + p = NBL_FP64_LITERAL(-6.7711997758452339498e-08) + p * w; + p = NBL_FP64_LITERAL(2.2900482228026654717e-07) + p * w; + p = NBL_FP64_LITERAL(-9.9298272942317002539e-07) + p * w; + p = NBL_FP64_LITERAL(4.5260625972231537039e-06) + p * w; + p = NBL_FP64_LITERAL(-1.9681778105531670567e-05) + p * w; + p = NBL_FP64_LITERAL(7.5995277030017761139e-05) + p * w; + p = NBL_FP64_LITERAL(-0.00021503011930044477347) + p * w; + p = NBL_FP64_LITERAL(-0.00013871931833623122026) + p * w; + p = NBL_FP64_LITERAL(1.0103004648645343977) + p * w; + p = NBL_FP64_LITERAL(4.8499064014085844221) + p * w; + } + + return p * x; + } +}; + #ifdef __HLSL_VERSION // SPIR-V already defines specializations for builtin vector types #define VECTOR_SPECIALIZATION_CONCEPT concepts::Vectorial && !is_vector_v @@ -668,11 +753,11 @@ struct pow_helper using traits = hlsl::vector_traits; array_get getter; array_set setter; - + return_t output; for (uint32_t i = 0; i < traits::Dimension; ++i) setter(output, i, pow_helper::__call(getter(x, i), getter(y, i))); - + return output; } }; From 4c963587b5cc8700224a02a15e41301bc49654ff Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 4 Mar 2025 15:53:40 +0700 Subject: [PATCH 08/11] reverted erf_helper --- include/nbl/builtin/hlsl/concepts/core.hlsl | 6 +- include/nbl/builtin/hlsl/tgmath/impl.hlsl | 169 ++------------------ 2 files changed, 15 insertions(+), 160 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts/core.hlsl b/include/nbl/builtin/hlsl/concepts/core.hlsl index dcbafae8a5..a732783f2c 100644 --- a/include/nbl/builtin/hlsl/concepts/core.hlsl +++ b/include/nbl/builtin/hlsl/concepts/core.hlsl @@ -29,13 +29,13 @@ template NBL_BOOL_CONCEPT UnsignedIntegral = !nbl::hlsl::is_signed_v && ::nbl::hlsl::is_integral_v; template -NBL_BOOL_CONCEPT FloatingPoint = nbl::hlsl::is_floating_point_v; +NBL_BOOL_CONCEPT FloatingPoint = nbl::hlsl::is_floating_point_v || nbl::hlsl::is_same_v; template NBL_BOOL_CONCEPT Boolean = nbl::hlsl::is_same_v || (nbl::hlsl::is_vector_v && nbl::hlsl::is_same_v::scalar_type, bool>); template -NBL_BOOL_CONCEPT Scalar = nbl::hlsl::is_scalar_v; +NBL_BOOL_CONCEPT Scalar = nbl::hlsl::is_scalar_v || nbl::hlsl::is_same_v; template NBL_BOOL_CONCEPT IntegralScalar = nbl::hlsl::is_integral_v && nbl::hlsl::is_scalar_v; @@ -47,7 +47,7 @@ template NBL_BOOL_CONCEPT UnsignedIntegralScalar = !nbl::hlsl::is_signed_v && ::nbl::hlsl::is_integral_v && nbl::hlsl::is_scalar_v; template -NBL_BOOL_CONCEPT FloatingPointScalar = nbl::hlsl::is_floating_point_v && nbl::hlsl::is_scalar_v; +NBL_BOOL_CONCEPT FloatingPointScalar = (nbl::hlsl::is_floating_point_v && nbl::hlsl::is_scalar_v) || nbl::hlsl::is_same_v; template NBL_BOOL_CONCEPT BooleanScalar = concepts::Boolean && nbl::hlsl::is_scalar_v; diff --git a/include/nbl/builtin/hlsl/tgmath/impl.hlsl b/include/nbl/builtin/hlsl/tgmath/impl.hlsl index f46aeabdc1..46f18c85db 100644 --- a/include/nbl/builtin/hlsl/tgmath/impl.hlsl +++ b/include/nbl/builtin/hlsl/tgmath/impl.hlsl @@ -189,163 +189,18 @@ struct erf_helper(x) >> 32); - ix = hx & 0x7fffffff; - if (ix >= 0x7ff00000) // erf(nan)=nan, erf(+-inf)=+-1 - { - int32_t i = ((uint32_t)hx >> 31) << 1; - return (float64_t)(1.0 - i) + one / x; - } - - float64_t P, Q; - if (ix < 0x3feb0000) // |x| < 0.84375 - { - if (ix < 0x3e300000) // |x| < 2**-28 - { - if (ix < 0x00800000) - { - // avoid underflow - return FloatingPoint(0.0625 * (16.0 * x + (16.0 * efx) * x)); - } - return FloatingPoint(x + efx * x); - } - z = x * x; - r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4))); - s = one + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5)))); - y = r / s; - return FloatingPoint(x + x * y); - } - if (ix < 0x3ff40000) // 0.84375 <= |x| < 1.25 - { - s = abs_helper::__call(x) - one; - P = pa0 + s * (pa1 + s * (pa2 + s * (pa3 + s * (pa4 + s * (pa5 + s * pa6))))); - Q = one + s * (qa1 + s * (qa2 + s * (qa3 + s * (qa4 + s * (qa5 + s * (qa5 + s * qa6)))))); - if (hx >= 0) - return FloatingPoint(erx + P / Q); - else - return FloatingPoint(-erx - P / Q); - } - if (ix >= 0x40180000) // inf > |x| >= 6 - { - if (hx >= 0) - return FloatingPoint(one - tiny); - else - return FloatingPoint(tiny - one); - } - - x = abs_helper::__call(x); - s = one / (x * x); - float64_t R, S; - if (ix < 0x4006DB6E) // |x| < 1/0.35 ~2.85714 - { - R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + s * (ra5 + s * (ra6 + s * ra7)))))); - S = one + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + s * (sa5 + s * (sa6 + s * sa7)))))); - } - else // |x| >= 1/0.35 - { - R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + s * rb5)))); - S = one + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + s * (sb5 + s * (sb6 + s * sb7)))))); - } - z = x; - uint64_t z1 = bit_cast(x); - z1 &= 0xffffffff00000000; - z = bit_cast(z1); - r = exp_helper::__call(-z * z - 0.5625) * exp_helper::__call((z - x) * (z + x) + R / S); - if (hx >= 0) - return FloatingPoint(one - r / x); - else - return FloatingPoint(r / x - one); - } -}; - -template<> -struct erf_helper -{ - static float32_t __call(NBL_CONST_REF_ARG(float32_t) _x) - { - // A&S approximation to 1.5x10-7 - const float32_t a1 = float32_t(NBL_FP64_LITERAL(0.254829592)); - const float32_t a2 = float32_t(NBL_FP64_LITERAL(-0.284496736)); - const float32_t a3 = float32_t(NBL_FP64_LITERAL(1.421413741)); - const float32_t a4 = float32_t(NBL_FP64_LITERAL(-1.453152027)); - const float32_t a5 = float32_t(NBL_FP64_LITERAL(1.061405429)); - const float32_t p = float32_t(NBL_FP64_LITERAL(0.3275911)); - - float32_t _sign = float32_t(sign(_x)); - float32_t x = abs(_x); - - float32_t t = float32_t(NBL_FP64_LITERAL(1.0)) / (float32_t(NBL_FP64_LITERAL(1.0)) + p * x); - float32_t y = float32_t(NBL_FP64_LITERAL(1.0)) - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x); + const FloatingPoint a1 = FloatingPoint(NBL_FP64_LITERAL(0.254829592)); + const FloatingPoint a2 = FloatingPoint(NBL_FP64_LITERAL(-0.284496736)); + const FloatingPoint a3 = FloatingPoint(NBL_FP64_LITERAL(1.421413741)); + const FloatingPoint a4 = FloatingPoint(NBL_FP64_LITERAL(-1.453152027)); + const FloatingPoint a5 = FloatingPoint(NBL_FP64_LITERAL(1.061405429)); + const FloatingPoint p = FloatingPoint(NBL_FP64_LITERAL(0.3275911)); + + FloatingPoint _sign = FloatingPoint(sign(_x)); + FloatingPoint x = abs(_x); + + FloatingPoint t = FloatingPoint(NBL_FP64_LITERAL(1.0)) / (FloatingPoint(NBL_FP64_LITERAL(1.0)) + p * x); + FloatingPoint y = FloatingPoint(NBL_FP64_LITERAL(1.0)) - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x); return _sign * y; } From 75b50faa02c0af386c9cc4f8ae299bbe37cd7867 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 4 Mar 2025 17:01:16 +0700 Subject: [PATCH 09/11] some minor fixes --- include/nbl/builtin/hlsl/ieee754.hlsl | 2 +- include/nbl/builtin/hlsl/tgmath/impl.hlsl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 8d9c78a9f0..4b281c2111 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -148,7 +148,7 @@ NBL_CONSTEXPR_INLINE_FUNC FloatingPoint flipSign(FloatingPoint val, bool flip = using AsFloat = typename float_of_size::type; using AsUint = typename unsigned_integer_of_size::type; const AsUint asUint = ieee754::impl::bitCastToUintType(val); - return bit_cast(asUint ^ (flip ? ieee754::traits::signMask : 0ull)); + return bit_cast(asUint ^ (flip ? ieee754::traits::signMask : AsUint(0ull))); } } diff --git a/include/nbl/builtin/hlsl/tgmath/impl.hlsl b/include/nbl/builtin/hlsl/tgmath/impl.hlsl index 46f18c85db..24bf6796ea 100644 --- a/include/nbl/builtin/hlsl/tgmath/impl.hlsl +++ b/include/nbl/builtin/hlsl/tgmath/impl.hlsl @@ -458,7 +458,7 @@ struct erfInv_helper { float64_t x = clamp(_x, NBL_FP64_LITERAL(-0.99999), NBL_FP64_LITERAL(0.99999)); - float64_t w = -log_helper::__call((NBL_FP64_LITERAL(1.0) - x) * (NBL_FP64_LITERAL(1.0) + x)); + float64_t w = float64_t(-log_helper::__call((float32_t(NBL_FP64_LITERAL(1.0)) - x) * float32_t(NBL_FP64_LITERAL(1.0)) + x)); float64_t p; if (w < 6.250000) { From 37a8b7b5335fae41743943694b98ffee6158c1fd Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 5 Mar 2025 10:39:18 +0700 Subject: [PATCH 10/11] reverted some changes, erf/erfInv fixes --- include/nbl/builtin/hlsl/concepts/core.hlsl | 6 +- include/nbl/builtin/hlsl/tgmath/impl.hlsl | 171 ++++++++++---------- 2 files changed, 89 insertions(+), 88 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts/core.hlsl b/include/nbl/builtin/hlsl/concepts/core.hlsl index a732783f2c..c1bc0277df 100644 --- a/include/nbl/builtin/hlsl/concepts/core.hlsl +++ b/include/nbl/builtin/hlsl/concepts/core.hlsl @@ -29,13 +29,13 @@ template NBL_BOOL_CONCEPT UnsignedIntegral = !nbl::hlsl::is_signed_v && ::nbl::hlsl::is_integral_v; template -NBL_BOOL_CONCEPT FloatingPoint = nbl::hlsl::is_floating_point_v || nbl::hlsl::is_same_v; +NBL_BOOL_CONCEPT FloatingPoint = nbl::hlsl::is_floating_point_v; template NBL_BOOL_CONCEPT Boolean = nbl::hlsl::is_same_v || (nbl::hlsl::is_vector_v && nbl::hlsl::is_same_v::scalar_type, bool>); template -NBL_BOOL_CONCEPT Scalar = nbl::hlsl::is_scalar_v || nbl::hlsl::is_same_v; +NBL_BOOL_CONCEPT Scalar = nbl::hlsl::is_scalar_v; template NBL_BOOL_CONCEPT IntegralScalar = nbl::hlsl::is_integral_v && nbl::hlsl::is_scalar_v; @@ -47,7 +47,7 @@ template NBL_BOOL_CONCEPT UnsignedIntegralScalar = !nbl::hlsl::is_signed_v && ::nbl::hlsl::is_integral_v && nbl::hlsl::is_scalar_v; template -NBL_BOOL_CONCEPT FloatingPointScalar = (nbl::hlsl::is_floating_point_v && nbl::hlsl::is_scalar_v) || nbl::hlsl::is_same_v; +NBL_BOOL_CONCEPT FloatingPointScalar = (nbl::hlsl::is_floating_point_v && nbl::hlsl::is_scalar_v); template NBL_BOOL_CONCEPT BooleanScalar = concepts::Boolean && nbl::hlsl::is_scalar_v; diff --git a/include/nbl/builtin/hlsl/tgmath/impl.hlsl b/include/nbl/builtin/hlsl/tgmath/impl.hlsl index 24bf6796ea..c73ad500c4 100644 --- a/include/nbl/builtin/hlsl/tgmath/impl.hlsl +++ b/include/nbl/builtin/hlsl/tgmath/impl.hlsl @@ -405,7 +405,7 @@ struct erf_helper float16_t x = abs_helper::__call(_x); float16_t t = float16_t(1.f) / (float16_t(1.f) + p * x); - float16_t y = float16_t(1.f) - (((a3 * t + a2) * t) + a1) * t * exp(-x * x); + float16_t y = float16_t(1.f) - (((a3 * t + a2) * t) + a1) * t * exp_helper::__call(-x * x); return _sign * y; } @@ -451,90 +451,91 @@ struct erfInv_helper -struct erfInv_helper -{ - static float64_t __call(NBL_CONST_REF_ARG(float64_t) _x) - { - float64_t x = clamp(_x, NBL_FP64_LITERAL(-0.99999), NBL_FP64_LITERAL(0.99999)); - - float64_t w = float64_t(-log_helper::__call((float32_t(NBL_FP64_LITERAL(1.0)) - x) * float32_t(NBL_FP64_LITERAL(1.0)) + x)); - float64_t p; - if (w < 6.250000) - { - w -= NBL_FP64_LITERAL(3.125000); - p = NBL_FP64_LITERAL(-3.6444120640178196996e-21); - p = NBL_FP64_LITERAL(-1.685059138182016589e-19) + p * w; - p = NBL_FP64_LITERAL(1.2858480715256400167e-18) + p * w; - p = NBL_FP64_LITERAL(1.115787767802518096e-17) + p * w; - p = NBL_FP64_LITERAL(-1.333171662854620906e-16) + p * w; - p = NBL_FP64_LITERAL(2.0972767875968561637e-17) + p * w; - p = NBL_FP64_LITERAL(6.6376381343583238325e-15) + p * w; - p = NBL_FP64_LITERAL(-4.0545662729752068639e-14) + p * w; - p = NBL_FP64_LITERAL(-8.1519341976054721522e-14) + p * w; - p = NBL_FP64_LITERAL(2.6335093153082322977e-12) + p * w; - p = NBL_FP64_LITERAL(-1.2975133253453532498e-11) + p * w; - p = NBL_FP64_LITERAL(-5.4154120542946279317e-11) + p * w; - p = NBL_FP64_LITERAL(1.051212273321532285e-09) + p * w; - p = NBL_FP64_LITERAL(-4.1126339803469836976e-09) + p * w; - p = NBL_FP64_LITERAL(-2.9070369957882005086e-08) + p * w; - p = NBL_FP64_LITERAL(4.2347877827932403518e-07) + p * w; - p = NBL_FP64_LITERAL(-1.3654692000834678645e-06) + p * w; - p = NBL_FP64_LITERAL(-1.3882523362786468719e-05) + p * w; - p = NBL_FP64_LITERAL(0.0001867342080340571352) + p * w; - p = NBL_FP64_LITERAL(-0.00074070253416626697512) + p * w; - p = NBL_FP64_LITERAL(-0.0060336708714301490533) + p * w; - p = NBL_FP64_LITERAL(0.24015818242558961693) + p * w; - p = NBL_FP64_LITERAL(1.6536545626831027356) + p * w; - } - else if (w < 16.000000) - { - w = sqrt_helper::__call(w) - NBL_FP64_LITERAL(3.250000); - p = NBL_FP64_LITERAL(2.2137376921775787049e-09); - p = NBL_FP64_LITERAL(9.0756561938885390979e-08) + p * w; - p = NBL_FP64_LITERAL(-2.7517406297064545428e-07) + p * w; - p = NBL_FP64_LITERAL(1.8239629214389227755e-08) + p * w; - p = NBL_FP64_LITERAL(1.5027403968909827627e-06) + p * w; - p = NBL_FP64_LITERAL(-4.013867526981545969e-06) + p * w; - p = NBL_FP64_LITERAL(2.9234449089955446044e-06) + p * w; - p = NBL_FP64_LITERAL(1.2475304481671778723e-05) + p * w; - p = NBL_FP64_LITERAL(-4.7318229009055733981e-05) + p * w; - p = NBL_FP64_LITERAL(6.8284851459573175448e-05) + p * w; - p = NBL_FP64_LITERAL(2.4031110387097893999e-05) + p * w; - p = NBL_FP64_LITERAL(-0.0003550375203628474796) + p * w; - p = NBL_FP64_LITERAL(0.00095328937973738049703) + p * w; - p = NBL_FP64_LITERAL(-0.0016882755560235047313) + p * w; - p = NBL_FP64_LITERAL(0.0024914420961078508066) + p * w; - p = NBL_FP64_LITERAL(-0.0037512085075692412107) + p * w; - p = NBL_FP64_LITERAL(0.005370914553590063617) + p * w; - p = NBL_FP64_LITERAL(1.0052589676941592334) + p * w; - p = NBL_FP64_LITERAL(3.0838856104922207635) + p * w; - } - else - { - w = sqrt_helper::__call(w) - NBL_FP64_LITERAL(5.000000); - p = NBL_FP64_LITERAL(-2.7109920616438573243e-11); - p = NBL_FP64_LITERAL(-2.5556418169965252055e-10) + p * w; - p = NBL_FP64_LITERAL(1.5076572693500548083e-09) + p * w; - p = NBL_FP64_LITERAL(-3.7894654401267369937e-09) + p * w; - p = NBL_FP64_LITERAL(7.6157012080783393804e-09) + p * w; - p = NBL_FP64_LITERAL(-1.4960026627149240478e-08) + p * w; - p = NBL_FP64_LITERAL(2.9147953450901080826e-08) + p * w; - p = NBL_FP64_LITERAL(-6.7711997758452339498e-08) + p * w; - p = NBL_FP64_LITERAL(2.2900482228026654717e-07) + p * w; - p = NBL_FP64_LITERAL(-9.9298272942317002539e-07) + p * w; - p = NBL_FP64_LITERAL(4.5260625972231537039e-06) + p * w; - p = NBL_FP64_LITERAL(-1.9681778105531670567e-05) + p * w; - p = NBL_FP64_LITERAL(7.5995277030017761139e-05) + p * w; - p = NBL_FP64_LITERAL(-0.00021503011930044477347) + p * w; - p = NBL_FP64_LITERAL(-0.00013871931833623122026) + p * w; - p = NBL_FP64_LITERAL(1.0103004648645343977) + p * w; - p = NBL_FP64_LITERAL(4.8499064014085844221) + p * w; - } - - return p * x; - } -}; +// log doesn't accept float64_t +// template<> +// struct erfInv_helper +// { +// static float64_t __call(NBL_CONST_REF_ARG(float64_t) _x) +// { +// float64_t x = clamp(_x, NBL_FP64_LITERAL(-0.99999), NBL_FP64_LITERAL(0.99999)); + +// float64_t w = -log_helper::__call((NBL_FP64_LITERAL(1.0) - x) * (NBL_FP64_LITERAL(1.0) + x)); +// float64_t p; +// if (w < 6.250000) +// { +// w -= NBL_FP64_LITERAL(3.125000); +// p = NBL_FP64_LITERAL(-3.6444120640178196996e-21); +// p = NBL_FP64_LITERAL(-1.685059138182016589e-19) + p * w; +// p = NBL_FP64_LITERAL(1.2858480715256400167e-18) + p * w; +// p = NBL_FP64_LITERAL(1.115787767802518096e-17) + p * w; +// p = NBL_FP64_LITERAL(-1.333171662854620906e-16) + p * w; +// p = NBL_FP64_LITERAL(2.0972767875968561637e-17) + p * w; +// p = NBL_FP64_LITERAL(6.6376381343583238325e-15) + p * w; +// p = NBL_FP64_LITERAL(-4.0545662729752068639e-14) + p * w; +// p = NBL_FP64_LITERAL(-8.1519341976054721522e-14) + p * w; +// p = NBL_FP64_LITERAL(2.6335093153082322977e-12) + p * w; +// p = NBL_FP64_LITERAL(-1.2975133253453532498e-11) + p * w; +// p = NBL_FP64_LITERAL(-5.4154120542946279317e-11) + p * w; +// p = NBL_FP64_LITERAL(1.051212273321532285e-09) + p * w; +// p = NBL_FP64_LITERAL(-4.1126339803469836976e-09) + p * w; +// p = NBL_FP64_LITERAL(-2.9070369957882005086e-08) + p * w; +// p = NBL_FP64_LITERAL(4.2347877827932403518e-07) + p * w; +// p = NBL_FP64_LITERAL(-1.3654692000834678645e-06) + p * w; +// p = NBL_FP64_LITERAL(-1.3882523362786468719e-05) + p * w; +// p = NBL_FP64_LITERAL(0.0001867342080340571352) + p * w; +// p = NBL_FP64_LITERAL(-0.00074070253416626697512) + p * w; +// p = NBL_FP64_LITERAL(-0.0060336708714301490533) + p * w; +// p = NBL_FP64_LITERAL(0.24015818242558961693) + p * w; +// p = NBL_FP64_LITERAL(1.6536545626831027356) + p * w; +// } +// else if (w < 16.000000) +// { +// w = sqrt_helper::__call(w) - NBL_FP64_LITERAL(3.250000); +// p = NBL_FP64_LITERAL(2.2137376921775787049e-09); +// p = NBL_FP64_LITERAL(9.0756561938885390979e-08) + p * w; +// p = NBL_FP64_LITERAL(-2.7517406297064545428e-07) + p * w; +// p = NBL_FP64_LITERAL(1.8239629214389227755e-08) + p * w; +// p = NBL_FP64_LITERAL(1.5027403968909827627e-06) + p * w; +// p = NBL_FP64_LITERAL(-4.013867526981545969e-06) + p * w; +// p = NBL_FP64_LITERAL(2.9234449089955446044e-06) + p * w; +// p = NBL_FP64_LITERAL(1.2475304481671778723e-05) + p * w; +// p = NBL_FP64_LITERAL(-4.7318229009055733981e-05) + p * w; +// p = NBL_FP64_LITERAL(6.8284851459573175448e-05) + p * w; +// p = NBL_FP64_LITERAL(2.4031110387097893999e-05) + p * w; +// p = NBL_FP64_LITERAL(-0.0003550375203628474796) + p * w; +// p = NBL_FP64_LITERAL(0.00095328937973738049703) + p * w; +// p = NBL_FP64_LITERAL(-0.0016882755560235047313) + p * w; +// p = NBL_FP64_LITERAL(0.0024914420961078508066) + p * w; +// p = NBL_FP64_LITERAL(-0.0037512085075692412107) + p * w; +// p = NBL_FP64_LITERAL(0.005370914553590063617) + p * w; +// p = NBL_FP64_LITERAL(1.0052589676941592334) + p * w; +// p = NBL_FP64_LITERAL(3.0838856104922207635) + p * w; +// } +// else +// { +// w = sqrt_helper::__call(w) - NBL_FP64_LITERAL(5.000000); +// p = NBL_FP64_LITERAL(-2.7109920616438573243e-11); +// p = NBL_FP64_LITERAL(-2.5556418169965252055e-10) + p * w; +// p = NBL_FP64_LITERAL(1.5076572693500548083e-09) + p * w; +// p = NBL_FP64_LITERAL(-3.7894654401267369937e-09) + p * w; +// p = NBL_FP64_LITERAL(7.6157012080783393804e-09) + p * w; +// p = NBL_FP64_LITERAL(-1.4960026627149240478e-08) + p * w; +// p = NBL_FP64_LITERAL(2.9147953450901080826e-08) + p * w; +// p = NBL_FP64_LITERAL(-6.7711997758452339498e-08) + p * w; +// p = NBL_FP64_LITERAL(2.2900482228026654717e-07) + p * w; +// p = NBL_FP64_LITERAL(-9.9298272942317002539e-07) + p * w; +// p = NBL_FP64_LITERAL(4.5260625972231537039e-06) + p * w; +// p = NBL_FP64_LITERAL(-1.9681778105531670567e-05) + p * w; +// p = NBL_FP64_LITERAL(7.5995277030017761139e-05) + p * w; +// p = NBL_FP64_LITERAL(-0.00021503011930044477347) + p * w; +// p = NBL_FP64_LITERAL(-0.00013871931833623122026) + p * w; +// p = NBL_FP64_LITERAL(1.0103004648645343977) + p * w; +// p = NBL_FP64_LITERAL(4.8499064014085844221) + p * w; +// } + +// return p * x; +// } +// }; #ifdef __HLSL_VERSION // SPIR-V already defines specializations for builtin vector types From ff66405843fa0bd4fb81dbcb2831097aec7e9230 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 19 Mar 2025 16:13:26 +0700 Subject: [PATCH 11/11] removed cast fp64 from frisvad --- include/nbl/builtin/hlsl/math/functions.hlsl | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/functions.hlsl b/include/nbl/builtin/hlsl/math/functions.hlsl index 283841d4ab..be341b6a12 100644 --- a/include/nbl/builtin/hlsl/math/functions.hlsl +++ b/include/nbl/builtin/hlsl/math/functions.hlsl @@ -103,17 +103,20 @@ void sincos(T theta, NBL_REF_ARG(T) s, NBL_REF_ARG(T) c) template ::Dimension == 3) void frisvad(NBL_CONST_REF_ARG(T) normal, NBL_REF_ARG(T) tangent, NBL_REF_ARG(T) bitangent) { - const typename vector_traits::scalar_type a = NBL_FP64_LITERAL(1.0) / (NBL_FP64_LITERAL(1.0) + normal.z); - const typename vector_traits::scalar_type b = -normal.x * normal.y * a; - if (normal.z < -NBL_FP64_LITERAL(0.9999999)) + using scalar_t = typename vector_traits::scalar_type; + const scalar_t unit = _static_cast(1); + + const scalar_t a = unit / (unit + normal.z); + const scalar_t b = -normal.x * normal.y * a; + if (normal.z < -_static_cast(0.9999999)) { tangent = T(0.0,-1.0,0.0); bitangent = T(-1.0,0.0,0.0); } else { - tangent = T(NBL_FP64_LITERAL(1.0)-normal.x*normal.x*a, b, -normal.x); - bitangent = T(b, NBL_FP64_LITERAL(1.0)-normal.y*normal.y*a, -normal.y); + tangent = T(unit - normal.x * normal.x * a, b, -normal.x); + bitangent = T(b, unit - normal.y * normal.y * a, -normal.y); } }