Skip to content

Maths for HLSL BxDFs (template cmath, tgmath) #803

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Mar 31, 2025
Merged
148 changes: 148 additions & 0 deletions include/nbl/builtin/hlsl/bxdf/fresnel.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h
#ifndef _NBL_BUILTIN_HLSL_BXDF_FRESNEL_INCLUDED_
#define _NBL_BUILTIN_HLSL_BXDF_FRESNEL_INCLUDED_

#include "nbl/builtin/hlsl/cpp_compat.hlsl"
#include "nbl/builtin/hlsl/numbers.hlsl"
#include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl"
#include "nbl/builtin/hlsl/spirv_intrinsics/core.hlsl"

namespace nbl
{
namespace hlsl
{

namespace bxdf
{

namespace impl
{
template<typename T>
struct orientedEtas;

template<>
struct orientedEtas<float>
{
static bool __call(NBL_REF_ARG(float) orientedEta, NBL_REF_ARG(float) rcpOrientedEta, float NdotI, float eta)
{
const bool backside = NdotI < 0.0;
const float rcpEta = 1.0 / eta;
orientedEta = backside ? rcpEta : eta;
rcpOrientedEta = backside ? eta : rcpEta;
return backside;
}
};

template<>
struct orientedEtas<float32_t3>
{
static bool __call(NBL_REF_ARG(float32_t3) orientedEta, NBL_REF_ARG(float32_t3) rcpOrientedEta, float NdotI, float32_t3 eta)
{
const bool backside = NdotI < 0.0;
const float32_t3 rcpEta = (float32_t3)1.0 / eta;
orientedEta = backside ? rcpEta:eta;
rcpOrientedEta = backside ? eta:rcpEta;
return backside;
}
};
}

template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T> || is_vector_v<T>)
bool getOrientedEtas(NBL_REF_ARG(T) orientedEta, NBL_REF_ARG(T) rcpOrientedEta, scalar_type_t<T> NdotI, T eta)
{
return impl::orientedEtas<T>::__call(orientedEta, rcpOrientedEta, NdotI, eta);
}


template <typename T NBL_FUNC_REQUIRES(concepts::Vectorial<T>)
T reflect(T I, T N, typename vector_traits<T>::scalar_type NdotI)
{
return N * 2.0f * NdotI - I;
}

template<typename T NBL_PRIMARY_REQUIRES(vector_traits<T>::Dimension == 3)
struct refract
{
using this_t = refract<T>;
using vector_type = T;
using scalar_type = typename vector_traits<T>::scalar_type;

static this_t create(NBL_CONST_REF_ARG(vector_type) I, NBL_CONST_REF_ARG(vector_type) N, bool backside, scalar_type NdotI, scalar_type NdotI2, scalar_type rcpOrientedEta, scalar_type rcpOrientedEta2)
{
this_t retval;
retval.I = I;
retval.N = N;
retval.backside = backside;
retval.NdotI = NdotI;
retval.NdotI2 = NdotI2;
retval.rcpOrientedEta = rcpOrientedEta;
retval.rcpOrientedEta2 = rcpOrientedEta2;
return retval;
}

static this_t create(NBL_CONST_REF_ARG(vector_type) I, NBL_CONST_REF_ARG(vector_type) N, scalar_type NdotI, scalar_type eta)
{
this_t retval;
retval.I = I;
retval.N = N;
scalar_type orientedEta;
retval.backside = getOrientedEtas<scalar_type>(orientedEta, retval.rcpOrientedEta, NdotI, eta);
retval.NdotI = NdotI;
retval.NdotI2 = NdotI * NdotI;
retval.rcpOrientedEta2 = retval.rcpOrientedEta * retval.rcpOrientedEta;
return retval;
}

static this_t create(NBL_CONST_REF_ARG(vector_type) I, NBL_CONST_REF_ARG(vector_type) N, scalar_type eta)
{
this_t retval;
retval.I = I;
retval.N = N;
retval.NdotI = dot<vector_type>(N, I);
scalar_type orientedEta;
retval.backside = getOrientedEtas<scalar_type>(orientedEta, retval.rcpOrientedEta, retval.NdotI, eta);
retval.NdotI2 = retval.NdotI * retval.NdotI;
retval.rcpOrientedEta2 = retval.rcpOrientedEta * retval.rcpOrientedEta;
return retval;
}

static scalar_type computeNdotT(bool backside, scalar_type NdotI2, scalar_type rcpOrientedEta2)
{
scalar_type NdotT2 = rcpOrientedEta2 * NdotI2 + 1.0 - rcpOrientedEta2;
scalar_type absNdotT = sqrt<scalar_type>(NdotT2);
return backside ? absNdotT : -(absNdotT);
}

vector_type doRefract()
{
return N * (NdotI * rcpOrientedEta + computeNdotT(backside, NdotI2, rcpOrientedEta2)) - rcpOrientedEta * I;
}

static vector_type doReflectRefract(bool _refract, NBL_CONST_REF_ARG(vector_type) _I, NBL_CONST_REF_ARG(vector_type) _N, scalar_type _NdotI, scalar_type _NdotTorR, scalar_type _rcpOrientedEta)
{
return _N * (_NdotI * (_refract ? _rcpOrientedEta : 1.0f) + _NdotTorR) - _I * (_refract ? _rcpOrientedEta : 1.0f);
}

vector_type doReflectRefract(bool r)
{
const T NdotTorR = r ? computeNdotT(backside, NdotI2, rcpOrientedEta2) : NdotI;
return doReflectRefract(r, I, N, NdotI, NdotTorR, rcpOrientedEta);
}

vector_type I;
vector_type N;
bool backside;
scalar_type NdotI;
scalar_type NdotI2;
scalar_type rcpOrientedEta;
scalar_type rcpOrientedEta2;
};

}

}
}

#endif
2 changes: 1 addition & 1 deletion include/nbl/builtin/hlsl/concepts/core.hlsl
Original file line number Diff line number Diff line change
@@ -47,7 +47,7 @@ template<typename T>
NBL_BOOL_CONCEPT UnsignedIntegralScalar = !nbl::hlsl::is_signed_v<T> && ::nbl::hlsl::is_integral_v<T> && nbl::hlsl::is_scalar_v<T>;

template<typename T>
NBL_BOOL_CONCEPT FloatingPointScalar = nbl::hlsl::is_floating_point_v<T> && nbl::hlsl::is_scalar_v<T>;
NBL_BOOL_CONCEPT FloatingPointScalar = (nbl::hlsl::is_floating_point_v<T> && nbl::hlsl::is_scalar_v<T>);

template<typename T>
NBL_BOOL_CONCEPT BooleanScalar = concepts::Boolean<T> && nbl::hlsl::is_scalar_v<T>;
2 changes: 1 addition & 1 deletion include/nbl/builtin/hlsl/ieee754.hlsl
Original file line number Diff line number Diff line change
@@ -148,7 +148,7 @@ NBL_CONSTEXPR_INLINE_FUNC FloatingPoint flipSign(FloatingPoint val, bool flip =
using AsFloat = typename float_of_size<sizeof(FloatingPoint)>::type;
using AsUint = typename unsigned_integer_of_size<sizeof(FloatingPoint)>::type;
const AsUint asUint = ieee754::impl::bitCastToUintType(val);
return bit_cast<FloatingPoint>(asUint ^ (flip ? ieee754::traits<AsFloat>::signMask : 0ull));
return bit_cast<FloatingPoint>(asUint ^ (flip ? ieee754::traits<AsFloat>::signMask : AsUint(0ull)));
}

}
226 changes: 23 additions & 203 deletions include/nbl/builtin/hlsl/math/functions.hlsl
Original file line number Diff line number Diff line change
@@ -6,6 +6,8 @@

#include "nbl/builtin/hlsl/cpp_compat.hlsl"
#include "nbl/builtin/hlsl/numbers.hlsl"
#include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl"
#include "nbl/builtin/hlsl/concepts/vector.hlsl"
#include "nbl/builtin/hlsl/spirv_intrinsics/core.hlsl"
#include "nbl/builtin/hlsl/ieee754.hlsl"

@@ -38,7 +40,6 @@ struct lp_norm<T,0,false NBL_PARTIAL_REQ_BOT(concepts::FloatingPointLikeVectoria
}
};

// TOOD: is this doing what it should be?
template<typename T> NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeVectorial<T> || concepts::IntVectorial<T>)
struct lp_norm<T,1,true NBL_PARTIAL_REQ_BOT(concepts::FloatingPointLikeVectorial<T> || concepts::IntVectorial<T>) >
{
@@ -75,213 +76,20 @@ struct lp_norm<T,2,false NBL_PARTIAL_REQ_BOT(concepts::FloatingPointLikeVectoria
return hlsl::sqrt<scalar_type>(__sum(v));
}
};

// TODO: even/odd cases
}

template<typename T, uint32_t LP NBL_FUNC_REQUIRES(LP>0)
template<typename T, uint32_t LP NBL_FUNC_REQUIRES((concepts::FloatingPointVector<T> || concepts::FloatingPointVectorial<T>) && LP>0)
scalar_type_t<T> lpNormPreroot(NBL_CONST_REF_ARG(T) v)
{
return impl::lp_norm<T,LP>::__sum(v);
}

template<typename T, uint32_t LP>
template<typename T, uint32_t LP NBL_FUNC_REQUIRES(concepts::FloatingPointVector<T> || concepts::FloatingPointVectorial<T>)
scalar_type_t<T> lpNorm(NBL_CONST_REF_ARG(T) v)
{
return impl::lp_norm<T,LP>::__call(v);
}

template <typename T NBL_FUNC_REQUIRES(concepts::Vectorial<T>)
T reflect(T I, T N, typename vector_traits<T>::scalar_type NdotI)
{
return N * 2.0f * NdotI - I;
}

namespace impl
{
template<typename T>
struct orientedEtas;

template<>
struct orientedEtas<float>
{
static bool __call(NBL_REF_ARG(float) orientedEta, NBL_REF_ARG(float) rcpOrientedEta, float NdotI, float eta)
{
const bool backside = NdotI < 0.0;
const float rcpEta = 1.0 / eta;
orientedEta = backside ? rcpEta : eta;
rcpOrientedEta = backside ? eta : rcpEta;
return backside;
}
};

template<>
struct orientedEtas<float32_t3>
{
static bool __call(NBL_REF_ARG(float32_t3) orientedEta, NBL_REF_ARG(float32_t3) rcpOrientedEta, float NdotI, float32_t3 eta)
{
const bool backside = NdotI < 0.0;
const float32_t3 rcpEta = (float32_t3)1.0 / eta;
orientedEta = backside ? rcpEta:eta;
rcpOrientedEta = backside ? eta:rcpEta;
return backside;
}
};
}

template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T> || is_vector_v<T>)
bool getOrientedEtas(NBL_REF_ARG(T) orientedEta, NBL_REF_ARG(T) rcpOrientedEta, scalar_type_t<T> NdotI, T eta)
{
return impl::orientedEtas<T>::__call(orientedEta, rcpOrientedEta, NdotI, eta);
}


namespace impl
{

template<typename T NBL_STRUCT_CONSTRAINABLE>
struct refract;

template<typename T> NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeVectorial<T>)
struct refract<T NBL_PARTIAL_REQ_BOT(concepts::FloatingPointLikeVectorial<T>) >
{
using this_t = refract<T>;
using vector_type = T;
using scalar_type = typename vector_traits<T>::scalar_type;

static this_t create(vector_type I, vector_type N, bool backside, scalar_type NdotI, scalar_type NdotI2, scalar_type rcpOrientedEta, scalar_type rcpOrientedEta2)
{
this_t retval;
retval.I = I;
retval.N = N;
retval.backside = backside;
retval.NdotI = NdotI;
retval.NdotI2 = NdotI2;
retval.rcpOrientedEta = rcpOrientedEta;
retval.rcpOrientedEta2 = rcpOrientedEta2;
return retval;
}

static this_t create(vector_type I, vector_type N, scalar_type NdotI, scalar_type eta)
{
this_t retval;
retval.I = I;
retval.N = N;
T orientedEta;
retval.backside = getOrientedEtas<T>(orientedEta, retval.rcpOrientedEta, NdotI, eta);
retval.NdotI = NdotI;
retval.NdotI2 = NdotI * NdotI;
retval.rcpOrientedEta2 = retval.rcpOrientedEta * retval.rcpOrientedEta;
return retval;
}

static this_t create(vector_type I, vector_type N, scalar_type eta)
{
this_t retval;
retval.I = I;
retval.N = N;
retval.NdotI = dot<vector_type>(N, I);
scalar_type orientedEta;
retval.backside = getOrientedEtas<scalar_type>(orientedEta, retval.rcpOrientedEta, retval.NdotI, eta);
retval.NdotI2 = retval.NdotI * retval.NdotI;
retval.rcpOrientedEta2 = retval.rcpOrientedEta * retval.rcpOrientedEta;
return retval;
}

scalar_type computeNdotT()
{
scalar_type NdotT2 = rcpOrientedEta2 * NdotI2 + 1.0 - rcpOrientedEta2;
scalar_type absNdotT = sqrt<scalar_type>(NdotT2);
return backside ? absNdotT : -(absNdotT);
}

vector_type doRefract()
{
return N * (NdotI * rcpOrientedEta + computeNdotT()) - rcpOrientedEta * I;
}

static vector_type doReflectRefract(bool _refract, vector_type _I, vector_type _N, scalar_type _NdotI, scalar_type _NdotTorR, scalar_type _rcpOrientedEta)
{
return _N * (_NdotI * (_refract ? _rcpOrientedEta : 1.0f) + _NdotTorR) - _I * (_refract ? _rcpOrientedEta : 1.0f);
}

vector_type doReflectRefract(bool r)
{
const scalar_type NdotTorR = r ? computeNdotT() : NdotI;
return doReflectRefract(r, I, N, NdotI, NdotTorR, rcpOrientedEta);
}

vector_type I;
vector_type N;
bool backside;
scalar_type NdotI;
scalar_type NdotI2;
scalar_type rcpOrientedEta;
scalar_type rcpOrientedEta2;
};
}

template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<T>)
T refract(T I, T N, bool backside,
typename vector_traits<T>::scalar_type NdotI,
typename vector_traits<T>::scalar_type NdotI2,
typename vector_traits<T>::scalar_type rcpOrientedEta,
typename vector_traits<T>::scalar_type rcpOrientedEta2)
{
impl::refract<T> r = impl::refract<T>::create(I, N, backside, NdotI, NdotI2, rcpOrientedEta, rcpOrientedEta2);
return r.doRefract();
}

template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<T>)
T refract(T I, T N, typename vector_traits<T>::scalar_type NdotI, typename vector_traits<T>::scalar_type eta)
{
impl::refract<T> r = impl::refract<T>::create(I, N, NdotI, eta);
return r.doRefract();
}

template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<T>)
T refract(T I, T N, typename vector_traits<T>::scalar_type eta)
{
impl::refract<T> r = impl::refract<T>::create(I, N, eta);
return r.doRefract();
}

template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<T>)
typename vector_traits<T>::scalar_type reflectRefract_computeNdotT(bool backside, typename vector_traits<T>::scalar_type NdotI2, typename vector_traits<T>::scalar_type rcpOrientedEta2)
{
impl::refract<T> r;
r.NdotI2 = NdotI2;
r.rcpOrientedEta2 = rcpOrientedEta2;
r.backside = backside;
return r.computeNdotT();
}

template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<T>)
T reflectRefract_impl(bool _refract, T _I, T _N,
typename vector_traits<T>::scalar_type _NdotI,
typename vector_traits<T>::scalar_type _NdotTorR,
typename vector_traits<T>::scalar_type _rcpOrientedEta)
{
return impl::refract<T>::doReflectRefract(_refract, _I, _N, _NdotI, _NdotTorR, _rcpOrientedEta);
}

template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<T>)
T reflectRefract(bool _refract, T I, T N, bool backside,
typename vector_traits<T>::scalar_type NdotI,
typename vector_traits<T>::scalar_type NdotI2,
typename vector_traits<T>::scalar_type rcpOrientedEta,
typename vector_traits<T>::scalar_type rcpOrientedEta2)
{
impl::refract<T> r = impl::refract<T>::create(I, N, backside, NdotI, NdotI2, rcpOrientedEta, rcpOrientedEta2);
return r.doReflectRefract(_refract);
}

template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<T>)
T reflectRefract(bool _refract, T I, T N, typename vector_traits<T>::scalar_type NdotI, typename vector_traits<T>::scalar_type eta)
{
impl::refract<T> r = impl::refract<T>::create(I, N, NdotI, eta);
return r.doReflectRefract(_refract);
}

// valid only for `theta` in [-PI,PI]
template <typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<T>)
@@ -292,13 +100,24 @@ void sincos(T theta, NBL_REF_ARG(T) s, NBL_REF_ARG(T) c)
s = ieee754::flipSign(s, theta < T(NBL_FP64_LITERAL(0.0)));
}

template <typename T NBL_FUNC_REQUIRES(is_scalar_v<T>)
matrix<T, 2, 3> frisvad(vector<T, 3> n)
template <typename T NBL_FUNC_REQUIRES(vector_traits<T>::Dimension == 3)
void frisvad(NBL_CONST_REF_ARG(T) normal, NBL_REF_ARG(T) tangent, NBL_REF_ARG(T) bitangent)
{
const T a = T(NBL_FP64_LITERAL(1.0)) / (T(NBL_FP64_LITERAL(1.0)) + n.z);
const T b = -n.x * n.y * a;
return (n.z < -T(NBL_FP64_LITERAL(0.9999999))) ? matrix<T, 2, 3>(vector<T, 3>(0.0,-1.0,0.0), vector<T, 3>(-1.0,0.0,0.0)) :
matrix<T, 2, 3>(vector<T, 3>(T(NBL_FP64_LITERAL(1.0))-n.x*n.x*a, b, -n.x), vector<T, 3>(b, T(NBL_FP64_LITERAL(1.0))-n.y*n.y*a, -n.y));
using scalar_t = typename vector_traits<T>::scalar_type;
const scalar_t unit = _static_cast<scalar_t>(1);

const scalar_t a = unit / (unit + normal.z);
const scalar_t b = -normal.x * normal.y * a;
if (normal.z < -_static_cast<scalar_t>(0.9999999))
{
tangent = T(0.0,-1.0,0.0);
bitangent = T(-1.0,0.0,0.0);
}
else
{
tangent = T(unit - normal.x * normal.x * a, b, -normal.x);
bitangent = T(b, unit - normal.y * normal.y * a, -normal.y);
}
}

bool partitionRandVariable(float leftProb, NBL_REF_ARG(float) xi, NBL_REF_ARG(float) rcpChoiceProb)
@@ -319,6 +138,7 @@ bool partitionRandVariable(float leftProb, NBL_REF_ARG(float) xi, NBL_REF_ARG(fl
return pickRight;
}


namespace impl
{
template <typename T NBL_STRUCT_CONSTRAINABLE>
@@ -394,7 +214,7 @@ struct trigonometry
const bool ABltC = cosSumAB < tmp2;
// apply triple angle formula
const float absArccosSumABC = acos<float>(clamp<float>(cosSumAB * tmp2 - (tmp0 * tmp4 + tmp3 * tmp1) * tmp5, -1.f, 1.f));
return ((AltminusB ? ABltC : ABltminusC) ? (-absArccosSumABC) : absArccosSumABC) + (AltminusB | ABltminusC ? numbers::pi<float> : (-numbers::pi<float>));
return ((AltminusB ? ABltC : ABltminusC) ? (-absArccosSumABC) : absArccosSumABC) + ((AltminusB || ABltminusC) ? numbers::pi<float> : (-numbers::pi<float>));
}

static void combineCosForSumOfAcos(float cosA, float cosB, float biasA, float biasB, NBL_REF_ARG(float) out0, NBL_REF_ARG(float) out1)
234 changes: 171 additions & 63 deletions include/nbl/builtin/hlsl/tgmath/impl.hlsl
Original file line number Diff line number Diff line change
@@ -50,26 +50,26 @@ template<typename T NBL_STRUCT_CONSTRAINABLE>
struct sin_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct acos_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct tan_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct asin_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct atan_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct sinh_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct cosh_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct tanh_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct asinh_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct acosh_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct atanh_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct atan2_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct tan_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct asin_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct atan_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct sinh_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct cosh_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct tanh_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct asinh_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct acosh_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct atanh_helper;
template<typename T NBL_STRUCT_CONSTRAINABLE>
struct atan2_helper;

template<typename T NBL_STRUCT_CONSTRAINABLE>
struct sqrt_helper;
@@ -115,15 +115,15 @@ struct HELPER_NAME<BOOST_PP_SEQ_FOR_EACH_I(WRAP, _, ARG_TYPE_LIST) NBL_PARTIAL_R
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sin_helper, sin, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(cos_helper, cos, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acos_helper, acos, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tan_helper, tan, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asin_helper, asin, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atan_helper, atan, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sinh_helper, sinh, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(cosh_helper, cosh, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tanh_helper, tanh, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asinh_helper, asinh, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acosh_helper, acosh, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atanh_helper, atanh, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tan_helper, tan, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asin_helper, asin, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atan_helper, atan, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sinh_helper, sinh, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(cosh_helper, cosh, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tanh_helper, tanh, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asinh_helper, asinh, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acosh_helper, acosh, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atanh_helper, atanh, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atan2_helper, atan2, (T), (T)(T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(abs_helper, sAbs, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(abs_helper, fAbs, (T), (T), T)
@@ -146,7 +146,7 @@ template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(frexpStruct_helper, fre
#define ISINF_AND_ISNAN_RETURN_TYPE conditional_t<is_vector_v<T>, vector<bool, vector_traits<T>::Dimension>, bool>
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(isinf_helper, isInf, (T), (T), ISINF_AND_ISNAN_RETURN_TYPE)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(isnan_helper, isNan, (T), (T), ISINF_AND_ISNAN_RETURN_TYPE)
#undef ISINF_AND_ISNAN_RETURN_TYPE
#undef ISINF_AND_ISNAN_RETURN_TYPE

#undef DECLVAL
#undef DECL_ARG
@@ -206,6 +206,7 @@ struct erf_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointScala
}
};


#else // C++ only specializations

#define DECL_ARG(r,data,i,_T) BOOST_PP_COMMA_IF(BOOST_PP_NOT_EQUAL(i,0)) const _T arg##i
@@ -226,16 +227,16 @@ struct HELPER_NAME<BOOST_PP_SEQ_FOR_EACH_I(WRAP, _, ARG_TYPE_LIST)>\

template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(cos_helper, cos, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sin_helper, sin, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tan_helper, tan, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asin_helper, asin, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acos_helper, acos, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atan_helper, atan, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sinh_helper, sinh, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(cosh_helper, cosh, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tanh_helper, tanh, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asinh_helper, asinh, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acosh_helper, acosh, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atanh_helper, atanh, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tan_helper, tan, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asin_helper, asin, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acos_helper, acos, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atan_helper, atan, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sinh_helper, sinh, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(cosh_helper, cosh, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(tanh_helper, tanh, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(asinh_helper, asinh, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(acosh_helper, acosh, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atanh_helper, atanh, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(atan2_helper, atan2, concepts::FloatingPointScalar<T>, (T), (T)(T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sqrt_helper, sqrt, concepts::FloatingPointScalar<T>, (T), (T), T)
template<typename T> AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(abs_helper, abs, concepts::Scalar<T>, (T), (T), T)
@@ -283,11 +284,11 @@ requires concepts::FloatingPointScalar<T>
struct isinf_helper<T>
{
using return_t = bool;
static inline return_t __call(const T arg)
static inline return_t __call(const T arg)
{
// GCC and Clang will always return false with call to std::isinf when fast math is enabled,
// this implementation will always return appropriate output regardless is fast math is enabled or not
using AsUint = typename unsigned_integer_of_size<sizeof(T)>::type;
// GCC and Clang will always return false with call to std::isinf when fast math is enabled,
// this implementation will always return appropriate output regardless is fast math is enabled or not
using AsUint = typename unsigned_integer_of_size<sizeof(T)>::type;
return cpp_compat_intrinsics_impl::isinf_uint_impl(reinterpret_cast<const AsUint&>(arg));
}
};
@@ -297,7 +298,7 @@ requires concepts::FloatingPointScalar<T>
struct isnan_helper<T>
{
using return_t = bool;
static inline return_t __call(const T arg)
static inline return_t __call(const T arg)
{
// GCC and Clang will always return false with call to std::isnan when fast math is enabled,
// this implementation will always return appropriate output regardless is fast math is enabled or not
@@ -324,13 +325,13 @@ struct roundEven_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPoin
static FloatingPoint __call(NBL_CONST_REF_ARG(FloatingPoint) x)
{
// TODO: no way this is optimal, find a better implementation
float tmp;
if (std::abs(std::modf(x, &tmp)) == 0.5f)
{
int32_t result = static_cast<int32_t>(x);
if (result % 2 != 0)
result >= 0 ? ++result : --result;
return result;
float tmp;
if (std::abs(std::modf(x, &tmp)) == 0.5f)
{
int32_t result = static_cast<int32_t>(x);
if (result % 2 != 0)
result >= 0 ? ++result : --result;
return result;
}

return std::round(x);
@@ -389,6 +390,27 @@ struct frexpStruct_helper<T>

// C++ and HLSL specializations

template<>
struct erf_helper<float16_t>
{
static float16_t __call(float16_t _x)
{
// A&S approximation to 2.5x10-5
const float16_t a1 = float16_t(0.3480242f);
const float16_t a2 = float16_t(-0.0958798f);
const float16_t a3 = float16_t(0.7478556f);
const float16_t p = float16_t(0.47047f);

float16_t _sign = float16_t(sign<float16_t>(_x));
float16_t x = abs_helper<float16_t>::__call(_x);

float16_t t = float16_t(1.f) / (float16_t(1.f) + p * x);
float16_t y = float16_t(1.f) - (((a3 * t + a2) * t) + a1) * t * exp_helper<float16_t>::__call(-x * x);

return _sign * y;
}
};

template<typename FloatingPoint>
NBL_PARTIAL_REQ_TOP(concepts::FloatingPointScalar<FloatingPoint>)
struct erfInv_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointScalar<FloatingPoint>) >
@@ -429,6 +451,92 @@ struct erfInv_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointSc
}
};

// log doesn't accept float64_t
// template<>
// struct erfInv_helper<float64_t>
// {
// static float64_t __call(NBL_CONST_REF_ARG(float64_t) _x)
// {
// float64_t x = clamp<float64_t>(_x, NBL_FP64_LITERAL(-0.99999), NBL_FP64_LITERAL(0.99999));

// float64_t w = -log_helper<float64_t>::__call((NBL_FP64_LITERAL(1.0) - x) * (NBL_FP64_LITERAL(1.0) + x));
// float64_t p;
// if (w < 6.250000)
// {
// w -= NBL_FP64_LITERAL(3.125000);
// p = NBL_FP64_LITERAL(-3.6444120640178196996e-21);
// p = NBL_FP64_LITERAL(-1.685059138182016589e-19) + p * w;
// p = NBL_FP64_LITERAL(1.2858480715256400167e-18) + p * w;
// p = NBL_FP64_LITERAL(1.115787767802518096e-17) + p * w;
// p = NBL_FP64_LITERAL(-1.333171662854620906e-16) + p * w;
// p = NBL_FP64_LITERAL(2.0972767875968561637e-17) + p * w;
// p = NBL_FP64_LITERAL(6.6376381343583238325e-15) + p * w;
// p = NBL_FP64_LITERAL(-4.0545662729752068639e-14) + p * w;
// p = NBL_FP64_LITERAL(-8.1519341976054721522e-14) + p * w;
// p = NBL_FP64_LITERAL(2.6335093153082322977e-12) + p * w;
// p = NBL_FP64_LITERAL(-1.2975133253453532498e-11) + p * w;
// p = NBL_FP64_LITERAL(-5.4154120542946279317e-11) + p * w;
// p = NBL_FP64_LITERAL(1.051212273321532285e-09) + p * w;
// p = NBL_FP64_LITERAL(-4.1126339803469836976e-09) + p * w;
// p = NBL_FP64_LITERAL(-2.9070369957882005086e-08) + p * w;
// p = NBL_FP64_LITERAL(4.2347877827932403518e-07) + p * w;
// p = NBL_FP64_LITERAL(-1.3654692000834678645e-06) + p * w;
// p = NBL_FP64_LITERAL(-1.3882523362786468719e-05) + p * w;
// p = NBL_FP64_LITERAL(0.0001867342080340571352) + p * w;
// p = NBL_FP64_LITERAL(-0.00074070253416626697512) + p * w;
// p = NBL_FP64_LITERAL(-0.0060336708714301490533) + p * w;
// p = NBL_FP64_LITERAL(0.24015818242558961693) + p * w;
// p = NBL_FP64_LITERAL(1.6536545626831027356) + p * w;
// }
// else if (w < 16.000000)
// {
// w = sqrt_helper<float64_t>::__call(w) - NBL_FP64_LITERAL(3.250000);
// p = NBL_FP64_LITERAL(2.2137376921775787049e-09);
// p = NBL_FP64_LITERAL(9.0756561938885390979e-08) + p * w;
// p = NBL_FP64_LITERAL(-2.7517406297064545428e-07) + p * w;
// p = NBL_FP64_LITERAL(1.8239629214389227755e-08) + p * w;
// p = NBL_FP64_LITERAL(1.5027403968909827627e-06) + p * w;
// p = NBL_FP64_LITERAL(-4.013867526981545969e-06) + p * w;
// p = NBL_FP64_LITERAL(2.9234449089955446044e-06) + p * w;
// p = NBL_FP64_LITERAL(1.2475304481671778723e-05) + p * w;
// p = NBL_FP64_LITERAL(-4.7318229009055733981e-05) + p * w;
// p = NBL_FP64_LITERAL(6.8284851459573175448e-05) + p * w;
// p = NBL_FP64_LITERAL(2.4031110387097893999e-05) + p * w;
// p = NBL_FP64_LITERAL(-0.0003550375203628474796) + p * w;
// p = NBL_FP64_LITERAL(0.00095328937973738049703) + p * w;
// p = NBL_FP64_LITERAL(-0.0016882755560235047313) + p * w;
// p = NBL_FP64_LITERAL(0.0024914420961078508066) + p * w;
// p = NBL_FP64_LITERAL(-0.0037512085075692412107) + p * w;
// p = NBL_FP64_LITERAL(0.005370914553590063617) + p * w;
// p = NBL_FP64_LITERAL(1.0052589676941592334) + p * w;
// p = NBL_FP64_LITERAL(3.0838856104922207635) + p * w;
// }
// else
// {
// w = sqrt_helper<float64_t>::__call(w) - NBL_FP64_LITERAL(5.000000);
// p = NBL_FP64_LITERAL(-2.7109920616438573243e-11);
// p = NBL_FP64_LITERAL(-2.5556418169965252055e-10) + p * w;
// p = NBL_FP64_LITERAL(1.5076572693500548083e-09) + p * w;
// p = NBL_FP64_LITERAL(-3.7894654401267369937e-09) + p * w;
// p = NBL_FP64_LITERAL(7.6157012080783393804e-09) + p * w;
// p = NBL_FP64_LITERAL(-1.4960026627149240478e-08) + p * w;
// p = NBL_FP64_LITERAL(2.9147953450901080826e-08) + p * w;
// p = NBL_FP64_LITERAL(-6.7711997758452339498e-08) + p * w;
// p = NBL_FP64_LITERAL(2.2900482228026654717e-07) + p * w;
// p = NBL_FP64_LITERAL(-9.9298272942317002539e-07) + p * w;
// p = NBL_FP64_LITERAL(4.5260625972231537039e-06) + p * w;
// p = NBL_FP64_LITERAL(-1.9681778105531670567e-05) + p * w;
// p = NBL_FP64_LITERAL(7.5995277030017761139e-05) + p * w;
// p = NBL_FP64_LITERAL(-0.00021503011930044477347) + p * w;
// p = NBL_FP64_LITERAL(-0.00013871931833623122026) + p * w;
// p = NBL_FP64_LITERAL(1.0103004648645343977) + p * w;
// p = NBL_FP64_LITERAL(4.8499064014085844221) + p * w;
// }

// return p * x;
// }
// };

#ifdef __HLSL_VERSION
// SPIR-V already defines specializations for builtin vector types
#define VECTOR_SPECIALIZATION_CONCEPT concepts::Vectorial<T> && !is_vector_v<T>
@@ -471,14 +579,14 @@ AUTO_SPECIALIZE_HELPER_FOR_VECTOR(isnan_helper, VECTOR_SPECIALIZATION_CONCEPT, I
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(cos_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(sin_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(acos_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(tan_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(asin_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(atan_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(sinh_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(cosh_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(tanh_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(asinh_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(acosh_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(tan_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(asin_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(atan_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(sinh_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(cosh_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(tanh_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(asinh_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(acosh_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(atanh_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(modf_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
AUTO_SPECIALIZE_HELPER_FOR_VECTOR(round_helper, VECTOR_SPECIALIZATION_CONCEPT, T)
@@ -501,11 +609,11 @@ struct pow_helper<T NBL_PARTIAL_REQ_BOT(VECTOR_SPECIALIZATION_CONCEPT) >
using traits = hlsl::vector_traits<T>;
array_get<T, typename traits::scalar_type> getter;
array_set<T, typename traits::scalar_type> setter;

return_t output;
for (uint32_t i = 0; i < traits::Dimension; ++i)
setter(output, i, pow_helper<typename traits::scalar_type>::__call(getter(x, i), getter(y, i)));

return output;
}
};
@@ -636,4 +744,4 @@ struct atan2_helper<T NBL_PARTIAL_REQ_BOT(VECTOR_SPECIALIZATION_CONCEPT) >
}
}

#endif
#endif