Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/nbl/asset/IAccelerationStructure.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ class ITopLevelAccelerationStructure : public AccelerationStructure
PREFER_FAST_BUILD_BIT = 0x1u<<3u,
LOW_MEMORY_BIT = 0x1u<<4u,
// Synthetic flag we use to indicate `VkAccelerationStructureGeometryInstancesDataKHR::arrayOfPointers`
INSTANCE_DATA_IS_POINTERS_TYPE_ENCODED_LSB = 0x1u<<5u,
INSTANCE_DATA_IS_POINTERS_TYPE_ENCODED_LSB = 0x1u<<5u, // this flag really shouldn't be settable outside of `video::IGPU`
// Provided by VK_NV_ray_tracing_motion_blur, but we always override and deduce from creation flag because of
// https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkAccelerationStructureBuildGeometryInfoKHR-dstAccelerationStructure-04927
//MOTION_BIT = 0x1u<<5u,
Expand Down
26 changes: 13 additions & 13 deletions include/nbl/asset/ICPUAccelerationStructure.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class ICPUBottomLevelAccelerationStructure final : public IBottomLevelAccelerati
return {m_geometryPrimitiveCount->begin(),m_geometryPrimitiveCount->end()};
return {};
}
inline std::span<const uint32_t> getGeometryPrimitiveCounts(const size_t geomIx) const
inline std::span<const uint32_t> getGeometryPrimitiveCounts() const
{
if (m_geometryPrimitiveCount)
return {m_geometryPrimitiveCount->begin(),m_geometryPrimitiveCount->end()};
Expand Down Expand Up @@ -79,25 +79,25 @@ class ICPUBottomLevelAccelerationStructure final : public IBottomLevelAccelerati
{
if (!isMutable())
return false;
m_buildFlags &= BUILD_FLAGS::GEOMETRY_TYPE_IS_AABB_BIT;
m_buildFlags &= ~BUILD_FLAGS::GEOMETRY_TYPE_IS_AABB_BIT;
m_geometryPrimitiveCount = std::move(ranges);
m_triangleGeoms = std::move(geometries);
m_AABBGeoms = nullptr;
return true;
}

//
inline core::SRange<AABBs<asset::ICPUBuffer>> getAABBGeometries()
inline std::span<AABBs<asset::ICPUBuffer>> getAABBGeometries()
{
if (!isMutable() || !m_AABBGeoms)
return {nullptr,nullptr};
return {m_AABBGeoms->begin(),m_AABBGeoms->end()};
return {m_AABBGeoms->data(),m_AABBGeoms->size()};
}
inline core::SRange<const AABBs<asset::ICPUBuffer>> getAABBGeometries() const
inline std::span<const AABBs<asset::ICPUBuffer>> getAABBGeometries() const
{
if (!m_AABBGeoms)
return {nullptr,nullptr};
return {m_AABBGeoms->begin(),m_AABBGeoms->end()};
return {m_AABBGeoms->data(),m_AABBGeoms->size()};
}
inline bool setGeometries(core::smart_refctd_dynamic_array<AABBs<ICPUBuffer>>&& geometries, core::smart_refctd_dynamic_array<uint32_t>&& ranges)
{
Expand Down Expand Up @@ -337,17 +337,17 @@ class ICPUTopLevelAccelerationStructure final : public ITopLevelAccelerationStru
std::variant<StaticInstance,MatrixMotionInstance,SRTMotionInstance> instance = StaticInstance{};
};

core::SRange<PolymorphicInstance> getInstances()
std::span<PolymorphicInstance> getInstances()
{
if (!isMutable() || !m_instances)
return {nullptr,nullptr};
return {m_instances->begin(),m_instances->end()};
return {};
return {m_instances->data(),m_instances->size()};
}
core::SRange<const PolymorphicInstance> getInstances() const
std::span<const PolymorphicInstance> getInstances() const
{
if (!m_instances)
return {nullptr,nullptr};
return {m_instances->begin(),m_instances->end()};
return {};
return {m_instances->data(),m_instances->size()};
}
bool setInstances(core::smart_refctd_dynamic_array<PolymorphicInstance>&& _instances)
{
Expand All @@ -367,7 +367,7 @@ class ICPUTopLevelAccelerationStructure final : public ITopLevelAccelerationStru
}

//!
constexpr static inline auto AssetType = ET_BOTOM_LEVEL_ACCELERATION_STRUCTURE;
constexpr static inline auto AssetType = ET_TOP_LEVEL_ACCELERATION_STRUCTURE;
inline IAsset::E_TYPE getAssetType() const override { return AssetType; }

inline core::smart_refctd_ptr<IAsset> clone(uint32_t _depth = ~0u) const override
Expand Down
6 changes: 3 additions & 3 deletions include/nbl/video/ILogicalDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
// Create a sampler object to use with ImageViews
virtual core::smart_refctd_ptr<IGPUSampler> createSampler(const IGPUSampler::SParams& _params) = 0;
// acceleration structures
inline core::smart_refctd_ptr<IGPUBottomLevelAccelerationStructure> createBottomLevelAccelerationStructure(IGPUAccelerationStructure::SCreationParams&& params)
inline core::smart_refctd_ptr<IGPUBottomLevelAccelerationStructure> createBottomLevelAccelerationStructure(IGPUBottomLevelAccelerationStructure::SCreationParams&& params)
{
if (invalidCreationParams(params))
{
Expand Down Expand Up @@ -402,7 +402,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
inline AccelerationStructureBuildSizes getAccelerationStructureBuildSizes(
const core::bitflag<IGPUBottomLevelAccelerationStructure::BUILD_FLAGS> flags,
const bool motionBlur,
const std::span<Geometry> geometries,
const std::span<const Geometry> geometries,
const uint32_t* const pMaxPrimitiveCounts
) const
{
Expand All @@ -412,7 +412,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
return {};
}

if (!IGPUBottomLevelAccelerationStructure::validBuildFlags(flags, m_enabledFeatures))
if (!IGPUBottomLevelAccelerationStructure::validBuildFlags(flags,m_enabledFeatures))
{
NBL_LOG_ERROR("Invalid build flags");
return {};
Expand Down
127 changes: 122 additions & 5 deletions include/nbl/video/utilities/CAssetConverter.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ class CAssetConverter : public core::IReferenceCounted
asset::ICPUSampler,
asset::ICPUShader,
asset::ICPUBuffer,
// acceleration structures,
asset::ICPUBottomLevelAccelerationStructure,
asset::ICPUTopLevelAccelerationStructure,
asset::ICPUImage,
asset::ICPUBufferView,
asset::ICPUImageView,
Expand Down Expand Up @@ -71,6 +72,14 @@ class CAssetConverter : public core::IReferenceCounted
{
if (!params.valid())
return nullptr;
#ifndef _NBL_DEBUG
if (!params.optimizer)
{
using pass_e = asset::ISPIRVOptimizer::E_OPTIMIZER_PASS;
// shall we do others?
params.optimizer = core::make_smart_rectd_ptr<asset::ISPIRVOptimizer>({EOP_STRIP_DEBUG_INFO});
}
#endif
Comment on lines +75 to +82
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kevyuu we should move this to your ISPIRVDebloater (or trimmer as I'd like to call it) and make it an option to not run the SPIR-V optimizer multiple times for no reason

return core::smart_refctd_ptr<CAssetConverter>(new CAssetConverter(std::move(params)),core::dont_grab);
}
// When getting dependents, the creation parameters of GPU objects will be produced and patched appropriately.
Expand Down Expand Up @@ -149,6 +158,75 @@ class CAssetConverter : public core::IReferenceCounted
return {true,retval};
}
};
struct NBL_API2 acceleration_structure_patch_base
{
public:
// Build preference selected by an acceleration structure patch.
// The enumerators are stored in the 2-bit `preference` bitfield of this struct, so their values must stay within [0,3].
enum class BuildPreference : uint8_t
{
// no particular preference
None = 0,
// NOTE(review): presumably translated into the "prefer fast trace" build flag by `getBuildFlags` - confirm against the implementation
FastTrace = 1,
// NOTE(review): presumably translated into the "prefer fast build" build flag by `getBuildFlags` - confirm against the implementation
FastBuild = 2,
// default value of the `preference` field; `combine_impl` refuses to merge when the preference is Invalid
Invalid = 3
};

//! select build flags
uint8_t allowUpdate : 1 = false;
uint8_t allowCompaction : 1 = false;
uint8_t allowDataAccess : 1 = false;
BuildPreference preference : 2 = BuildPreference::Invalid;
uint8_t lowMemory : 1 = false;
//! things that control the build
uint8_t hostBuild : 1 = false;
Comment thread
devshgraphicsprogramming marked this conversation as resolved.
uint8_t compactAfterBuild : 1 = false;

protected:
bool valid(const ILogicalDevice* device);

// Merges two acceleration structure patches of the same concrete type `CRTP`.
// Fails (`first==false`, `second` is a copy of `_this`) when the two build preferences differ or when the preference is `Invalid`.
// On success `second` is a copy of `_this` with every 1-bit option OR-ed with the matching option of `other`.
template<typename CRTP>
std::pair<bool,CRTP> combine_impl(const CRTP& _this, const CRTP& other) const
{
	const bool preferencesAgree = _this.preference==other.preference;
	const bool preferenceIsKnown = _this.preference!=BuildPreference::Invalid;
	if (!preferencesAgree || !preferenceIsKnown)
		return {false,_this};

	// start from our own settings, then switch on anything the other patch asks for
	CRTP merged = _this;
	// things that control the build
	merged.compactAfterBuild |= other.compactAfterBuild;
	merged.hostBuild |= other.hostBuild;
	// build flag selection
	merged.lowMemory |= other.lowMemory;
	merged.allowDataAccess |= other.allowDataAccess;
	merged.allowCompaction |= other.allowCompaction;
	merged.allowUpdate |= other.allowUpdate;
	return {true,merged};
}
};
// Patch specialization for Bottom Level Acceleration Structure assets.
// All the patchable options (build flag selection and build control bits) are inherited from `acceleration_structure_patch_base`.
template<>
struct NBL_API2 patch_impl_t<asset::ICPUBottomLevelAccelerationStructure> : acceleration_structure_patch_base
{
public:
// NOTE(review): macro is defined outside this view; presumably it declares `this_t` and the common patch members - confirm
PATCH_IMPL_BOILERPLATE(asset::ICPUBottomLevelAccelerationStructure);

using build_flags_t = asset::ICPUBottomLevelAccelerationStructure::BUILD_FLAGS;
// Returns the BLAS build flags for `blas`; only declared here, the definition lives elsewhere.
core::bitflag<build_flags_t> getBuildFlags(const asset::ICPUBottomLevelAccelerationStructure* blas) const;

protected:
// Merges this patch with `other` by forwarding to the shared `combine_impl` of the base;
// returns {success, merged patch}.
inline std::pair<bool,this_t> combine(const this_t& other) const
{
return combine_impl<this_t>(*this,other);
}
};
// Patch specialization for Top Level Acceleration Structure assets.
// All the patchable options (build flag selection and build control bits) are inherited from `acceleration_structure_patch_base`.
template<>
struct NBL_API2 patch_impl_t<asset::ICPUTopLevelAccelerationStructure> : acceleration_structure_patch_base
{
public:
// NOTE(review): macro is defined outside this view; presumably it declares `this_t` and the common patch members - confirm
PATCH_IMPL_BOILERPLATE(asset::ICPUTopLevelAccelerationStructure);

using build_flags_t = asset::ICPUTopLevelAccelerationStructure::BUILD_FLAGS;
// Returns the TLAS build flags for `tlas`; only declared here, the definition lives elsewhere.
core::bitflag<build_flags_t> getBuildFlags(const asset::ICPUTopLevelAccelerationStructure* tlas) const;

protected:
// Merges this patch with `other` by forwarding to the shared `combine_impl` of the base;
// returns {success, merged patch}.
inline std::pair<bool,this_t> combine(const this_t& other) const
{
return combine_impl<this_t>(*this,other);
}
};
template<>
struct NBL_API2 patch_impl_t<asset::ICPUImage>
{
Expand Down Expand Up @@ -458,6 +536,8 @@ class CAssetConverter : public core::IReferenceCounted
virtual const patch_t<asset::ICPUSampler>* operator()(const lookup_t<asset::ICPUSampler>&) const = 0;
virtual const patch_t<asset::ICPUShader>* operator()(const lookup_t<asset::ICPUShader>&) const = 0;
virtual const patch_t<asset::ICPUBuffer>* operator()(const lookup_t<asset::ICPUBuffer>&) const = 0;
virtual const patch_t<asset::ICPUBottomLevelAccelerationStructure>* operator()(const lookup_t<asset::ICPUBottomLevelAccelerationStructure>&) const = 0;
virtual const patch_t<asset::ICPUTopLevelAccelerationStructure>* operator()(const lookup_t<asset::ICPUTopLevelAccelerationStructure>&) const = 0;
virtual const patch_t<asset::ICPUImage>* operator()(const lookup_t<asset::ICPUImage>&) const = 0;
virtual const patch_t<asset::ICPUBufferView>* operator()(const lookup_t<asset::ICPUBufferView>&) const = 0;
virtual const patch_t<asset::ICPUImageView>* operator()(const lookup_t<asset::ICPUImageView>&) const = 0;
Expand Down Expand Up @@ -577,6 +657,8 @@ class CAssetConverter : public core::IReferenceCounted
bool operator()(lookup_t<asset::ICPUSampler>);
bool operator()(lookup_t<asset::ICPUShader>);
bool operator()(lookup_t<asset::ICPUBuffer>);
bool operator()(lookup_t<asset::ICPUBottomLevelAccelerationStructure>);
bool operator()(lookup_t<asset::ICPUTopLevelAccelerationStructure>);
bool operator()(lookup_t<asset::ICPUImage>);
bool operator()(lookup_t<asset::ICPUBufferView>);
bool operator()(lookup_t<asset::ICPUImageView>);
Expand Down Expand Up @@ -717,6 +799,16 @@ class CAssetConverter : public core::IReferenceCounted
return {};
}

// This is a weird signature, but it's for the IGPUBuffer backing a Bottom Level Acceleration Structure.
// The default implementation returns an empty span; override to return something else.
virtual inline std::span<const uint32_t> getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUBottomLevelAccelerationStructure* blas, const patch_t<asset::ICPUBottomLevelAccelerationStructure>& patch) const
{
	return std::span<const uint32_t>{};
}
// Overload taking a Top Level Acceleration Structure and its patch.
// The default implementation returns an empty span; override to return something else.
virtual inline std::span<const uint32_t> getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUTopLevelAccelerationStructure* tlas, const patch_t<asset::ICPUTopLevelAccelerationStructure>& patch) const
{
	return std::span<const uint32_t>{};
}

virtual inline std::span<const uint32_t> getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUImage* buffer, const patch_t<asset::ICPUImage>& patch) const
{
return {};
Expand Down Expand Up @@ -793,6 +885,7 @@ class CAssetConverter : public core::IReferenceCounted
{
// By default the last queue to touch a GPU object will own it after any transfer or compute operations are complete.
// If you want to record a pipeline barrier that will release ownership to another family, override this.
// The overload for the IGPUBuffer may be called with a hash belonging to an Acceleration Structure; this means that it's the storage buffer backing the AS
virtual inline uint32_t getFinalOwnerQueueFamily(const IGPUBuffer* buffer, const core::blake3_hash_t& createdFrom)
{
return IQueue::FamilyIgnored;
Expand Down Expand Up @@ -829,6 +922,10 @@ class CAssetConverter : public core::IReferenceCounted
IUtilities* utilities = nullptr;
// optional, last submit (compute, transfer if no compute needed) signals these in addition to the scratch semaphore
std::span<const IQueue::SSubmitInfo::SSemaphoreInfo> extraSignalSemaphores = {};
// specific to Acceleration Structure Build, they need to be at least as large as the largest amount of scratch required for an AS build
CAsyncSingleBufferSubAllocatorST<>* scratchForASBuild = nullptr;
//
IDeviceMemoryAllocator* compactedASAllocator = nullptr;
// specific to mip-map recomputation, these are okay defaults for the size of our Descriptor Indexed temporary descriptor set
uint32_t sampledImageBindingCount = 1<<10;
uint32_t storageImageBindingCount = 11<<10;
Expand All @@ -853,6 +950,13 @@ class CAssetConverter : public core::IReferenceCounted
// https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdCopyBufferToImage.html#VUID-vkCmdCopyBufferToImage-commandBuffer-07739
inline core::bitflag<IQueue::FAMILY_FLAGS> getRequiredQueueFlags() const {return m_queueFlags;}

// Just enough scratch memory to build the Acceleration Structures one by one, waiting for each build to complete in between
inline uint64_t getMinASBuildScratchSize() const
{
	return m_minASBuildScratchSize;
}
// Enough scratch memory to build and compact all the Acceleration Structures at once,
// obviously respecting the order of BLAS (build->compact) -> TLAS (build->compact)
inline uint64_t getMaxASBuildScratchSize() const
{
	return m_maxASBuildScratchSize;
}
// Usage flags stored for the AS build scratch; if this returns NONE it means there are no acceleration structures to build
inline auto getASBuildScratchUsages() const
{
	return m_ASBuildScratchUsages;
}

//
inline operator bool() const {return bool(m_converter);}

Expand Down Expand Up @@ -917,19 +1021,32 @@ class CAssetConverter : public core::IReferenceCounted
core::smart_refctd_ptr<const AssetType> canonical;
// gpu object to transfer canonical's data to or build it from
asset_traits<AssetType>::video_t* gpuObj;
// only relevant for images
uint16_t recomputeMips = 0;
union
{
// only relevant for images
uint16_t recomputeMips = 0;
//
struct ASBuildParams
{
uint8_t host : 1;
uint8_t compact : 1;
} asBuildParams;
};
};
template<asset::Asset AssetType>
using conversion_requests_t = core::vector<ConversionRequest<AssetType>>;
using convertible_asset_types = core::type_list<
asset::ICPUBuffer,
asset::ICPUImage/*,
asset::ICPUImage,
asset::ICPUBottomLevelAccelerationStructure,
asset::ICPUTopLevelAccelerationStructure*/
asset::ICPUTopLevelAccelerationStructure
>;
core::tuple_transform_t<conversion_requests_t,convertible_asset_types> m_conversionRequests;

//
uint64_t m_minASBuildScratchSize = 0;
uint64_t m_maxASBuildScratchSize = 0;
core::bitflag<IGPUBuffer::E_USAGE_FLAGS> m_ASBuildScratchUsages = IGPUBuffer::E_USAGE_FLAGS::EUF_NONE;
//
core::bitflag<IQueue::FAMILY_FLAGS> m_queueFlags = IQueue::FAMILY_FLAGS::NONE;
};
Expand Down
Loading