diff --git a/CHANGELOG.md b/CHANGELOG.md index d6939c18cc..b0207be6c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -94,6 +94,10 @@ Naga now infers the correct binding layout when a resource appears only in an as - Use highest SPIR-V version supported by Vulkan API version. By @robamler in [#7595](https://github.com/gfx-rs/wgpu/pull/7595) +#### Metal + +- Implements ray-tracing acceleration structures for metal backend. By @lichtso in [#7660](https://github.com/gfx-rs/wgpu/pull/7660) + ### Bug Fixes #### Naga diff --git a/examples/features/src/ray_cube_fragment/mod.rs b/examples/features/src/ray_cube_fragment/mod.rs index 96be61c3b7..d0c7bb083d 100644 --- a/examples/features/src/ray_cube_fragment/mod.rs +++ b/examples/features/src/ray_cube_fragment/mod.rs @@ -1,7 +1,7 @@ use bytemuck::{Pod, Zeroable}; use glam::{Mat4, Quat, Vec3}; use std::ops::IndexMut; -use std::{borrow::Cow, future::Future, iter, mem, pin::Pin, task, time::Instant}; +use std::{borrow::Cow, future::Future, iter, mem, pin::Pin, task}; use wgpu::util::DeviceExt; // from cube @@ -97,11 +97,8 @@ impl>> Future for ErrorFuture { struct Example { uniforms: Uniforms, uniform_buf: wgpu::Buffer, - blas: wgpu::Blas, - tlas_package: wgpu::TlasPackage, pipeline: wgpu::RenderPipeline, bind_group: wgpu::BindGroup, - start_inst: Instant, } impl crate::framework::Example for Example { @@ -222,22 +219,46 @@ impl crate::framework::Example for Example { let bind_group_layout = pipeline.get_bind_group_layout(0); - let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { - label: None, - layout: &bind_group_layout, - entries: &[ - wgpu::BindGroupEntry { - binding: 0, - resource: uniform_buf.as_entire_binding(), - }, - wgpu::BindGroupEntry { - binding: 1, - resource: wgpu::BindingResource::AccelerationStructure(&tlas), - }, - ], - }); + let mut tlas_package = wgpu::TlasPackage::new(tlas); - let tlas_package = wgpu::TlasPackage::new(tlas); + // scene update + { + let dist = 12.0; + + let side_count = 8; + + let anim_time = 0.0; + + for x in 0..side_count { + for y in 0..side_count { + let instance = tlas_package.index_mut((x + y * side_count) as usize); + + let x = x as f32 / (side_count - 1) as f32; + let y = y as f32 / (side_count - 1) as f32; + let x = x * 2.0 - 1.0; + let y = y * 2.0 - 1.0; + + let transform = Mat4::from_rotation_translation( + Quat::from_euler( + glam::EulerRot::XYZ, + anim_time * 0.5 * 0.342, + anim_time * 0.5 * 0.254, + anim_time * 0.5 * 0.832, + ), + Vec3 { + x: x * dist, + y: y * dist, + z: -24.0, + }, + ); + let transform = transform.transpose().to_cols_array()[..12] + .try_into() + .unwrap(); + + *instance = Some(wgpu::TlasInstance::new(&blas, transform, 0, 0xff)); + } + } + } let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); @@ -264,16 +285,26 @@ impl crate::framework::Example for Example { queue.submit(Some(encoder.finish())); - let start_inst = Instant::now(); + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bind_group_layout, + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: uniform_buf.as_entire_binding(), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: wgpu::BindingResource::AccelerationStructure(tlas_package.tlas()), + }, + ], + }); Example { uniforms, uniform_buf, - blas, - tlas_package, pipeline, bind_group, - start_inst, } } @@ -300,50 +331,9 @@ impl crate::framework::Example for Example { fn render(&mut self, view: &wgpu::TextureView, device: &wgpu::Device, queue: &wgpu::Queue) { device.push_error_scope(wgpu::ErrorFilter::Validation); - // scene update - { - let dist = 12.0; - - let side_count = 8; - - let anim_time = self.start_inst.elapsed().as_secs_f64() as f32; - - for x in 0..side_count { - for y in 0..side_count { - let instance = self.tlas_package.index_mut((x + y * side_count) as usize); - - let x = x as f32 / (side_count - 1) as f32; - let y = y as f32 / (side_count - 1) as f32; - let x = x * 2.0 - 1.0; - let y = y * 2.0 - 1.0; - - let transform = Mat4::from_rotation_translation( - Quat::from_euler( - glam::EulerRot::XYZ, - anim_time * 0.5 * 0.342, - anim_time * 0.5 * 0.254, - anim_time * 0.5 * 0.832, - ), - Vec3 { - x: x * dist, - y: y * dist, - z: -24.0, - }, - ); - let transform = transform.transpose().to_cols_array()[..12] - .try_into() - .unwrap(); - - *instance = Some(wgpu::TlasInstance::new(&self.blas, transform, 0, 0xff)); - } - } - } - let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); - encoder.build_acceleration_structures(iter::empty(), iter::once(&self.tlas_package)); - { let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { label: None, diff --git a/examples/features/src/ray_shadows/mod.rs b/examples/features/src/ray_shadows/mod.rs index 2054632aef..84cd5bdc68 100644 --- a/examples/features/src/ray_shadows/mod.rs +++ b/examples/features/src/ray_shadows/mod.rs @@ -116,7 +116,7 @@ impl crate::framework::Example for Example { fn required_limits() -> wgpu::Limits { wgpu::Limits { - max_push_constant_size: 12, + max_push_constant_size: 16, ..wgpu::Limits::default() } } @@ -209,7 +209,7 @@ impl crate::framework::Example for Example { bind_group_layouts: &[&bind_group_layout], push_constant_ranges: &[wgpu::PushConstantRange { stages: wgpu::ShaderStages::FRAGMENT, - range: 0..12, + range: 0..16, }], }); diff --git a/examples/features/src/ray_shadows/shader.wgsl b/examples/features/src/ray_shadows/shader.wgsl index 4ba5d42f79..3e8e8d0c3e 100644 --- a/examples/features/src/ray_shadows/shader.wgsl +++ b/examples/features/src/ray_shadows/shader.wgsl @@ -35,6 +35,7 @@ var acc_struct: acceleration_structure; struct PushConstants { light: vec3, + padding: f32, } var pc: PushConstants; diff --git a/examples/features/src/ray_traced_triangle/mod.rs b/examples/features/src/ray_traced_triangle/mod.rs index 85e1f88518..23bb61fd20 100644 --- a/examples/features/src/ray_traced_triangle/mod.rs +++ b/examples/features/src/ray_traced_triangle/mod.rs @@ -120,7 +120,7 @@ impl crate::framework::Example for Example { }); let index_buffer = device.create_buffer_init(&BufferInitDescriptor { - label: Some("vertex buffer"), + label: Some("index buffer"), contents: bytemuck::cast_slice(&indices), usage: BufferUsages::BLAS_INPUT, }); diff --git a/wgpu-core/src/command/ray_tracing.rs b/wgpu-core/src/command/ray_tracing.rs index a5386934ed..e17700f8ac 100644 --- a/wgpu-core/src/command/ray_tracing.rs +++ b/wgpu-core/src/command/ray_tracing.rs @@ -280,7 +280,7 @@ impl Global { tlas, entries: hal::AccelerationStructureEntries::Instances( hal::AccelerationStructureInstances { - buffer: Some(instance_buffer), + buffer: instance_buffer, offset: 0, count: entry.instance_count, }, @@ -584,6 +584,20 @@ impl Global { dependencies.push(blas.clone()); } + let blases = dependencies + .iter() + .map(|blas| blas.try_raw(&snatch_guard).unwrap()) + .collect::>(); + let destination_acceleration_structure = tlas.try_raw(&snatch_guard)?; + #[allow(mutable_transmutes)] + let destination_acceleration_structure = unsafe { + core::mem::transmute::< + &dyn hal::DynAccelerationStructure, + &mut dyn hal::DynAccelerationStructure, + >(destination_acceleration_structure) + }; + destination_acceleration_structure.set_blases(&blases); + build_command.tlas_s_built.push(TlasBuild { tlas: tlas.clone(), dependencies, @@ -602,7 +616,7 @@ impl Global { tlas: tlas.clone(), entries: hal::AccelerationStructureEntries::Instances( hal::AccelerationStructureInstances { - buffer: Some(tlas.instance_buffer.as_ref()), + buffer: tlas.instance_buffer.as_ref(), offset: 0, count: instance_count, }, @@ -1141,7 +1155,7 @@ fn iter_buffers<'a, 'b>( }; let triangles = hal::AccelerationStructureTriangles { - vertex_buffer: Some(vertex_buffer), + vertex_buffer, vertex_format: mesh.size.vertex_format, first_vertex: mesh.first_vertex, vertex_count: mesh.size.vertex_count, @@ -1150,7 +1164,7 @@ fn iter_buffers<'a, 'b>( let index_stride = mesh.size.index_format.unwrap().byte_size() as u32; hal::AccelerationStructureTriangleIndices:: { format: mesh.size.index_format.unwrap(), - buffer: Some(index_buffer), + buffer: index_buffer, offset: mesh.first_index.unwrap() * index_stride, count: mesh.size.index_count.unwrap(), } diff --git a/wgpu-core/src/device/ray_tracing.rs b/wgpu-core/src/device/ray_tracing.rs index 76c94f948a..8856e50602 100644 --- a/wgpu-core/src/device/ray_tracing.rs +++ b/wgpu-core/src/device/ray_tracing.rs @@ -50,7 +50,7 @@ impl Device { dyn hal::DynBuffer, > { format: desc.index_format.unwrap(), - buffer: None, + buffer: self.zero_buffer.as_ref(), offset: 0, count, }); @@ -78,7 +78,7 @@ impl Device { } entries.push(hal::AccelerationStructureTriangles:: { - vertex_buffer: None, + vertex_buffer: self.zero_buffer.as_ref(), vertex_format: desc.vertex_format, first_vertex: 0, vertex_count: desc.vertex_count, @@ -158,7 +158,7 @@ impl Device { &hal::GetAccelerationStructureBuildSizesDescriptor { entries: &hal::AccelerationStructureEntries::Instances( hal::AccelerationStructureInstances { - buffer: None, + buffer: self.zero_buffer.as_ref(), offset: 0, count: desc.max_instances, }, diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index bc24b0cfb3..10b9005326 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -75,6 +75,7 @@ metal = [ "naga/msl-out", "dep:arrayvec", "dep:block", + "dep:bytemuck", "dep:core-graphics-types", "dep:hashbrown", "dep:libc", diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index a8d3a77b91..7c27518973 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -473,15 +473,15 @@ impl Example { }; let blas_triangles = vec![hal::AccelerationStructureTriangles { - vertex_buffer: Some(&vertices_buffer), + vertex_buffer: &vertices_buffer, first_vertex: 0, vertex_format: wgpu_types::VertexFormat::Float32x3, // each vertex is 3 floats, and floats are stored raw in the array vertex_count: vertices.len() as u32 / 3, vertex_stride: 3 * 4, - indices: indices_buffer.as_ref().map(|(buf, len)| { + indices: indices_buffer.as_ref().map(|(buffer, len)| { hal::AccelerationStructureTriangleIndices { - buffer: Some(buf), + buffer, format: wgpu_types::IndexFormat::Uint32, offset: 0, count: *len as u32, @@ -493,13 +493,6 @@ impl Example { }]; let blas_entries = hal::AccelerationStructureEntries::Triangles(blas_triangles); - let mut tlas_entries = - hal::AccelerationStructureEntries::Instances(hal::AccelerationStructureInstances { - buffer: None, - count: 3, - offset: 0, - }); - let blas_sizes = unsafe { device.get_acceleration_structure_build_sizes( &hal::GetAccelerationStructureBuildSizesDescriptor { @@ -509,6 +502,89 @@ impl Example { ) }; + let blas = unsafe { + device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { + label: Some("blas"), + size: blas_sizes.acceleration_structure_size, + format: hal::AccelerationStructureFormat::BottomLevel, + allow_compaction: false, + }) + } + .unwrap(); + + let instances = [ + AccelerationStructureInstance::new( + &Affine3A::from_translation(Vec3 { + x: 0.0, + y: 0.0, + z: 0.0, + }), + 0, + 0xff, + 0, + 0, + unsafe { device.get_acceleration_structure_device_address(&blas) }, + ), + AccelerationStructureInstance::new( + &Affine3A::from_translation(Vec3 { + x: -1.0, + y: -1.0, + z: -2.0, + }), + 0, + 0xff, + 0, + 0, + unsafe { device.get_acceleration_structure_device_address(&blas) }, + ), + AccelerationStructureInstance::new( + &Affine3A::from_translation(Vec3 { + x: 1.0, + y: -1.0, + z: -2.0, + }), + 0, + 0xff, + 0, + 0, + unsafe { device.get_acceleration_structure_device_address(&blas) }, + ), + ]; + + let instances_buffer_size = instances.len() * size_of::(); + + let instances_buffer = unsafe { + let instances_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("instances_buffer"), + size: instances_buffer_size as u64, + usage: wgpu_types::BufferUses::MAP_WRITE + | wgpu_types::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&instances_buffer, 0..instances_buffer_size as u64) + .unwrap(); + ptr::copy_nonoverlapping( + instances.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + instances_buffer_size, + ); + device.unmap_buffer(&instances_buffer); + assert!(mapping.is_coherent); + + instances_buffer + }; + + let tlas_entries = + hal::AccelerationStructureEntries::Instances(hal::AccelerationStructureInstances { + buffer: &instances_buffer, + count: 3, + offset: 0, + }); + let tlas_flags = hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE | hal::AccelerationStructureBuildFlags::ALLOW_UPDATE; @@ -521,16 +597,6 @@ impl Example { ) }; - let blas = unsafe { - device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { - label: Some("blas"), - size: blas_sizes.acceleration_structure_size, - format: hal::AccelerationStructureFormat::BottomLevel, - allow_compaction: false, - }) - } - .unwrap(); - let tlas = unsafe { device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { label: Some("tlas"), @@ -653,80 +719,6 @@ impl Example { .unwrap() }; - let instances = [ - AccelerationStructureInstance::new( - &Affine3A::from_translation(Vec3 { - x: 0.0, - y: 0.0, - z: 0.0, - }), - 0, - 0xff, - 0, - 0, - unsafe { device.get_acceleration_structure_device_address(&blas) }, - ), - AccelerationStructureInstance::new( - &Affine3A::from_translation(Vec3 { - x: -1.0, - y: -1.0, - z: -2.0, - }), - 0, - 0xff, - 0, - 0, - unsafe { device.get_acceleration_structure_device_address(&blas) }, - ), - AccelerationStructureInstance::new( - &Affine3A::from_translation(Vec3 { - x: 1.0, - y: -1.0, - z: -2.0, - }), - 0, - 0xff, - 0, - 0, - unsafe { device.get_acceleration_structure_device_address(&blas) }, - ), - ]; - - let instances_buffer_size = instances.len() * size_of::(); - - let instances_buffer = unsafe { - let instances_buffer = device - .create_buffer(&hal::BufferDescriptor { - label: Some("instances_buffer"), - size: instances_buffer_size as u64, - usage: wgpu_types::BufferUses::MAP_WRITE - | wgpu_types::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, - memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, - }) - .unwrap(); - - let mapping = device - .map_buffer(&instances_buffer, 0..instances_buffer_size as u64) - .unwrap(); - ptr::copy_nonoverlapping( - instances.as_ptr() as *const u8, - mapping.ptr.as_ptr(), - instances_buffer_size, - ); - device.unmap_buffer(&instances_buffer); - assert!(mapping.is_coherent); - - instances_buffer - }; - - if let hal::AccelerationStructureEntries::Instances(ref mut i) = tlas_entries { - i.buffer = Some(&instances_buffer); - assert!( - instances.len() <= i.count as usize, - "Tlas allocation to small" - ); - } - let cmd_encoder_desc = hal::CommandEncoderDescriptor { label: None, queue: &queue, @@ -903,7 +895,7 @@ impl Example { ctx.encoder.begin_encoding(Some("frame")).unwrap(); let instances = hal::AccelerationStructureInstances { - buffer: Some(&self.instances_buffer), + buffer: &self.instances_buffer, count: self.instances.len() as u32, offset: 0, }; diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index f57e6b9238..d96d515901 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1476,13 +1476,8 @@ impl crate::CommandEncoder for super::CommandEncoder { let num_desc; match descriptor.entries { AccelerationStructureEntries::Instances(instances) => { - let desc_address = unsafe { - instances - .buffer - .expect("needs buffer to build") - .resource - .GetGPUVirtualAddress() - } + instances.offset as u64; + let desc_address = unsafe { instances.buffer.resource.GetGPUVirtualAddress() } + + instances.offset as u64; ty = Direct3D12::D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL; inputs0 = Direct3D12::D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_0 { InstanceDescs: desc_address, @@ -1508,19 +1503,10 @@ impl crate::CommandEncoder for super::CommandEncoder { let index_count = triangle.indices.as_ref().map_or(0, |indices| indices.count); let index_address = triangle.indices.as_ref().map_or(0, |indices| unsafe { - indices - .buffer - .expect("needs buffer to build") - .resource - .GetGPUVirtualAddress() - + indices.offset as u64 + indices.buffer.resource.GetGPUVirtualAddress() + indices.offset as u64 }); let vertex_address = unsafe { - triangle - .vertex_buffer - .expect("needs buffer to build") - .resource - .GetGPUVirtualAddress() + triangle.vertex_buffer.resource.GetGPUVirtualAddress() + (triangle.first_vertex as u64 * triangle.vertex_stride) }; @@ -1555,10 +1541,7 @@ impl crate::CommandEncoder for super::CommandEncoder { geometry_desc = Vec::with_capacity(aabbs.len()); for aabb in aabbs { let aabb_address = unsafe { - aabb.buffer - .expect("needs buffer to build") - .resource - .GetGPUVirtualAddress() + aabb.buffer.resource.GetGPUVirtualAddress() + (aabb.offset as u64 * aabb.stride) }; diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 2c39b1cc59..9c1746443d 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -1134,7 +1134,9 @@ pub struct AccelerationStructure { allocation: suballocation::Allocation, } -impl crate::DynAccelerationStructure for AccelerationStructure {} +impl crate::DynAccelerationStructure for AccelerationStructure { + fn set_blases(&mut self, _blases: &[&dyn crate::DynAccelerationStructure]) {} +} impl SwapChain { unsafe fn release_resources(mut self) -> Dxgi::IDXGISwapChain3 { diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs index a8dbae94ee..c322cce28a 100644 --- a/wgpu-hal/src/dynamic/mod.rs +++ b/wgpu-hal/src/dynamic/mod.rs @@ -51,7 +51,7 @@ macro_rules! impl_dyn_resource { pub(crate) use impl_dyn_resource; /// Extension trait for `DynResource` used by implementations of various dynamic resource traits. -trait DynResourceExt { +pub(crate) trait DynResourceExt { /// # Panics /// /// - Panics if `self` is not downcastable to `T`. @@ -104,7 +104,9 @@ impl DynResourceExt for R { } } -pub trait DynAccelerationStructure: DynResource + fmt::Debug {} +pub trait DynAccelerationStructure: DynResource + fmt::Debug { + fn set_blases(&mut self, blases: &[&dyn DynAccelerationStructure]); +} pub trait DynBindGroup: DynResource + fmt::Debug {} pub trait DynBindGroupLayout: DynResource + fmt::Debug {} pub trait DynBuffer: DynResource + fmt::Debug {} @@ -159,7 +161,7 @@ impl<'a> AccelerationStructureEntries<'a, dyn DynBuffer> { match self { AccelerationStructureEntries::Instances(instances) => { AccelerationStructureEntries::Instances(AccelerationStructureInstances { - buffer: instances.buffer.map(|b| b.expect_downcast_ref()), + buffer: instances.buffer.expect_downcast_ref(), offset: instances.offset, count: instances.count, }) @@ -169,14 +171,14 @@ impl<'a> AccelerationStructureEntries<'a, dyn DynBuffer> { triangles .iter() .map(|t| AccelerationStructureTriangles { - vertex_buffer: t.vertex_buffer.map(|b| b.expect_downcast_ref()), + vertex_buffer: t.vertex_buffer.expect_downcast_ref(), vertex_format: t.vertex_format, first_vertex: t.first_vertex, vertex_count: t.vertex_count, vertex_stride: t.vertex_stride, indices: t.indices.as_ref().map(|i| { AccelerationStructureTriangleIndices { - buffer: i.buffer.map(|b| b.expect_downcast_ref()), + buffer: i.buffer.expect_downcast_ref(), format: i.format, offset: i.offset, count: i.count, @@ -197,7 +199,7 @@ impl<'a> AccelerationStructureEntries<'a, dyn DynBuffer> { entries .iter() .map(|e| AccelerationStructureAABBs { - buffer: e.buffer.map(|b| b.expect_downcast_ref()), + buffer: e.buffer.expect_downcast_ref(), offset: e.offset, count: e.count, stride: e.stride, diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index c09fc21d24..c493b57bd7 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -741,7 +741,9 @@ impl crate::DynQuerySet for QuerySet {} #[derive(Debug)] pub struct AccelerationStructure; -impl crate::DynAccelerationStructure for AccelerationStructure {} +impl crate::DynAccelerationStructure for AccelerationStructure { + fn set_blases(&mut self, _blases: &[&dyn crate::DynAccelerationStructure]) {} +} #[derive(Debug)] pub struct PipelineCache; diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index b469aa4f44..fa0c4650f4 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -2489,7 +2489,7 @@ pub enum AccelerationStructureEntries<'a, B: DynBuffer + ?Sized> { /// * `transform` - optional transform #[derive(Clone, Debug)] pub struct AccelerationStructureTriangles<'a, B: DynBuffer + ?Sized> { - pub vertex_buffer: Option<&'a B>, + pub vertex_buffer: &'a B, pub vertex_format: wgt::VertexFormat, pub first_vertex: u32, pub vertex_count: u32, @@ -2502,7 +2502,7 @@ pub struct AccelerationStructureTriangles<'a, B: DynBuffer + ?Sized> { /// * `offset` - offset in bytes #[derive(Clone, Debug)] pub struct AccelerationStructureAABBs<'a, B: DynBuffer + ?Sized> { - pub buffer: Option<&'a B>, + pub buffer: &'a B, pub offset: u32, pub count: u32, pub stride: wgt::BufferAddress, @@ -2517,7 +2517,7 @@ pub struct AccelerationStructureCopy { /// * `offset` - offset in bytes #[derive(Clone, Debug)] pub struct AccelerationStructureInstances<'a, B: DynBuffer + ?Sized> { - pub buffer: Option<&'a B>, + pub buffer: &'a B, pub offset: u32, pub count: u32, } @@ -2526,7 +2526,7 @@ pub struct AccelerationStructureInstances<'a, B: DynBuffer + ?Sized> { #[derive(Clone, Debug)] pub struct AccelerationStructureTriangleIndices<'a, B: DynBuffer + ?Sized> { pub format: wgt::IndexFormat, - pub buffer: Option<&'a B>, + pub buffer: &'a B, pub offset: u32, pub count: u32, } diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index f41c650ca4..a19e694033 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -892,6 +892,12 @@ impl super::PrivateCapabilities { && (device.supports_family(MTLGPUFamily::Apple7) || device.supports_family(MTLGPUFamily::Mac2)), supports_shared_event: version.at_least((10, 14), (12, 0), os_is_mac), + supports_raytracing: if version.at_least((14, 0), (17, 0), os_is_mac) { + // The Rust metal crate does not expose device.supports_raytracing_from_render() yet + device.supports_raytracing() + } else { + false + }, } } @@ -993,6 +999,11 @@ impl super::PrivateCapabilities { features.insert(F::SUBGROUP | F::SUBGROUP_BARRIER); } + features.set( + F::EXPERIMENTAL_RAY_QUERY | F::EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE, + self.supports_raytracing, + ); + features } @@ -1076,8 +1087,10 @@ impl super::PrivateCapabilities { // Metal Shading Language it generates, so from `wgpu_hal`'s // users' point of view, references are tightly checked. uniform_bounds_check_alignment: wgt::BufferSize::new(1).unwrap(), - raw_tlas_instance_size: 0, - ray_tracing_scratch_buffer_alignment: 0, + raw_tlas_instance_size: size_of::< + metal::MTLIndirectAccelerationStructureInstanceDescriptor, + >(), + ray_tracing_scratch_buffer_alignment: 1, }, downlevel, } diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 72a799a027..b896a2d7c1 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -6,8 +6,8 @@ use alloc::{ }; use core::ops::Range; use metal::{ - MTLIndexType, MTLLoadAction, MTLPrimitiveType, MTLScissorRect, MTLSize, MTLStoreAction, - MTLViewport, MTLVisibilityResultMode, NSRange, + MTLLoadAction, MTLPrimitiveType, MTLScissorRect, MTLSize, MTLStoreAction, MTLViewport, + MTLVisibilityResultMode, NSRange, }; // has to match `Temp::binding_sizes` @@ -17,6 +17,7 @@ impl Default for super::CommandState { fn default() -> Self { Self { blit: None, + acceleration_structure_builder: None, render: None, compute: None, raw_primitive_type: MTLPrimitiveType::Point, @@ -35,6 +36,7 @@ impl Default for super::CommandState { impl super::CommandEncoder { fn enter_blit(&mut self) -> &metal::BlitCommandEncoderRef { if self.state.blit.is_none() { + self.leave_acceleration_structure_builder(); debug_assert!(self.state.render.is_none() && self.state.compute.is_none()); let cmd_buf = self.raw_cmd_buf.as_ref().unwrap(); @@ -125,11 +127,41 @@ impl super::CommandEncoder { } } + fn enter_acceleration_structure_builder( + &mut self, + ) -> &metal::AccelerationStructureCommandEncoderRef { + if self.state.acceleration_structure_builder.is_none() { + self.leave_blit(); + debug_assert!( + self.state.render.is_none() + && self.state.compute.is_none() + && self.state.blit.is_none() + ); + let cmd_buf = self.raw_cmd_buf.as_ref().unwrap(); + objc::rc::autoreleasepool(|| { + self.state.acceleration_structure_builder = Some( + cmd_buf + .new_acceleration_structure_command_encoder() + .to_owned(), + ); + }); + } + self.state.acceleration_structure_builder.as_ref().unwrap() + } + + pub(super) fn leave_acceleration_structure_builder(&mut self) { + if let Some(encoder) = self.state.acceleration_structure_builder.take() { + encoder.end_encoding(); + } + } + fn active_encoder(&mut self) -> Option<&metal::CommandEncoderRef> { if let Some(ref encoder) = self.state.render { Some(encoder) } else if let Some(ref encoder) = self.state.compute { Some(encoder) + } else if let Some(ref encoder) = self.state.acceleration_structure_builder { + Some(encoder) } else if let Some(ref encoder) = self.state.blit { Some(encoder) } else { @@ -140,6 +172,7 @@ impl super::CommandEncoder { fn begin_pass(&mut self) { self.state.reset(); self.leave_blit(); + self.leave_acceleration_structure_builder(); } } @@ -212,6 +245,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn discard_encoding(&mut self) { self.leave_blit(); + self.leave_acceleration_structure_builder(); // when discarding, we don't have a guarantee that // everything is in a good state, so check carefully if let Some(encoder) = self.state.render.take() { @@ -231,6 +265,7 @@ impl crate::CommandEncoder for super::CommandEncoder { } self.leave_blit(); + self.leave_acceleration_structure_builder(); debug_assert!(self.state.render.is_none()); debug_assert!(self.state.compute.is_none()); debug_assert!(self.state.pending_timer_queries.is_empty()); @@ -402,11 +437,19 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn copy_acceleration_structure_to_acceleration_structure( &mut self, - _src: &super::AccelerationStructure, - _dst: &super::AccelerationStructure, - _copy: wgt::AccelerationStructureCopy, + src: &super::AccelerationStructure, + dst: &super::AccelerationStructure, + copy: wgt::AccelerationStructureCopy, ) { - unimplemented!() + let command_encoder = self.enter_acceleration_structure_builder(); + match copy { + wgt::AccelerationStructureCopy::Clone => { + command_encoder.copy_acceleration_structure(&src.raw, &dst.raw); + } + wgt::AccelerationStructureCopy::Compact => { + command_encoder.copy_and_compact_acceleration_structure(&src.raw, &dst.raw); + } + }; } unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { @@ -677,23 +720,32 @@ impl crate::CommandEncoder for super::CommandEncoder { if let Some(ref encoder) = self.state.render { let mut changes_sizes_buffer = false; for index in 0..group.counters.vs.buffers { - let buf = &group.buffers[index as usize]; - let mut offset = buf.offset; - if let Some(dyn_index) = buf.dynamic_index { - offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; - } - encoder.set_vertex_buffer( - (bg_info.base_resource_indices.vs.buffers + index) as u64, - Some(buf.ptr.as_native()), - offset, - ); - if let Some(size) = buf.binding_size { - let br = naga::ResourceBinding { - group: group_index, - binding: buf.binding_location, - }; - self.state.storage_buffer_length_map.insert(br, size); - changes_sizes_buffer = true; + match &group.buffers[index as usize] { + super::BufferResource::Buffer(binding) => { + let mut offset = binding.offset; + if let Some(dyn_index) = binding.dynamic_index { + offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; + } + encoder.set_vertex_buffer( + (bg_info.base_resource_indices.vs.buffers + index) as u64, + Some(binding.ptr.as_native()), + offset, + ); + if let Some(size) = binding.binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: binding.binding_location, + }; + self.state.storage_buffer_length_map.insert(br, size); + changes_sizes_buffer = true; + } + } + super::BufferResource::AccelerationStructure(ptr) => { + encoder.set_vertex_acceleration_structure( + (bg_info.base_resource_indices.vs.buffers + index) as u64, + Some(ptr.as_native()), + ); + } } } if changes_sizes_buffer { @@ -711,23 +763,32 @@ impl crate::CommandEncoder for super::CommandEncoder { changes_sizes_buffer = false; for index in 0..group.counters.fs.buffers { - let buf = &group.buffers[(group.counters.vs.buffers + index) as usize]; - let mut offset = buf.offset; - if let Some(dyn_index) = buf.dynamic_index { - offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; - } - encoder.set_fragment_buffer( - (bg_info.base_resource_indices.fs.buffers + index) as u64, - Some(buf.ptr.as_native()), - offset, - ); - if let Some(size) = buf.binding_size { - let br = naga::ResourceBinding { - group: group_index, - binding: buf.binding_location, - }; - self.state.storage_buffer_length_map.insert(br, size); - changes_sizes_buffer = true; + match &group.buffers[(group.counters.vs.buffers + index) as usize] { + super::BufferResource::Buffer(binding) => { + let mut offset = binding.offset; + if let Some(dyn_index) = binding.dynamic_index { + offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; + } + encoder.set_fragment_buffer( + (bg_info.base_resource_indices.fs.buffers + index) as u64, + Some(binding.ptr.as_native()), + offset, + ); + if let Some(size) = binding.binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: binding.binding_location, + }; + self.state.storage_buffer_length_map.insert(br, size); + changes_sizes_buffer = true; + } + } + super::BufferResource::AccelerationStructure(ptr) => { + encoder.set_fragment_acceleration_structure( + (bg_info.base_resource_indices.fs.buffers + index) as u64, + Some(ptr.as_native()), + ); + } } } if changes_sizes_buffer { @@ -789,22 +850,32 @@ impl crate::CommandEncoder for super::CommandEncoder { let mut changes_sizes_buffer = false; for index in 0..group.counters.cs.buffers { let buf = &group.buffers[(index_base.buffers + index) as usize]; - let mut offset = buf.offset; - if let Some(dyn_index) = buf.dynamic_index { - offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; - } - encoder.set_buffer( - (bg_info.base_resource_indices.cs.buffers + index) as u64, - Some(buf.ptr.as_native()), - offset, - ); - if let Some(size) = buf.binding_size { - let br = naga::ResourceBinding { - group: group_index, - binding: buf.binding_location, - }; - self.state.storage_buffer_length_map.insert(br, size); - changes_sizes_buffer = true; + match buf { + super::BufferResource::Buffer(binding) => { + let mut offset = binding.offset; + if let Some(dyn_index) = binding.dynamic_index { + offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; + } + encoder.set_buffer( + (bg_info.base_resource_indices.cs.buffers + index) as u64, + Some(binding.ptr.as_native()), + offset, + ); + if let Some(size) = binding.binding_size { + let br = naga::ResourceBinding { + group: group_index, + binding: binding.binding_location, + }; + self.state.storage_buffer_length_map.insert(br, size); + changes_sizes_buffer = true; + } + } + super::BufferResource::AccelerationStructure(ptr) => { + encoder.set_acceleration_structure( + (bg_info.base_resource_indices.cs.buffers + index) as u64, + Some(ptr.as_native()), + ); + } } } if changes_sizes_buffer { @@ -956,10 +1027,7 @@ impl crate::CommandEncoder for super::CommandEncoder { binding: crate::BufferBinding<'a, super::Buffer>, format: wgt::IndexFormat, ) { - let (stride, raw_type) = match format { - wgt::IndexFormat::Uint16 => (2, MTLIndexType::UInt16), - wgt::IndexFormat::Uint32 => (4, MTLIndexType::UInt32), - }; + let (stride, raw_type) = conv::map_index_format(format); self.state.index = Some(super::IndexState { buffer_ptr: AsNative::from(binding.buffer.raw.as_ref()), offset: binding.offset, @@ -1322,7 +1390,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn build_acceleration_structures<'a, T>( &mut self, _descriptor_count: u32, - _descriptors: T, + descriptors: T, ) where super::Api: 'a, T: IntoIterator< @@ -1333,22 +1401,60 @@ impl crate::CommandEncoder for super::CommandEncoder { >, >, { - unimplemented!() + let command_encoder = self.enter_acceleration_structure_builder(); + for descriptor in descriptors { + let acceleration_structure_descriptor = + conv::map_acceleration_structure_descriptor(descriptor.entries); + /* The Rust metal crate does not expose metal::MTLAccelerationStructureUsage yet + let mut usage = metal::MTLAccelerationStructureUsage::None; + if descriptor.flags.contains(wgt::AccelerationStructureFlags::ALLOW_UPDATE) { + usage |= metal::MTLAccelerationStructureUsage::Refit; + } + if descriptor.flags.contains(wgt::AccelerationStructureFlags::PREFER_FAST_BUILD) { + usage |= metal::MTLAccelerationStructureUsage::PreferFastBuild; + } + acceleration_structure_descriptor.set_usage(usage); + */ + match descriptor.mode { + crate::AccelerationStructureBuildMode::Build => { + command_encoder.build_acceleration_structure( + &descriptor.destination_acceleration_structure.raw, + &acceleration_structure_descriptor, + &descriptor.scratch_buffer.raw, + descriptor.scratch_buffer_offset, + ); + } + crate::AccelerationStructureBuildMode::Update => { + command_encoder.refit_acceleration_structure( + &descriptor.source_acceleration_structure.unwrap().raw, + &acceleration_structure_descriptor, + Some(&descriptor.destination_acceleration_structure.raw), + &descriptor.scratch_buffer.raw, + descriptor.scratch_buffer_offset, + ); + } + } + } } unsafe fn place_acceleration_structure_barrier( &mut self, _barriers: crate::AccelerationStructureBarrier, ) { - unimplemented!() } unsafe fn read_acceleration_structure_compact_size( &mut self, - _acceleration_structure: &super::AccelerationStructure, - _buf: &super::Buffer, + acceleration_structure: &super::AccelerationStructure, + buffer: &super::Buffer, ) { - unimplemented!() + let command_encoder = self.enter_acceleration_structure_builder(); + command_encoder.write_compacted_acceleration_structure_size_with_type( + &acceleration_structure.raw, + &buffer.raw, + 0, + metal::MTLDataType::ULong, + ); } } diff --git a/wgpu-hal/src/metal/conv.rs b/wgpu-hal/src/metal/conv.rs index 260b6c15a3..a17f18b7ea 100644 --- a/wgpu-hal/src/metal/conv.rs +++ b/wgpu-hal/src/metal/conv.rs @@ -1,9 +1,10 @@ use metal::{ - MTLBlendFactor, MTLBlendOperation, MTLBlitOption, MTLClearColor, MTLColorWriteMask, - MTLCompareFunction, MTLCullMode, MTLOrigin, MTLPrimitiveTopologyClass, MTLPrimitiveType, - MTLRenderStages, MTLResourceUsage, MTLSamplerAddressMode, MTLSamplerBorderColor, - MTLSamplerMinMagFilter, MTLSize, MTLStencilOperation, MTLStoreAction, MTLTextureType, - MTLTextureUsage, MTLVertexFormat, MTLVertexStepFunction, MTLWinding, NSRange, + MTLAttributeFormat, MTLBlendFactor, MTLBlendOperation, MTLBlitOption, MTLClearColor, + MTLColorWriteMask, MTLCompareFunction, MTLCullMode, MTLIndexType, MTLOrigin, + MTLPrimitiveTopologyClass, MTLPrimitiveType, MTLRenderStages, MTLResourceUsage, + MTLSamplerAddressMode, MTLSamplerBorderColor, MTLSamplerMinMagFilter, MTLSize, + MTLStencilOperation, MTLStoreAction, MTLTextureType, MTLTextureUsage, MTLVertexFormat, + MTLVertexStepFunction, MTLWinding, NSRange, }; pub fn map_texture_usage(format: wgt::TextureFormat, usage: wgt::TextureUses) -> MTLTextureUsage { @@ -234,6 +235,13 @@ pub fn map_vertex_format(format: wgt::VertexFormat) -> MTLVertexFormat { } } +pub fn map_index_format(format: wgt::IndexFormat) -> (u64, MTLIndexType) { + match format { + wgt::IndexFormat::Uint16 => (2, MTLIndexType::UInt16), + wgt::IndexFormat::Uint32 => (4, MTLIndexType::UInt32), + } +} + pub fn map_step_mode(mode: wgt::VertexStepMode) -> MTLVertexStepFunction { match mode { wgt::VertexStepMode::Vertex => MTLVertexStepFunction::PerVertex, @@ -353,3 +361,93 @@ pub fn map_resource_usage(ty: &wgt::BindingType) -> MTLResourceUsage { _ => unreachable!(), } } + +pub fn map_acceleration_structure_descriptor<'a>( + entries: &crate::AccelerationStructureEntries<'a, super::Buffer>, +) -> metal::AccelerationStructureDescriptor { + match entries { + crate::AccelerationStructureEntries::Instances(instances) => { + let descriptor = metal::InstanceAccelerationStructureDescriptor::descriptor(); + descriptor.set_instance_descriptor_type( + metal::MTLAccelerationStructureInstanceDescriptorType::Indirect, + ); + descriptor.set_instance_count(instances.count as u64); + descriptor.set_instance_descriptor_buffer(&instances.buffer.raw); + descriptor.set_instance_descriptor_buffer_offset(instances.offset as u64); + metal::AccelerationStructureDescriptor::from(descriptor) + } + crate::AccelerationStructureEntries::Triangles(entries) => { + let geometry_descriptors = entries + .iter() + .map(|triangles| { + let descriptor = + metal::AccelerationStructureTriangleGeometryDescriptor::descriptor(); + if let Some(indices) = triangles.indices.as_ref() { + descriptor.set_index_buffer(Some(&*indices.buffer.raw)); + descriptor.set_index_buffer_offset(indices.offset as u64); + descriptor.set_index_type(map_index_format(indices.format).1); + descriptor.set_triangle_count(indices.count as u64 / 3); + } else { + descriptor.set_triangle_count(triangles.vertex_count as u64 / 3); + } + descriptor.set_vertex_buffer(Some(&*triangles.vertex_buffer.raw)); + descriptor.set_vertex_buffer_offset( + triangles.first_vertex as u64 * triangles.vertex_stride, + ); + descriptor.set_vertex_stride(triangles.vertex_stride); + // Safety: MTLVertexFormat and MTLAttributeFormat are identical. + // https://docs.rs/metal/latest/metal/enum.MTLAttributeFormat.html + // https://docs.rs/metal/latest/metal/enum.MTLVertexFormat.html + descriptor.set_vertex_format(unsafe { + core::mem::transmute::( + map_vertex_format(triangles.vertex_format), + ) + }); + if let Some(transform) = triangles.transform.as_ref() { + descriptor.set_transformation_matrix_buffer(Some(&*transform.buffer.raw)); + descriptor.set_transformation_matrix_buffer_offset(transform.offset as u64); + } + descriptor.set_opaque( + triangles + .flags + .contains(wgt::AccelerationStructureGeometryFlags::OPAQUE), + ); + // wgt::AccelerationStructureGeometryFlags::NO_DUPLICATE_ANY_HIT_INVOCATION + // descriptor.set_intersection_function_table_offset(offset); + metal::AccelerationStructureGeometryDescriptor::from(descriptor) + }) + .collect::>(); + let descriptor = metal::PrimitiveAccelerationStructureDescriptor::descriptor(); + descriptor.set_geometry_descriptors(metal::Array::from_owned_slice( + geometry_descriptors.as_slice(), + )); + metal::AccelerationStructureDescriptor::from(descriptor) + } + crate::AccelerationStructureEntries::AABBs(entries) => { + let geometry_descriptors = entries + .iter() + .map(|aabbs| { + let descriptor = + metal::AccelerationStructureBoundingBoxGeometryDescriptor::descriptor(); + descriptor.set_bounding_box_buffer(Some(&*aabbs.buffer.raw)); + descriptor.set_bounding_box_count(aabbs.count as u64); + descriptor.set_bounding_box_stride(aabbs.stride); + descriptor.set_bounding_box_buffer_offset(aabbs.offset as u64); + descriptor.set_opaque( + aabbs + .flags + .contains(wgt::AccelerationStructureGeometryFlags::OPAQUE), + ); + // wgt::AccelerationStructureGeometryFlags::NO_DUPLICATE_ANY_HIT_INVOCATION + // descriptor.set_intersection_function_table_offset(offset); + metal::AccelerationStructureGeometryDescriptor::from(descriptor) + }) + .collect::>(); + let descriptor = metal::PrimitiveAccelerationStructureDescriptor::descriptor(); + descriptor.set_geometry_descriptors(metal::Array::from_owned_slice( + geometry_descriptors.as_slice(), + )); + metal::AccelerationStructureDescriptor::from(descriptor) + } + } +} diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 6ab22b0c3e..648b1d2a7c 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -2,6 +2,7 @@ use alloc::{borrow::ToOwned as _, sync::Arc, vec::Vec}; use core::{ptr::NonNull, sync::atomic}; use std::{thread, time}; +use bytemuck::TransparentWrapper; use parking_lot::Mutex; use super::{conv, PassthroughShader}; @@ -745,7 +746,10 @@ impl crate::Device for super::Device { wgt::StorageTextureAccess::Atomic => true, }; } - wgt::BindingType::AccelerationStructure { .. } => unimplemented!(), + wgt::BindingType::AccelerationStructure { .. } => { + target.buffer = Some(info.counters.buffers as _); + info.counters.buffers += 1; + } } } @@ -839,6 +843,7 @@ impl crate::Device for super::Device { (entry, layout) }); for (entry, layout) in layout_and_entry_iter { + let stages = conv::map_render_stages(layout.visibility); // Bindless path if layout.count.is_some() { if !layout.visibility.contains(stage_bit) { @@ -846,8 +851,6 @@ impl crate::Device for super::Device { } let count = entry.count; - - let stages = conv::map_render_stages(layout.visibility); let uses = conv::map_resource_usage(&layout.ty); // Create argument buffer for this array @@ -895,18 +898,41 @@ impl crate::Device for super::Device { // need to be passed to useResource } } + wgt::BindingType::AccelerationStructure { .. } => { + let start = entry.resource_index as usize; + let end = start + count as usize; + let acceleration_structures = + &desc.acceleration_structures[start..end]; + + for (idx, &acceleration_structure) in + acceleration_structures.iter().enumerate() + { + contents[idx] = acceleration_structure.raw.gpu_resource_id(); + + let use_info = bg + .resources_to_use + .entry(acceleration_structure.as_raw().cast()) + .or_default(); + use_info.stages |= stages; + use_info.uses |= uses; + use_info.visible_in_compute |= + layout.visibility.contains(wgt::ShaderStages::COMPUTE); + } + } _ => { unimplemented!(); } } - bg.buffers.push(super::BufferResource { - ptr: unsafe { NonNull::new_unchecked(buffer.as_ptr()) }, - offset: 0, - dynamic_index: None, - binding_size: None, - binding_location: layout.binding, - }); + bg.buffers.push(super::BufferResource::Buffer( + super::BufferResourceBinding { + ptr: unsafe { NonNull::new_unchecked(buffer.as_ptr()) }, + offset: 0, + dynamic_index: None, + binding_size: None, + binding_location: layout.binding, + }, + )); counter.buffers += 1; bg.argument_buffers.push(buffer) @@ -944,17 +970,19 @@ impl crate::Device for super::Device { } _ => None, }; - super::BufferResource { - ptr: source.buffer.as_raw(), - offset: source.offset, - dynamic_index: if has_dynamic_offset { - Some(dynamic_offsets_count - 1) - } else { - None + super::BufferResource::Buffer( + super::BufferResourceBinding { + ptr: source.buffer.as_raw(), + offset: source.offset, + dynamic_index: if has_dynamic_offset { + Some(dynamic_offsets_count - 1) + } else { + None + }, + binding_size, + binding_location: layout.binding, }, - binding_size, - binding_location: layout.binding, - } + ) })); counter.buffers += 1; } @@ -977,7 +1005,35 @@ impl crate::Device for super::Device { ); counter.textures += 1; } - wgt::BindingType::AccelerationStructure { .. } => unimplemented!(), + wgt::BindingType::AccelerationStructure { .. } => { + let start = entry.resource_index as usize; + let end = start + 1; + bg.buffers.extend( + desc.acceleration_structures[start..end].iter().map( + |acceleration_structure| { + for blas in acceleration_structure.blases.iter() { + let use_info = bg + .resources_to_use + .entry( + ::from( + blas, + ), + ) + .or_default(); + use_info.stages |= stages; + use_info.uses |= metal::MTLResourceUsage::Read; + use_info.visible_in_compute |= layout + .visibility + .contains(wgt::ShaderStages::COMPUTE); + } + super::BufferResource::AccelerationStructure( + acceleration_structure.as_raw(), + ) + }, + ), + ); + counter.buffers += 1; + } } } } @@ -1574,34 +1630,92 @@ impl crate::Device for super::Device { unsafe fn get_acceleration_structure_build_sizes( &self, - _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, + descriptor: &crate::GetAccelerationStructureBuildSizesDescriptor, ) -> crate::AccelerationStructureBuildSizes { - unimplemented!() + let acceleration_structure_descriptor = + conv::map_acceleration_structure_descriptor(descriptor.entries); + /* The Rust metal crate does not expose metal::MTLAccelerationStructureUsage yet + let mut usage = metal::MTLAccelerationStructureUsage::None; + if descriptor.flags.contains(wgt::AccelerationStructureFlags::ALLOW_UPDATE) { + usage |= metal::MTLAccelerationStructureUsage::Refit; + } + if descriptor.flags.contains(wgt::AccelerationStructureFlags::PREFER_FAST_BUILD) { + usage |= metal::MTLAccelerationStructureUsage::PreferFastBuild; + } + acceleration_structure_descriptor.set_usage(usage); + */ + let device = self.shared.device.lock(); + let info = + device.acceleration_structure_sizes_with_descriptor(&acceleration_structure_descriptor); + crate::AccelerationStructureBuildSizes { + acceleration_structure_size: info.acceleration_structure_size, + update_scratch_size: info.refit_scratch_buffer_size, + build_scratch_size: info.build_scratch_buffer_size, + } } unsafe fn get_acceleration_structure_device_address( &self, - _acceleration_structure: &super::AccelerationStructure, + acceleration_structure: &super::AccelerationStructure, ) -> wgt::BufferAddress { - unimplemented!() + acceleration_structure.raw.gpu_resource_id()._impl } unsafe fn create_acceleration_structure( &self, - _desc: &crate::AccelerationStructureDescriptor, + descriptor: &crate::AccelerationStructureDescriptor, ) -> Result { - unimplemented!() + // self.counters.acceleration_structures.add(1); + let device = self.shared.device.lock(); + objc::rc::autoreleasepool(|| { + Ok(super::AccelerationStructure { + raw: device.new_acceleration_structure_with_size(descriptor.size), + blases: Vec::new(), + }) + }) } unsafe fn destroy_acceleration_structure( &self, _acceleration_structure: super::AccelerationStructure, ) { - unimplemented!() + // self.counters.acceleration_structures.sub(1); } - fn tlas_instance_to_bytes(&self, _instance: TlasInstance) -> Vec { - unimplemented!() + fn tlas_instance_to_bytes(&self, instance: TlasInstance) -> Vec { + let temp = metal::MTLIndirectAccelerationStructureInstanceDescriptor { + transformation_matrix: [ + [ + instance.transform[0], + instance.transform[4], + instance.transform[8], + ], + [ + instance.transform[1], + instance.transform[5], + instance.transform[9], + ], + [ + instance.transform[2], + instance.transform[6], + instance.transform[10], + ], + [ + instance.transform[3], + instance.transform[7], + instance.transform[11], + ], + ], + options: metal::MTLAccelerationStructureInstanceOptions::None, + mask: instance.mask as u32, + intersection_function_table_offset: 0, + acceleration_structure_id: instance.blas_address, + user_id: instance.custom_data, + }; + + wgt::bytemuck_wrapper!(unsafe struct Desc(metal::MTLIndirectAccelerationStructureInstanceDescriptor)); + + bytemuck::bytes_of(&Desc::wrap(temp)).to_vec() } fn get_internal_counters(&self) -> wgt::HalCounters { diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 813dbe71cd..277b298369 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -34,10 +34,11 @@ use arrayvec::ArrayVec; use bitflags::bitflags; use hashbrown::HashMap; use metal::{ - foreign_types::ForeignTypeRef as _, MTLArgumentBuffersTier, MTLBuffer, MTLCommandBufferStatus, - MTLCullMode, MTLDepthClipMode, MTLIndexType, MTLLanguageVersion, MTLPrimitiveType, - MTLReadWriteTextureTier, MTLRenderStages, MTLResource, MTLResourceUsage, MTLSamplerState, - MTLSize, MTLTexture, MTLTextureType, MTLTriangleFillMode, MTLWinding, + foreign_types::ForeignTypeRef as _, MTLAccelerationStructure, MTLArgumentBuffersTier, + MTLBuffer, MTLCommandBufferStatus, MTLCullMode, MTLDepthClipMode, MTLIndexType, + MTLLanguageVersion, MTLPrimitiveType, MTLReadWriteTextureTier, MTLRenderStages, MTLResource, + MTLResourceUsage, MTLSamplerState, MTLSize, MTLTexture, MTLTextureType, MTLTriangleFillMode, + MTLWinding, }; use naga::FastHashMap; use parking_lot::{Mutex, RwLock}; @@ -299,6 +300,7 @@ struct PrivateCapabilities { int64_atomics: bool, float_atomics: bool, supports_shared_event: bool, + supports_raytracing: bool, } #[derive(Clone, Debug)] @@ -668,7 +670,7 @@ pub struct PipelineLayout { impl crate::DynPipelineLayout for PipelineLayout {} -trait AsNative { +pub(crate) trait AsNative { type Native; fn from(native: &Self::Native) -> Self; fn as_native(&self) -> &Self::Native; @@ -678,6 +680,7 @@ type ResourcePtr = NonNull; type BufferPtr = NonNull; type TexturePtr = NonNull; type SamplerPtr = NonNull; +type AccelerationStructurePtr = NonNull; impl AsNative for ResourcePtr { type Native = metal::ResourceRef; @@ -727,8 +730,20 @@ impl AsNative for SamplerPtr { } } +impl AsNative for AccelerationStructurePtr { + type Native = metal::AccelerationStructureRef; + #[inline] + fn from(native: &Self::Native) -> Self { + unsafe { NonNull::new_unchecked(native.as_ptr()) } + } + #[inline] + fn as_native(&self) -> &Self::Native { + unsafe { Self::Native::from_ptr(self.as_ptr()) } + } +} + #[derive(Debug)] -struct BufferResource { +struct BufferResourceBinding { ptr: BufferPtr, offset: wgt::BufferAddress, dynamic_index: Option, @@ -746,6 +761,12 @@ struct BufferResource { binding_location: u32, } +#[derive(Debug)] +enum BufferResource { + Buffer(BufferResourceBinding), + AccelerationStructure(AccelerationStructurePtr), +} + #[derive(Debug)] struct UseResourceInfo { uses: MTLResourceUsage, @@ -937,6 +958,7 @@ struct Temp { struct CommandState { blit: Option, + acceleration_structure_builder: Option, render: Option, compute: Option, raw_primitive_type: MTLPrimitiveType, @@ -1011,6 +1033,27 @@ pub struct PipelineCache; impl crate::DynPipelineCache for PipelineCache {} #[derive(Debug)] -pub struct AccelerationStructure; +pub struct AccelerationStructure { + raw: metal::AccelerationStructure, + blases: Vec, +} + +impl AccelerationStructure { + fn as_raw(&self) -> AccelerationStructurePtr { + unsafe { NonNull::new_unchecked(self.raw.as_ptr()) } + } +} -impl crate::DynAccelerationStructure for AccelerationStructure {} +impl crate::DynAccelerationStructure for AccelerationStructure { + fn set_blases(&mut self, blases: &[&dyn crate::DynAccelerationStructure]) { + use crate::dynamic::DynResourceExt; + self.blases = blases + .iter() + .map(|blas| { + blas.expect_downcast_ref::() + .raw + .clone() + }) + .collect::>(); + } +} diff --git a/wgpu-hal/src/noop/mod.rs b/wgpu-hal/src/noop/mod.rs index d0192cb516..77ab0f5876 100644 --- a/wgpu-hal/src/noop/mod.rs +++ b/wgpu-hal/src/noop/mod.rs @@ -60,7 +60,9 @@ impl crate::Api for Api { crate::impl_dyn_resource!(Buffer, CommandBuffer, Context, Fence, Resource); -impl crate::DynAccelerationStructure for Resource {} +impl crate::DynAccelerationStructure for Resource { + fn set_blases(&mut self, _blases: &[&dyn crate::DynAccelerationStructure]) {} +} impl crate::DynBindGroup for Resource {} impl crate::DynBindGroupLayout for Resource {} impl crate::DynBuffer for Buffer {} diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index c31192b23b..f03b8eb051 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -571,7 +571,7 @@ impl crate::CommandEncoder for super::CommandEncoder { // TODO: Code is so large that rustfmt refuses to treat this... :( ) .data(vk::DeviceOrHostAddressConstKHR { - device_address: get_device_address(instances.buffer), + device_address: get_device_address(Some(instances.buffer)), }); let geometry = vk::AccelerationStructureGeometryKHR::default() @@ -600,7 +600,9 @@ impl crate::CommandEncoder for super::CommandEncoder { // index buffer we need to have IndexType::NONE_KHR as our index type. .index_type(vk::IndexType::NONE_KHR) .vertex_data(vk::DeviceOrHostAddressConstKHR { - device_address: get_device_address(triangles.vertex_buffer), + device_address: get_device_address(Some( + triangles.vertex_buffer, + )), }) .vertex_format(conv::map_vertex_format(triangles.vertex_format)) .max_vertex(triangles.vertex_count) @@ -611,7 +613,7 @@ impl crate::CommandEncoder for super::CommandEncoder { if let Some(ref indices) = triangles.indices { triangle_data = triangle_data .index_data(vk::DeviceOrHostAddressConstKHR { - device_address: get_device_address(indices.buffer), + device_address: get_device_address(Some(indices.buffer)), }) .index_type(conv::map_index_format(indices.format)); @@ -666,7 +668,7 @@ impl crate::CommandEncoder for super::CommandEncoder { for aabb in in_geometries { let aabbs_data = vk::AccelerationStructureGeometryAabbsDataKHR::default() .data(vk::DeviceOrHostAddressConstKHR { - device_address: get_device_address(aabb.buffer), + device_address: get_device_address(Some(aabb.buffer)), }) .stride(aabb.stride); diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index b492f33987..f8c4a2de17 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -773,7 +773,9 @@ pub struct AccelerationStructure { compacted_size_query: Option, } -impl crate::DynAccelerationStructure for AccelerationStructure {} +impl crate::DynAccelerationStructure for AccelerationStructure { + fn set_blases(&mut self, _blases: &[&dyn crate::DynAccelerationStructure]) {} +} #[derive(Debug)] pub struct Texture {