Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added o-voxel/notebooks/test_shape.glb
Binary file not shown.
1,313 changes: 1,313 additions & 0 deletions o-voxel/notebooks/verify_decoded_shape.ipynb

Large diffs are not rendered by default.

708 changes: 708 additions & 0 deletions o-voxel/notebooks/verify_fdg_jit_cpu_gpu.ipynb

Large diffs are not rendered by default.

815 changes: 815 additions & 0 deletions o-voxel/notebooks/verify_fdg_stage_profile_jit_cpu_gpu.ipynb

Large diffs are not rendered by default.

502 changes: 502 additions & 0 deletions o-voxel/notebooks/verify_voxelize_edge_jit_oct_vs_dda.ipynb

Large diffs are not rendered by default.

470 changes: 470 additions & 0 deletions o-voxel/notebooks/verify_voxelize_mesh_gpu_jit_open3d.ipynb

Large diffs are not rendered by default.

322 changes: 304 additions & 18 deletions o-voxel/o_voxel/convert/flexible_dual_grid.py

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions o-voxel/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
# Convert functions
"src/convert/flexible_dual_grid.cpp",
"src/convert/volumetic_attr.cpp",
"src/convert/mesh_to_flexible_dual_grid_gpu/torch_bindings.cu",
"src/convert/mesh_to_flexible_dual_grid_gpu/flexible_dual_grid_gpu.cu",
"src/convert/mesh_to_flexible_dual_grid_gpu/intersection_qef.cu",
"src/convert/mesh_to_flexible_dual_grid_gpu/voxelize_mesh_oct.cu",
"src/convert/mesh_to_flexible_dual_grid_gpu/voxel_traverse_edge_dda.cu",
## Serialization functions
"src/serialize/api.cu",
"src/serialize/hilbert.cu",
Expand Down
129 changes: 129 additions & 0 deletions o-voxel/src/convert/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,135 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> mesh_to_flexible_dual_gr
);


/**
 * @brief Extract a flexible dual grid from a triangle mesh (CUDA backend).
 *
 * GPU counterpart of mesh_to_flexible_dual_grid above; returns the same
 * three-tensor tuple layout as the CPU version.
 *
 * @param vertices   Mesh vertex positions.
 * @param faces      Triangle vertex-index tensor.
 * @param voxel_size Voxel edge size tensor.
 * @param grid_range Voxel-grid bounds tensor.
 * @param face_weight           Scale applied to the face QEF contribution.
 * @param boundary_weight       Scale applied to the boundary QEF contribution.
 * @param regularization_weight Scale applied to the regularization QEF term.
 * @param intersect_chunk_triangles Triangles processed per chunk during the
 *        intersection pass (memory vs. throughput trade-off).
 * @param boundary_chunk_steps  Chunking granularity for the boundary pass.
 * @return Three tensors; exact shapes/dtypes are defined by the .cu
 *         implementation — NOTE(review): confirm there.
 */
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> mesh_to_flexible_dual_grid_gpu(
    const torch::Tensor& vertices,
    const torch::Tensor& faces,
    const torch::Tensor& voxel_size,
    const torch::Tensor& grid_range,
    float face_weight,
    float boundary_weight,
    float regularization_weight,
    int64_t intersect_chunk_triangles,
    int boundary_chunk_steps
);


/**
 * @brief Compute triangle/voxel intersection occupancy only (CUDA).
 *
 * @param triangles       Triangle tensor (pre-gathered vertex positions).
 * @param voxel_size      Voxel edge size tensor.
 * @param grid_range      Voxel-grid bounds tensor.
 * @param chunk_triangles Triangles processed per chunk.
 * @return Occupancy tensor (layout defined in the .cu implementation).
 */
torch::Tensor intersection_occ_gpu(
    const torch::Tensor& triangles,
    const torch::Tensor& voxel_size,
    const torch::Tensor& grid_range,
    int64_t chunk_triangles
);


/**
 * @brief Intersect triangles against the grid and build QEF terms (CPU).
 *
 * Reference implementation of intersect_qef_gpu below; same five-tensor
 * return layout.
 */
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor> intersect_qef_cpu(
    const torch::Tensor& triangles,
    const torch::Tensor& voxel_size,
    const torch::Tensor& grid_range
);


/**
 * @brief Intersect triangles against the grid and build QEF terms (CUDA).
 *
 * @param chunk_triangles Triangles processed per chunk; otherwise identical
 *        in contract to intersect_qef_cpu above.
 */
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor> intersect_qef_gpu(
    const torch::Tensor& triangles,
    const torch::Tensor& voxel_size,
    const torch::Tensor& grid_range,
    int64_t chunk_triangles
);


/**
 * @brief Octree-based voxelization of mesh faces (CUDA).
 *
 * @return Two tensors — NOTE(review): presumably voxel coordinates plus a
 *         companion tensor; confirm in voxelize_mesh_oct.cu.
 */
std::tuple<torch::Tensor, torch::Tensor> voxelize_mesh_oct_gpu(
    const torch::Tensor& vertices,
    const torch::Tensor& faces,
    const torch::Tensor& voxel_size,
    const torch::Tensor& grid_range
);


/**
 * @brief Octree-based voxelization of mesh edges (CUDA).
 *
 * Same contract as voxelize_mesh_oct_gpu but driven by an edge list instead
 * of a face list.
 */
std::tuple<torch::Tensor, torch::Tensor> voxelize_edge_oct_gpu(
    const torch::Tensor& vertices,
    const torch::Tensor& edges,
    const torch::Tensor& voxel_size,
    const torch::Tensor& grid_range
);


/**
 * @brief Accumulate per-voxel face QEF terms (CPU).
 *
 * @param voxels Voxels to accumulate into (coordinates/ids as defined by the
 *        implementation).
 * @return Accumulated QEF tensor.
 */
torch::Tensor face_qef_cpu(
    const torch::Tensor& triangles,
    const torch::Tensor& voxel_size,
    const torch::Tensor& grid_range,
    const torch::Tensor& voxels
);


/**
 * @brief Accumulate per-voxel face QEF terms (CUDA).
 *
 * GPU counterpart of face_qef_cpu above; identical contract.
 */
torch::Tensor face_qef_gpu(
    const torch::Tensor& triangles,
    const torch::Tensor& voxel_size,
    const torch::Tensor& grid_range,
    const torch::Tensor& voxels
);


/**
 * @brief Traverse edges through the voxel grid with DDA stepping (CUDA).
 *
 * @param chunk_steps Chunking granularity of the traversal.
 * @return Two tensors (traversed voxels and a companion tensor — confirm in
 *         voxel_traverse_edge_dda.cu).
 */
std::tuple<torch::Tensor, torch::Tensor> voxel_traverse_edge_dda_gpu(
    const torch::Tensor& vertices,
    const torch::Tensor& edges,
    const torch::Tensor& voxel_size,
    const torch::Tensor& grid_range,
    int chunk_steps
);


/**
 * @brief Accumulate per-voxel boundary QEF terms (CPU).
 *
 * @param boundaries      Boundary primitives tensor.
 * @param boundary_weight Scale applied to the boundary QEF contribution.
 * @param voxels          Voxels to accumulate into.
 * @return Accumulated QEF tensor.
 */
torch::Tensor boundary_qef_cpu(
    const torch::Tensor& boundaries,
    const torch::Tensor& voxel_size,
    const torch::Tensor& grid_range,
    float boundary_weight,
    const torch::Tensor& voxels
);


/**
 * @brief Accumulate per-voxel boundary QEF terms (CUDA).
 *
 * GPU counterpart of boundary_qef_cpu above, with an extra @p chunk_steps
 * parameter controlling chunking granularity.
 */
torch::Tensor boundary_qef_gpu(
    const torch::Tensor& boundaries,
    const torch::Tensor& voxel_size,
    const torch::Tensor& grid_range,
    float boundary_weight,
    const torch::Tensor& voxels,
    int chunk_steps
);


/**
* Voxelizes a triangle mesh with PBR materials
*
Expand Down
7 changes: 5 additions & 2 deletions o-voxel/src/convert/flexible_dual_grid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,11 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> mesh_to_flexible_dual_gr
// Face QEF computation
if (face_weight > 0.0f) {
start = clock();
face_qef(e_voxel_size, e_grid_min, e_grid_max, triangles, hash_table, qefs);
std::vector<Eigen::Matrix4f> face_qefs(voxels.size(), Eigen::Matrix4f::Zero());
face_qef(e_voxel_size, e_grid_min, e_grid_max, triangles, hash_table, face_qefs);
for (size_t i = 0; i < qefs.size(); ++i) {
qefs[i] += face_weight * face_qefs[i];
}
end = clock();
if (timing) std::cout << "Face QEF computation took " << double(end - start) / CLOCKS_PER_SEC << " seconds." << std::endl;
}
Expand Down Expand Up @@ -772,4 +776,3 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> mesh_to_flexible_dual_gr
torch::from_blob(intersected.data(), {int(intersected.size()), 3}, torch::kBool).clone()
);
}

152 changes: 152 additions & 0 deletions o-voxel/src/convert/mesh_to_flexible_dual_grid_gpu/fdg_gpu_common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
#pragma once

#include <cuda_runtime.h>

#include <cstdint>
#include <stdexcept>
#include <string>
#include <utility>

namespace fdg_gpu {

// Converts a non-success CUDA status into a std::runtime_error whose message
// is "<context>: <CUDA error string>"; does nothing when the call succeeded.
inline void throw_cuda_error(cudaError_t error, const char* context) {
  if (error != cudaSuccess) {
    std::string message(context);
    message += ": ";
    message += cudaGetErrorString(error);
    throw std::runtime_error(message);
  }
}

// Plain 2-component integer pair (host/device POD); avoids depending on
// CUDA's built-in int2 in host-only translation units.
struct int2_ {
  int x;
  int y;
};

// 3-component integer vector with array-style access: [0]=x, [1]=y, [2]=z.
struct int3_ {
  int x;
  int y;
  int z;

  // Indexes via pointer arithmetic over the members; relies on x, y, z being
  // laid out contiguously with no padding (true for three ints).
  __host__ __device__ int& operator[](int i) { return (&x)[i]; }
  __host__ __device__ int operator[](int i) const { return (&x)[i]; }
};

// 3-component bool vector using the same member-pointer indexing as int3_.
struct bool3_ {
  bool x;
  bool y;
  bool z;

  __host__ __device__ bool& operator[](int i) { return (&x)[i]; }
  __host__ __device__ bool operator[](int i) const { return (&x)[i]; }
};

/**
 * @brief Move-only RAII owner of a CUDA device allocation holding `size()`
 *        elements of T.
 *
 * Memory comes from cudaMalloc and is returned with cudaFree in the
 * destructor. Copying is deleted; moving transfers ownership and leaves the
 * source empty. CUDA failures surface as std::runtime_error via
 * throw_cuda_error.
 */
template <typename T>
class DeviceBuffer {
public:
  /// Empty buffer: data() == nullptr, size() == 0.
  DeviceBuffer() = default;
  /// Allocates room for `count` elements; throws on allocation failure.
  explicit DeviceBuffer(int64_t count) { allocate(count); }
  ~DeviceBuffer() { release(); }

  DeviceBuffer(const DeviceBuffer&) = delete;
  DeviceBuffer& operator=(const DeviceBuffer&) = delete;

  /// Move: steal the allocation and reset `other` to the empty state.
  DeviceBuffer(DeviceBuffer&& other) noexcept
      : ptr_(other.ptr_), size_(other.size_), owns_(other.owns_) {
    other.ptr_ = nullptr;
    other.size_ = 0;
    other.owns_ = true;
  }

  DeviceBuffer& operator=(DeviceBuffer&& other) noexcept {
    if (this != &other) {
      release();
      ptr_ = other.ptr_;
      size_ = other.size_;
      owns_ = other.owns_;
      other.ptr_ = nullptr;
      other.size_ = 0;
      other.owns_ = true;
    }
    return *this;
  }

  /**
   * Frees any current allocation and allocates `count` elements.
   *
   * @throws std::invalid_argument if count is negative.
   * @throws std::runtime_error if cudaMalloc fails; the buffer is then left
   *         EMPTY (size() == 0), never half-initialized.
   */
  void allocate(int64_t count) {
    if (count < 0) {
      throw std::invalid_argument("DeviceBuffer::allocate count must be non-negative");
    }
    release();
    if (count == 0) return;
    // Allocate into a local first: committing size_ before cudaMalloc
    // succeeds would, on failure, leave size_ > 0 with ptr_ == nullptr —
    // size()/empty() would lie and clear_async() would memset a null pointer.
    T* fresh = nullptr;
    throw_cuda_error(cudaMalloc(reinterpret_cast<void**>(&fresh), static_cast<size_t>(count) * sizeof(T)),
                     "cudaMalloc failed in DeviceBuffer::allocate");
    ptr_ = fresh;
    size_ = count;
    owns_ = true;
  }

  /// Takes ownership of an externally allocated device pointer of `count`
  /// elements; it will be cudaFree'd by this buffer.
  void adopt(T* ptr, int64_t count) {
    release();
    ptr_ = ptr;
    size_ = count;
    owns_ = true;
  }

  /// Asynchronously zero-fills the allocation on `stream`; no-op when empty.
  void clear_async(cudaStream_t stream = nullptr) {
    if (size_ == 0) return;
    throw_cuda_error(cudaMemsetAsync(ptr_, 0, static_cast<size_t>(size_) * sizeof(T), stream),
                     "cudaMemsetAsync failed in DeviceBuffer::clear_async");
  }

  T* data() noexcept { return ptr_; }
  const T* data() const noexcept { return ptr_; }
  int64_t size() const noexcept { return size_; }
  bool empty() const noexcept { return size_ == 0; }

  /// Relinquishes ownership: returns the device pointer (caller must free
  /// it) and resets this buffer to the empty state.
  T* release_ownership() noexcept {
    T* out = ptr_;
    ptr_ = nullptr;
    size_ = 0;
    owns_ = true;
    return out;
  }

private:
  // Frees the allocation if owned; never throws (safe from the destructor).
  void release() noexcept {
    if (ptr_ != nullptr && owns_) {
      cudaFree(ptr_);
    }
    ptr_ = nullptr;
    size_ = 0;
    owns_ = true;
  }

  T* ptr_ = nullptr;  // device pointer (nullptr when empty)
  int64_t size_ = 0;  // element count, not bytes
  // NOTE(review): every code path leaves owns_ == true, so the flag is
  // currently dead state; kept to preserve the existing layout/interface.
  bool owns_ = true;
};

// Upper triangle of a symmetric 4x4 QEF matrix stored as 10 floats
// (row-major upper-triangular order: q00..q03, q11..q13, q22, q23, q33).
struct SymQEF10 {
  float q00, q01, q02, q03;
  float q11, q12, q13;
  float q22, q23;
  float q33;
};

// Parallel device arrays of length `size` pairing primitive ids with the
// (i, j, k) voxel coordinates they touch.
// NOTE(review): presumably filled by the voxelization kernels — confirm in
// the .cu implementations.
struct PrimitivePairResult {
  int64_t size = 0;               // number of valid entries in each array
  DeviceBuffer<int32_t> prim_id;  // primitive (triangle/edge) index
  DeviceBuffer<int32_t> voxel_i;
  DeviceBuffer<int32_t> voxel_j;
  DeviceBuffer<int32_t> voxel_k;
};

// Number of fixed-size blocks needed to cover n items — ceil(n / block) for
// non-negative n — narrowed to int (e.g. for use as a CUDA launch dimension).
__host__ __device__ __forceinline__ int ceil_div_i64(int64_t n, int block) {
  const int64_t rounded_up = (n + block - 1) / block;
  return static_cast<int>(rounded_up);
}

// Flattens a voxel coordinate (x, y, z) into a single 64-bit linear key with
// x varying fastest: key = ux + sx * (uy + sy * uz), where ux/uy/uz are the
// coordinates shifted to be relative to grid_min.
// NOTE(review): the extents sx/sy are grid_max - grid_min, i.e. grid_max is
// treated as an EXCLUSIVE upper bound; if callers pass an inclusive maximum,
// distinct voxels can map to the same key — confirm against the kernels.
// grid_max.z is unused because z is the slowest (unbounded) axis of the key.
__host__ __device__ __forceinline__ uint64_t pack_voxel_key(
    int x, int y, int z, int3_ grid_min, int3_ grid_max) {
  const uint64_t sx = static_cast<uint64_t>(grid_max.x - grid_min.x);
  const uint64_t sy = static_cast<uint64_t>(grid_max.y - grid_min.y);
  const uint64_t ux = static_cast<uint64_t>(x - grid_min.x);
  const uint64_t uy = static_cast<uint64_t>(y - grid_min.y);
  const uint64_t uz = static_cast<uint64_t>(z - grid_min.z);
  return ux + sx * (uy + sy * uz);
}

} // namespace fdg_gpu
Loading