diff --git a/README.md b/README.md index d00cc87..4233d6d 100644 --- a/README.md +++ b/README.md @@ -197,10 +197,22 @@ prediction/your_model ├── ... ``` -3. Run `ray_metrics.py` to evaluate on the RayIoU: +3. Install `rayiou_metrics` package and use `evaluate_metrics` to evaluate on the RayIoU: ``` -python ray_metrics.py --pred-dir prediction/your_model +cd rayiou_metrics +python setup.py install +``` + +``` +# usage example: +from rayiou_metrics.evaluation import evaluate_metrics + +gt_dir_root = 'data/nuscenes' +data_type = 'occ3d' # or 'openocc_v2' +pred_dir = prediction/your_model + +metrics = evaluate_metrics(gt_dir_root, pred_dir, data_type) ``` ## Timing diff --git a/ray_metrics.py b/ray_metrics.py deleted file mode 100644 index 225f4f4..0000000 --- a/ray_metrics.py +++ /dev/null @@ -1,69 +0,0 @@ -import os -import glob -import mmcv -import argparse -import numpy as np -import torch -from torch.utils.data import DataLoader -from loaders.ray_metrics import main_rayiou -from loaders.ego_pose_dataset import EgoPoseDataset -from configs.r50_nuimg_704x256_8f import occ_class_names as occ3d_class_names -from configs.r50_nuimg_704x256_8f_openocc import occ_class_names as openocc_class_names - - -def main(args): - data_infos = mmcv.load(os.path.join(args.data_root, 'nuscenes_infos_val.pkl'))['infos'] - gt_filepaths = sorted(glob.glob(os.path.join(args.data_root, args.data_type, '*/*/*.npz'))) - - # retrieve scene_name - token2scene = {} - for gt_path in gt_filepaths: - token = gt_path.split('/')[-2] - scene_name = gt_path.split('/')[-3] - token2scene[token] = scene_name - - for i in range(len(data_infos)): - scene_name = token2scene[data_infos[i]['token']] - data_infos[i]['scene_name'] = scene_name - - lidar_origins = [] - occ_gts = [] - occ_preds = [] - - for idx, batch in enumerate(DataLoader(EgoPoseDataset(data_infos), num_workers=8)): - output_origin = batch[1] - info = data_infos[idx] - - occ_path = os.path.join(args.data_root, args.data_type, info['scene_name'], info['token'], 'labels.npz') - occ_gt = np.load(occ_path, allow_pickle=True)['semantics'] - occ_gt = np.reshape(occ_gt, [200, 200, 16]).astype(np.uint8) - - occ_path = os.path.join(args.pred_dir, info['token'] + '.npz') - occ_pred = np.load(occ_path, allow_pickle=True)['pred'] - occ_pred = np.reshape(occ_pred, [200, 200, 16]).astype(np.uint8) - - lidar_origins.append(output_origin) - occ_gts.append(occ_gt) - occ_preds.append(occ_pred) - - if args.data_type == 'occ3d': - occ_class_names = occ3d_class_names - elif args.data_type == 'openocc_v2': - occ_class_names = openocc_class_names - else: - raise ValueError - - print(main_rayiou(occ_preds, occ_gts, lidar_origins, occ_class_names=occ_class_names)) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--data-root", type=str, default='data/nuscenes') - parser.add_argument("--pred-dir", type=str) - parser.add_argument("--data-type", type=str, choices=['occ3d', 'openocc_v2'], default='occ3d') - args = parser.parse_args() - - torch.random.manual_seed(0) - np.random.seed(0) - - main(args) diff --git a/rayiou_metrics/lib/dvr_cpu/dvr.cpp b/rayiou_metrics/lib/dvr_cpu/dvr.cpp new file mode 100644 index 0000000..9c3d722 --- /dev/null +++ b/rayiou_metrics/lib/dvr_cpu/dvr.cpp @@ -0,0 +1,71 @@ +// Acknowledgments: https://github.com/tarashakhurana/4d-occ-forecasting +// Modified by Haisong Liu + +#include +#include +#include + +/* + * CUDA forward declarations + */ + +std::vector render_forward_cpu(torch::Tensor sigma, + torch::Tensor origin, + torch::Tensor points, + torch::Tensor tindex, + const std::vector grid, + std::string phase_name); + +std::vector +render_cpu(torch::Tensor sigma, torch::Tensor origin, torch::Tensor points, + torch::Tensor tindex, std::string loss_name); + +torch::Tensor init_cpu(torch::Tensor points, torch::Tensor tindex, + const std::vector grid); + +/* + * C++ interface + */ + +#define CHECK_CUDA(x) \ + TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") +#define CHECK_CONTIGUOUS(x) \ + TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") +#define CHECK_INPUT(x) \ + CHECK_CUDA(x); \ + CHECK_CONTIGUOUS(x) + +std::vector +render_forward(torch::Tensor sigma, torch::Tensor origin, torch::Tensor points, + torch::Tensor tindex, const std::vector grid, + std::string phase_name) { + CHECK_CONTIGUOUS(sigma); + CHECK_CONTIGUOUS(origin); + CHECK_CONTIGUOUS(points); + CHECK_CONTIGUOUS(tindex); + return render_forward_cpu(sigma, origin, points, tindex, grid, phase_name); +} + + +std::vector render(torch::Tensor sigma, torch::Tensor origin, + torch::Tensor points, torch::Tensor tindex, + std::string loss_name) { + CHECK_CONTIGUOUS(sigma); + CHECK_CONTIGUOUS(origin); + CHECK_CONTIGUOUS(points); + CHECK_CONTIGUOUS(tindex); + return render_cpu(sigma, origin, points, tindex, loss_name); +} + +torch::Tensor init(torch::Tensor points, torch::Tensor tindex, + const std::vector grid) { + CHECK_CONTIGUOUS(points); + CHECK_CONTIGUOUS(tindex); + return init_cpu(points, tindex, grid); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("init", &init, "Initialize"); + m.def("render", &render, "Render"); + m.def("render_forward", &render_forward, "Render (forward pass only)"); +} diff --git a/rayiou_metrics/lib/dvr_cpu/dvr_render_cpu.cpp b/rayiou_metrics/lib/dvr_cpu/dvr_render_cpu.cpp new file mode 100644 index 0000000..07ace22 --- /dev/null +++ b/rayiou_metrics/lib/dvr_cpu/dvr_render_cpu.cpp @@ -0,0 +1,650 @@ + + +// Acknowledgments: https://github.com/tarashakhurana/4d-occ-forecasting +// Modified by Haisong Liu + +#include +#include +#include +#include +#include +#include +#include + +#define MAX_D 1446 // 700 + 700 + 45 + 1 +#define MAX_STEP 1000 + + +enum LossType {L1, L2, ABSREL}; +enum PhaseName {TEST, TRAIN}; + +struct int3{ + int x, y, z; + int3(int _x, int _y, int _z) : x(_x), y(_y), z(_z) {} + int3() {} +}; + +/* + * input shape + * sigma : N x T x H x L x W + * origin : N x T x 3 + * points : N x M x 4 + * output shape + * dist : N x M + */ +std::vector render_forward_cpu( + torch::Tensor sigma, + torch::Tensor origin, + torch::Tensor points, + torch::Tensor tindex, + const std::vector grid, + std::string phase_name) { + + const auto N = points.size(0); // batch size + const auto M = points.size(1); // num of rays + + const auto T = grid[0]; + const auto H = grid[1]; + const auto L = grid[2]; + const auto W = grid[3]; + + const auto device = sigma.device(); + + // + // const auto dtype = points.dtype(); + // const auto options = torch::TensorOptions().dtype(dtype).device(device).requires_grad(false); + // auto pog = torch::zeros({N, T, H, L, W}, options); + + // perform rendering + auto gt_dist = -torch::ones({N, M}, device); + auto pred_dist = -torch::ones({N, M}, device); + + auto coord_index = torch::zeros({N, M, 3}, device); + + PhaseName train_phase; + if (phase_name.compare("test") == 0) { + train_phase = TEST; + } else if (phase_name.compare("train") == 0){ + train_phase = TRAIN; + } else { + std::cout << "UNKNOWN PHASE NAME: " << phase_name << std::endl; + exit(1); + } + + for (int n = 0; n < N; n++) { // batch index + for (int m = 0; m < M; m++) { // ray index + // ray end point + const int t = tindex[n][m].item(); // cancel auto to change dtype into int + + // invalid points + // assert(t < T); + assert(T == 1 || t < T); + + // time index for sigma + // when T = 1, we have a static sigma + const auto ts = (T == 1) ? 0 : t; + + // if t < 0, it is a padded point + if (t < 0) continue; + + // grid shape + const int vzsize = sigma.size(2); + const int vysize = sigma.size(3); + const int vxsize = sigma.size(4); + // assert(vzsize + vysize + vxsize <= MAX_D); + + // origin + const double xo = origin[n][t][0].item(); + const double yo = origin[n][t][1].item(); + const double zo = origin[n][t][2].item(); + + // end point + const double xe = points[n][m][0].item(); + const double ye = points[n][m][1].item(); + const double ze = points[n][m][2].item(); + + // locate the voxel where the origin resides + const int vxo = int(xo); + const int vyo = int(yo); + const int vzo = int(zo); + + const int vxe = int(xe); + const int vye = int(ye); + const int vze = int(ze); + + // NOTE: new + int vx = vxo; + int vy = vyo; + int vz = vzo; + + // origin to end + const double rx = xe - xo; + const double ry = ye - yo; + const double rz = ze - zo; + double gt_d = sqrt(rx * rx + ry * ry + rz * rz); + + // directional vector + const double dx = rx / gt_d; + const double dy = ry / gt_d; + const double dz = rz / gt_d; + + // In which direction the voxel ids are incremented. + const int stepX = (dx >= 0) ? 1 : -1; + const int stepY = (dy >= 0) ? 1 : -1; + const int stepZ = (dz >= 0) ? 1 : -1; + + // Distance along the ray to the next voxel border from the current position (tMaxX, tMaxY, tMaxZ). + const double next_voxel_boundary_x = vx + (stepX < 0 ? 0 : 1); + const double next_voxel_boundary_y = vy + (stepY < 0 ? 0 : 1); + const double next_voxel_boundary_z = vz + (stepZ < 0 ? 0 : 1); + + // tMaxX, tMaxY, tMaxZ -- distance until next intersection with voxel-border + // the value of t at which the ray crosses the first vertical voxel boundary + double tMaxX = (dx!=0) ? (next_voxel_boundary_x - xo)/dx : DBL_MAX; // + double tMaxY = (dy!=0) ? (next_voxel_boundary_y - yo)/dy : DBL_MAX; // + double tMaxZ = (dz!=0) ? (next_voxel_boundary_z - zo)/dz : DBL_MAX; // + + // tDeltaX, tDeltaY, tDeltaZ -- + // how far along the ray we must move for the horizontal component to equal the width of a voxel + // the direction in which we traverse the grid + // can only be FLT_MAX if we never go in that direction + const double tDeltaX = (dx!=0) ? stepX/dx : DBL_MAX; + const double tDeltaY = (dy!=0) ? stepY/dy : DBL_MAX; + const double tDeltaZ = (dz!=0) ? stepZ/dz : DBL_MAX; + + + int3 path[MAX_D]; + double csd[MAX_D]; // cumulative sum of sigma times delta + double p[MAX_D]; // alpha + double d[MAX_D]; + + // forward raymarching with voxel traversal + int step = 0; // total number of voxels traversed + int count = 0; // number of voxels traversed inside the voxel grid + double last_d = 0.0; // correct initialization + + // voxel traversal raycasting + bool was_inside = false; + while (true) { + bool inside = (0 <= vx && vx < vxsize) && + (0 <= vy && vy < vysize) && + (0 <= vz && vz < vzsize); + if (inside) { + was_inside = true; + path[count] = int3(vx, vy, vz); + } else if (was_inside) { // was but no longer inside + // we know we are not coming back so terminate + break; + } /*else if (last_d > gt_d) { + break; + } */ + /*else { // has not gone inside yet + // assert(count == 0); + // (1) when we have hit the destination but haven't gone inside the voxel grid + // (2) when we have traveled MAX_D voxels but haven't found one valid voxel + // handle intersection corner cases in case of infinite loop + bool hit = (vx == vxe && vy == vye && vz == vze); // this test seems brittle with corner cases + if (hit || step >= MAX_D) + break; + //if (last_d >= gt_d || step >= MAX_D) break; + } */ + // _d represents the ray distance has traveled before escaping the current voxel cell + double _d = 0.0; + // voxel traversal + if (tMaxX < tMaxY) { + if (tMaxX < tMaxZ) { + _d = tMaxX; + vx += stepX; + tMaxX += tDeltaX; + } else { + _d = tMaxZ; + vz += stepZ; + tMaxZ += tDeltaZ; + } + } else { + if (tMaxY < tMaxZ) { + _d = tMaxY; + vy += stepY; + tMaxY += tDeltaY; + } else { + _d = tMaxZ; + vz += stepZ; + tMaxZ += tDeltaZ; + } + } + if (inside) { + // get sigma at the current voxel + const int3 &v = path[count]; // use the recorded index + const double _sigma = sigma[n][ts][v.z][v.y][v.x].item(); + const double _delta = std::max(0.0, _d - last_d); // THIS TURNS OUT IMPORTANT + const double sd = _sigma * _delta; + if (count == 0) { // the first voxel inside + csd[count] = sd; + p[count] = 1 - exp(-sd); + } else { + csd[count] = csd[count-1] + sd; + p[count] = exp(-csd[count-1]) - exp(-csd[count]); + } + // record the traveled distance + d[count] = _d; + // count the number of voxels we have escaped + count ++; + } + last_d = _d; + step ++; + + if (step > MAX_STEP) { + break; + } + } + + // the total number of voxels visited should not exceed this number + assert(count <= MAX_D); + + if (count > 0) { + // compute the expected ray distance + //double exp_d = 0.0; + double exp_d = d[count-1]; + + const int3 &v_init = path[count-1]; + int x = v_init.x; + int y = v_init.y; + int z = v_init.z; + + for (int i = 0; i < count; i++) { + //printf("%f\t%f\n",p[i], d[i]); + //exp_d += p[i] * d[i]; + const int3 &v = path[i]; + const double occ = sigma[n][ts][v.z][v.y][v.x].item(); + if (occ > 0.5) { + exp_d = d[i]; + + x = v.x; + y = v.y; + z = v.z; + + break; + } + + } + //printf("%f\n",exp_d); + + // add an imaginary sample at the end point should gt_d exceeds max_d + double p_out = exp(-csd[count-1]); + double max_d = d[count-1]; + + // if (gt_d > max_d) + // exp_d += (p_out * gt_d); + + // p_out is the probability the ray escapes the voxel grid + //exp_d += (p_out * max_d); + if (train_phase == 1) { + gt_d = std::min(gt_d, max_d); + } + + // write the rendered ray distance (max_d) + pred_dist[n][m] = exp_d; + gt_dist[n][m] = gt_d; + + coord_index[n][m][0] = double(x); + coord_index[n][m][1] = double(y); + coord_index[n][m][2] = double(z); + + // // write occupancy + // for (int i = 0; i < count; i ++) { + // const int3 &v = path[i]; + // auto & occ = pog[n][t][v.z][v.y][v.x]; + // if (p[i] >= occ) { + // occ = p[i]; + // } + // } + } + } + } + + // return {pog, pred_dist, gt_dist}; + return {pred_dist, gt_dist, coord_index}; +} + +/* + * input shape + * sigma : N x T x H x L x W + * origin : N x T x 3 + * points : N x M x 4 + * output shape + * dist : N x M + * loss : N x M + * grad_sigma : N x T x H x L x W + */ +std::vector render_cpu( + torch::Tensor sigma, + torch::Tensor origin, + torch::Tensor points, + torch::Tensor tindex, + std::string loss_name) { + + const auto N = points.size(0); // batch size + const auto M = points.size(1); // num of rays + const auto T = sigma.size(1); // num of time steps + + const auto device = sigma.device(); + + // perform rendering + auto gt_dist = -torch::ones({N, M}, device); + auto pred_dist = -torch::ones({N, M}, device); + auto grad_sigma = torch::zeros_like(sigma); + // auto grad_sigma_count = torch::zeros_like(sigma); + + LossType loss_type; + if (loss_name.compare("l1") == 0) { + loss_type = L1; + } else if (loss_name.compare("l2") == 0) { + loss_type = L2; + } else if (loss_name.compare("absrel") == 0) { + loss_type = ABSREL; + } else if (loss_name.compare("bce") == 0){ + loss_type = L1; + } else { + std::cout << "UNKNOWN LOSS TYPE: " << loss_name << std::endl; + exit(1); + } + + for (int n = 0; n < N; n++) { // batch index + for (int m = 0; m < M; m++) { // ray index + const int t = tindex[n][m].item(); // cancel auto to change dtype into int + + assert(T == 1 || t < T); + + // time index for sigma + // when T = 1, we have a static sigma + const auto ts = (T == 1) ? 0 : t; + + // if t < 0, it is a padded point + if (t < 0) continue; + + // grid shape + const int vzsize = sigma.size(2); + const int vysize = sigma.size(3); + const int vxsize = sigma.size(4); + // assert(vzsize + vysize + vxsize <= MAX_D); + + // origin + const double xo = origin[n][t][0].item(); + const double yo = origin[n][t][1].item(); + const double zo = origin[n][t][2].item(); + + // end point + const double xe = points[n][m][0].item(); + const double ye = points[n][m][1].item(); + const double ze = points[n][m][2].item(); + + // locate the voxel where the origin resides + const int vxo = int(xo); + const int vyo = int(yo); + const int vzo = int(zo); + + // + const int vxe = int(xe); + const int vye = int(ye); + const int vze = int(ze); + + // NOTE: new + int vx = vxo; + int vy = vyo; + int vz = vzo; + + // origin to end + const double rx = xe - xo; + const double ry = ye - yo; + const double rz = ze - zo; + double gt_d = sqrt(rx * rx + ry * ry + rz * rz); + + // directional vector + const double dx = rx / gt_d; + const double dy = ry / gt_d; + const double dz = rz / gt_d; + + // In which direction the voxel ids are incremented. + const int stepX = (dx >= 0) ? 1 : -1; + const int stepY = (dy >= 0) ? 1 : -1; + const int stepZ = (dz >= 0) ? 1 : -1; + + // Distance along the ray to the next voxel border from the current position (tMaxX, tMaxY, tMaxZ). + const double next_voxel_boundary_x = vx + (stepX < 0 ? 0 : 1); + const double next_voxel_boundary_y = vy + (stepY < 0 ? 0 : 1); + const double next_voxel_boundary_z = vz + (stepZ < 0 ? 0 : 1); + + // tMaxX, tMaxY, tMaxZ -- distance until next intersection with voxel-border + // the value of t at which the ray crosses the first vertical voxel boundary + double tMaxX = (dx!=0) ? (next_voxel_boundary_x - xo)/dx : DBL_MAX; // + double tMaxY = (dy!=0) ? (next_voxel_boundary_y - yo)/dy : DBL_MAX; // + double tMaxZ = (dz!=0) ? (next_voxel_boundary_z - zo)/dz : DBL_MAX; // + + // tDeltaX, tDeltaY, tDeltaZ -- + // how far along the ray we must move for the horizontal component to equal the width of a voxel + // the direction in which we traverse the grid + // can only be FLT_MAX if we never go in that direction + const double tDeltaX = (dx!=0) ? stepX/dx : DBL_MAX; + const double tDeltaY = (dy!=0) ? stepY/dy : DBL_MAX; + const double tDeltaZ = (dz!=0) ? stepZ/dz : DBL_MAX; + + int3 path[MAX_D]; + double csd[MAX_D]; // cumulative sum of sigma times delta + double p[MAX_D]; // alpha + double d[MAX_D]; + double dt[MAX_D]; + + // forward raymarching with voxel traversal + int step = 0; // total number of voxels traversed + int count = 0; // number of voxels traversed inside the voxel grid + double last_d = 0.0; // correct initialization + + // voxel traversal raycasting + bool was_inside = false; + while (true) { + bool inside = (0 <= vx && vx < vxsize) && + (0 <= vy && vy < vysize) && + (0 <= vz && vz < vzsize); + if (inside) { // now inside + was_inside = true; + path[count] = int3(vx, vy, vz); + } else if (was_inside) { // was inside but no longer + // we know we are not coming back so terminate + break; + } else if (last_d > gt_d) { + break; + } /* else { // has not gone inside yet + // assert(count == 0); + // (1) when we have hit the destination but haven't gone inside the voxel grid + // (2) when we have traveled MAX_D voxels but haven't found one valid voxel + // handle intersection corner cases in case of infinite loop + // bool hit = (vx == vxe && vy == vye && vz == vze); + // if (hit || step >= MAX_D) + // break; + if (last_d >= gt_d || step >= MAX_D) break; + } */ + // _d represents the ray distance has traveled before escaping the current voxel cell + double _d = 0.0; + // voxel traversal + if (tMaxX < tMaxY) { + if (tMaxX < tMaxZ) { + _d = tMaxX; + vx += stepX; + tMaxX += tDeltaX; + } else { + _d = tMaxZ; + vz += stepZ; + tMaxZ += tDeltaZ; + } + } else { + if (tMaxY < tMaxZ) { + _d = tMaxY; + vy += stepY; + tMaxY += tDeltaY; + } else { + _d = tMaxZ; + vz += stepZ; + tMaxZ += tDeltaZ; + } + } + if (inside) { + // get sigma at the current voxel + const int3 &v = path[count]; // use the recorded index + const double _sigma = sigma[n][ts][v.z][v.y][v.x].item(); + const double _delta = std::max(0.0, _d - last_d); // THIS TURNS OUT IMPORTANT + const double sd = _sigma * _delta; + if (count == 0) { // the first voxel inside + csd[count] = sd; + p[count] = 1 - exp(-sd); + } else { + csd[count] = csd[count-1] + sd; + p[count] = exp(-csd[count-1]) - exp(-csd[count]); + } + // record the traveled distance + d[count] = _d; + dt[count] = _delta; + // count the number of voxels we have escaped + count ++; + } + last_d = _d; + step ++; + + if (step > MAX_STEP) { + break; + } + } + + // the total number of voxels visited should not exceed this number + assert(count <= MAX_D); + + // WHEN THERE IS AN INTERSECTION BETWEEN THE RAY AND THE VOXEL GRID + if (count > 0) { + // compute the expected ray distance + double exp_d = 0.0; + for (int i = 0; i < count; i ++) + exp_d += p[i] * d[i]; + + // add an imaginary sample at the end point should gt_d exceeds max_d + double p_out = exp(-csd[count-1]); + double max_d = d[count-1]; + + exp_d += (p_out * max_d); + gt_d = std::min(gt_d, max_d); + + // write the rendered ray distance (max_d) + pred_dist[n][m] = exp_d; + gt_dist[n][m] = gt_d; + + /* backward raymarching */ + double dd_dsigma[MAX_D]; + for (int i = count - 1; i >= 0; i --) { + // NOTE: probably need to double check again + if (i == count - 1) + dd_dsigma[i] = p_out * max_d; + else + dd_dsigma[i] = dd_dsigma[i+1] - exp(-csd[i]) * (d[i+1] - d[i]); + } + + for (int i = count - 1; i >= 0; i --) + dd_dsigma[i] *= dt[i]; + + // option 2: cap at the boundary + for (int i = count - 1; i >= 0; i --) + dd_dsigma[i] -= dt[i] * p_out * max_d; + + double dl_dd = 1.0; + if (loss_type == L1) + dl_dd = (exp_d >= gt_d) ? 1 : -1; + else if (loss_type == L2) + dl_dd = (exp_d - gt_d); + else if (loss_type == ABSREL) + dl_dd = (exp_d >= gt_d) ? (1.0/gt_d) : -(1.0/gt_d); + + // apply chain rule + for (int i = 0; i < count; i ++) { + const int3 &v = path[i]; + // NOTE: potential race conditions when writing gradients + grad_sigma[n][ts][v.z][v.y][v.x] += dl_dd * dd_dsigma[i]; + // grad_sigma_count[n][ts][v.z][v.y][v.x] += 1; + } + } + } + } + + + // grad_sigma_count += (grad_sigma_count == 0); + // grad_sigma /= grad_sigma_count; + + return {pred_dist, gt_dist, grad_sigma}; +} + + +/* + * input shape + * origin : N x T x 3 + * points : N x M x 3 + * tindex : N x M + * output shape + * occupancy: N x T x H x L x W + */ +torch::Tensor init_cpu( + torch::Tensor points, + torch::Tensor tindex, + const std::vector grid) { + + const auto N = points.size(0); // batch size + const auto M = points.size(1); // num of rays + + const auto T = grid[0]; + const auto H = grid[1]; + const auto L = grid[2]; + const auto W = grid[3]; + + const auto dtype = points.dtype(); + const auto device = points.device(); + const auto options = torch::TensorOptions().dtype(dtype).device(device).requires_grad(false); + auto occupancy = torch::zeros({N, T, H, L, W}, options); + + // // initialize occupancy such that every voxel with one or more points is occupied + + for (int n = 0; n < N; n++) { // batch index + for (int m = 0; m < M; m++) { // ray index + // ray end point + const int t = tindex[n][m].item(); // cancel auto to change dtype into int + + // invalid points + assert(T == 1 || t < T); + + // if t < 0, it is a padded point + if (t < 0) continue; + + // time index for sigma + // when T = 1, we have a static sigma + const auto ts = (T == 1) ? 0 : t; + + // grid shape + const int vzsize = occupancy.size(2); + const int vysize = occupancy.size(3); + const int vxsize = occupancy.size(4); + // assert(vzsize + vysize + vxsize <= MAX_D); + + // end point + const int vx = int(points[n][m][0].item()); + const int vy = int(points[n][m][1].item()); + const int vz = int(points[n][m][2].item()); + + // + if (0 <= vx && vx < vxsize && + 0 <= vy && vy < vysize && + 0 <= vz && vz < vzsize) { + occupancy[n][ts][vz][vy][vx] = 1; + } + } + } + + return occupancy; +} + + + + \ No newline at end of file diff --git a/rayiou_metrics/lib/dvr_cuda/dvr_cuda.cpp b/rayiou_metrics/lib/dvr_cuda/dvr_cuda.cpp new file mode 100644 index 0000000..b2a7c09 --- /dev/null +++ b/rayiou_metrics/lib/dvr_cuda/dvr_cuda.cpp @@ -0,0 +1,72 @@ +// Acknowledgments: https://github.com/tarashakhurana/4d-occ-forecasting +// Modified by Haisong Liu + +#include +#include +#include + +/* + * CUDA forward declarations + */ + +std::vector render_forward_cuda(torch::Tensor sigma, + torch::Tensor origin, + torch::Tensor points, + torch::Tensor tindex, + const std::vector grid, + std::string phase_name); + +std::vector +render_cuda(torch::Tensor sigma, torch::Tensor origin, torch::Tensor points, + torch::Tensor tindex, std::string loss_name); + +torch::Tensor init_cuda(torch::Tensor points, torch::Tensor tindex, + const std::vector grid); + + +/* + * C++ interface + */ + +#define CHECK_CUDA(x) \ + TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") +#define CHECK_CONTIGUOUS(x) \ + TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") +#define CHECK_INPUT(x) \ + CHECK_CUDA(x); \ + CHECK_CONTIGUOUS(x) + +std::vector +render_forward(torch::Tensor sigma, torch::Tensor origin, torch::Tensor points, + torch::Tensor tindex, const std::vector grid, + std::string phase_name) { + CHECK_INPUT(sigma); + CHECK_INPUT(origin); + CHECK_INPUT(points); + CHECK_INPUT(tindex); + return render_forward_cuda(sigma, origin, points, tindex, grid, phase_name); +} + + +std::vector render(torch::Tensor sigma, torch::Tensor origin, + torch::Tensor points, torch::Tensor tindex, + std::string loss_name) { + CHECK_INPUT(sigma); + CHECK_INPUT(origin); + CHECK_INPUT(points); + CHECK_INPUT(tindex); + return render_cuda(sigma, origin, points, tindex, loss_name); +} + +torch::Tensor init(torch::Tensor points, torch::Tensor tindex, + const std::vector grid) { + CHECK_INPUT(points); + CHECK_INPUT(tindex); + return init_cuda(points, tindex, grid); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("init", &init, "Initialize"); + m.def("render", &render, "Render"); + m.def("render_forward", &render_forward, "Render (forward pass only)"); +} diff --git a/rayiou_metrics/lib/dvr_cuda/dvr_render_cuda.cu b/rayiou_metrics/lib/dvr_cuda/dvr_render_cuda.cu new file mode 100644 index 0000000..8010eb8 --- /dev/null +++ b/rayiou_metrics/lib/dvr_cuda/dvr_render_cuda.cu @@ -0,0 +1,747 @@ +// Acknowledgments: https://github.com/tarashakhurana/4d-occ-forecasting +// Modified by Haisong Liu + +#include +#include +#include +#include +#include +#include +#include + +#define MAX_D 1446 // 700 + 700 + 45 + 1 +#define MAX_STEP 1000 + +enum LossType {L1, L2, ABSREL}; +enum PhaseName {TEST, TRAIN}; + +template +__global__ void init_cuda_kernel( + const torch::PackedTensorAccessor32 points, + const torch::PackedTensorAccessor32 tindex, + torch::PackedTensorAccessor32 occupancy) { + + // batch index + const auto n = blockIdx.y; + + // ray index + const auto c = blockIdx.x * blockDim.x + threadIdx.x; + + // num of rays + const auto M = points.size(1); + const auto T = occupancy.size(1); + + // we allocated more threads than num_rays + if (c < M) { + // ray end point + const auto t = tindex[n][c]; + + // invalid points + assert(T == 1 || t < T); + + // if t < 0, it is a padded point + if (t < 0) return; + + // time index for sigma + // when T = 1, we have a static sigma + const auto ts = (T == 1) ? 0 : t; + + // grid shape + const int vzsize = occupancy.size(2); + const int vysize = occupancy.size(3); + const int vxsize = occupancy.size(4); + // assert(vzsize + vysize + vxsize <= MAX_D); + + // end point + const int vx = int(points[n][c][0]); + const int vy = int(points[n][c][1]); + const int vz = int(points[n][c][2]); + + // + if (0 <= vx && vx < vxsize && + 0 <= vy && vy < vysize && + 0 <= vz && vz < vzsize) { + occupancy[n][ts][vz][vy][vx] = 1; + } + } +} + +template +__global__ void render_forward_cuda_kernel( + const torch::PackedTensorAccessor32 sigma, + const torch::PackedTensorAccessor32 origin, + const torch::PackedTensorAccessor32 points, + const torch::PackedTensorAccessor32 tindex, + // torch::PackedTensorAccessor32 pog, + torch::PackedTensorAccessor32 pred_dist, + torch::PackedTensorAccessor32 gt_dist, + torch::PackedTensorAccessor32 coord_index, + PhaseName train_phase) { + + // batch index + const auto n = blockIdx.y; + + // ray index + const auto c = blockIdx.x * blockDim.x + threadIdx.x; + + // num of rays + const auto M = points.size(1); + const auto T = sigma.size(1); + + // we allocated more threads than num_rays + if (c < M) { + // ray end point + const auto t = tindex[n][c]; + + // invalid points + // assert(t < T); + assert(T == 1 || t < T); + + // time index for sigma + // when T = 1, we have a static sigma + const auto ts = (T == 1) ? 0 : t; + + // if t < 0, it is a padded point + if (t < 0) return; + + // grid shape + const int vzsize = sigma.size(2); + const int vysize = sigma.size(3); + const int vxsize = sigma.size(4); + // assert(vzsize + vysize + vxsize <= MAX_D); + + // origin + const double xo = origin[n][t][0]; + const double yo = origin[n][t][1]; + const double zo = origin[n][t][2]; + + // end point + const double xe = points[n][c][0]; + const double ye = points[n][c][1]; + const double ze = points[n][c][2]; + + // locate the voxel where the origin resides + const int vxo = int(xo); + const int vyo = int(yo); + const int vzo = int(zo); + + const int vxe = int(xe); + const int vye = int(ye); + const int vze = int(ze); + + // NOTE: new + int vx = vxo; + int vy = vyo; + int vz = vzo; + + // origin to end + const double rx = xe - xo; + const double ry = ye - yo; + const double rz = ze - zo; + double gt_d = sqrt(rx * rx + ry * ry + rz * rz); + + // directional vector + const double dx = rx / gt_d; + const double dy = ry / gt_d; + const double dz = rz / gt_d; + + // In which direction the voxel ids are incremented. + const int stepX = (dx >= 0) ? 1 : -1; + const int stepY = (dy >= 0) ? 1 : -1; + const int stepZ = (dz >= 0) ? 1 : -1; + + // Distance along the ray to the next voxel border from the current position (tMaxX, tMaxY, tMaxZ). + const double next_voxel_boundary_x = vx + (stepX < 0 ? 0 : 1); + const double next_voxel_boundary_y = vy + (stepY < 0 ? 0 : 1); + const double next_voxel_boundary_z = vz + (stepZ < 0 ? 0 : 1); + + // tMaxX, tMaxY, tMaxZ -- distance until next intersection with voxel-border + // the value of t at which the ray crosses the first vertical voxel boundary + double tMaxX = (dx!=0) ? (next_voxel_boundary_x - xo)/dx : DBL_MAX; // + double tMaxY = (dy!=0) ? (next_voxel_boundary_y - yo)/dy : DBL_MAX; // + double tMaxZ = (dz!=0) ? (next_voxel_boundary_z - zo)/dz : DBL_MAX; // + + // tDeltaX, tDeltaY, tDeltaZ -- + // how far along the ray we must move for the horizontal component to equal the width of a voxel + // the direction in which we traverse the grid + // can only be FLT_MAX if we never go in that direction + const double tDeltaX = (dx!=0) ? stepX/dx : DBL_MAX; + const double tDeltaY = (dy!=0) ? stepY/dy : DBL_MAX; + const double tDeltaZ = (dz!=0) ? stepZ/dz : DBL_MAX; + + int3 path[MAX_D]; + double csd[MAX_D]; // cumulative sum of sigma times delta + double p[MAX_D]; // alpha + double d[MAX_D]; + + // forward raymarching with voxel traversal + int step = 0; // total number of voxels traversed + int count = 0; // number of voxels traversed inside the voxel grid + double last_d = 0.0; // correct initialization + + // voxel traversal raycasting + bool was_inside = false; + while (true) { + bool inside = (0 <= vx && vx < vxsize) && + (0 <= vy && vy < vysize) && + (0 <= vz && vz < vzsize); + if (inside) { + was_inside = true; + path[count] = make_int3(vx, vy, vz); + } else if (was_inside) { // was but no longer inside + // we know we are not coming back so terminate + break; + } /*else if (last_d > gt_d) { + break; + } */ + /*else { // has not gone inside yet + // assert(count == 0); + // (1) when we have hit the destination but haven't gone inside the voxel grid + // (2) when we have traveled MAX_D voxels but haven't found one valid voxel + // handle intersection corner cases in case of infinite loop + bool hit = (vx == vxe && vy == vye && vz == vze); // this test seems brittle with corner cases + if (hit || step >= MAX_D) + break; + //if (last_d >= gt_d || step >= MAX_D) break; + } */ + // _d represents the ray distance has traveled before escaping the current voxel cell + double _d = 0.0; + // voxel traversal + if (tMaxX < tMaxY) { + if (tMaxX < tMaxZ) { + _d = tMaxX; + vx += stepX; + tMaxX += tDeltaX; + } else { + _d = tMaxZ; + vz += stepZ; + tMaxZ += tDeltaZ; + } + } else { + if (tMaxY < tMaxZ) { + _d = tMaxY; + vy += stepY; + tMaxY += tDeltaY; + } else { + _d = tMaxZ; + vz += stepZ; + tMaxZ += tDeltaZ; + } + } + if (inside) { + // get sigma at the current voxel + const int3 &v = path[count]; // use the recorded index + const double _sigma = sigma[n][ts][v.z][v.y][v.x]; + const double _delta = max(0.0, _d - last_d); // THIS TURNS OUT IMPORTANT + const double sd = _sigma * _delta; + if (count == 0) { // the first voxel inside + csd[count] = sd; + p[count] = 1 - exp(-sd); + } else { + csd[count] = csd[count-1] + sd; + p[count] = exp(-csd[count-1]) - exp(-csd[count]); + } + // record the traveled distance + d[count] = _d; + // count the number of voxels we have escaped + count ++; + } + last_d = _d; + step ++; + + if (step > MAX_STEP) { + break; + } + } + + // the total number of voxels visited should not exceed this number + assert(count <= MAX_D); + + if (count > 0) { + // compute the expected ray distance + //double exp_d = 0.0; + double exp_d = d[count-1]; + + const int3 &v_init = path[count-1]; + int x = v_init.x; + int y = v_init.y; + int z = v_init.z; + + for (int i = 0; i < count; i++) { + //printf("%f\t%f\n",p[i], d[i]); + //exp_d += p[i] * d[i]; + const int3 &v = path[i]; + const double occ = sigma[n][ts][v.z][v.y][v.x]; + if (occ > 0.5) { + exp_d = d[i]; + + x = v.x; + y = v.y; + z = v.z; + + break; + } + + } + //printf("%f\n",exp_d); + + // add an imaginary sample at the end point should gt_d exceeds max_d + double p_out = exp(-csd[count-1]); + double max_d = d[count-1]; + + // if (gt_d > max_d) + // exp_d += (p_out * gt_d); + + // p_out is the probability the ray escapes the voxel grid + //exp_d += (p_out * max_d); + if (train_phase == 1) { + gt_d = min(gt_d, max_d); + } + + // write the rendered ray distance (max_d) + pred_dist[n][c] = exp_d; + gt_dist[n][c] = gt_d; + + coord_index[n][c][0] = double(x); + coord_index[n][c][1] = double(y); + coord_index[n][c][2] = double(z); + + // // write occupancy + // for (int i = 0; i < count; i ++) { + // const int3 &v = path[i]; + // auto & occ = pog[n][t][v.z][v.y][v.x]; + // if (p[i] >= occ) { + // occ = p[i]; + // } + // } + } + } +} + +/* + * input shape + * sigma : N x T x H x L x W + * origin : N x T x 3 + * points : N x M x 4 + * output shape + * dist : N x M + */ +std::vector render_forward_cuda( + torch::Tensor sigma, + torch::Tensor origin, + torch::Tensor points, + torch::Tensor tindex, + const std::vector grid, + std::string phase_name) { + + const auto N = points.size(0); // batch size + const auto M = points.size(1); // num of rays + + const auto T = grid[0]; + const auto H = grid[1]; + const auto L = grid[2]; + const auto W = grid[3]; + + const auto device = sigma.device(); + + const int threads = 1024; + const dim3 blocks((M + threads - 1) / threads, N); + + // + // const auto dtype = points.dtype(); + // const auto options = torch::TensorOptions().dtype(dtype).device(device).requires_grad(false); + // auto pog = torch::zeros({N, T, H, L, W}, options); + + // perform rendering + auto gt_dist = -torch::ones({N, M}, device); + auto pred_dist = -torch::ones({N, M}, device); + + auto coord_index = torch::zeros({N, M, 3}, device); + + PhaseName train_phase; + if (phase_name.compare("test") == 0) { + train_phase = TEST; + } else if (phase_name.compare("train") == 0){ + train_phase = TRAIN; + } else { + std::cout << "UNKNOWN PHASE NAME: " << phase_name << std::endl; + exit(1); + } + + AT_DISPATCH_FLOATING_TYPES(sigma.type(), "render_forward_cuda", ([&] { + render_forward_cuda_kernel<<>>( + sigma.packed_accessor32(), + origin.packed_accessor32(), + points.packed_accessor32(), + tindex.packed_accessor32(), + // pog.packed_accessor32(), + pred_dist.packed_accessor32(), + gt_dist.packed_accessor32(), + coord_index.packed_accessor32(), + train_phase); + })); + + cudaDeviceSynchronize(); + + // return {pog, pred_dist, gt_dist}; + return {pred_dist, gt_dist, coord_index}; +} + +template +__global__ void render_cuda_kernel( + const torch::PackedTensorAccessor32 sigma, + const torch::PackedTensorAccessor32 origin, + const torch::PackedTensorAccessor32 points, + const torch::PackedTensorAccessor32 tindex, + // const torch::PackedTensorAccessor32 occupancy, + torch::PackedTensorAccessor32 pred_dist, + torch::PackedTensorAccessor32 gt_dist, + torch::PackedTensorAccessor32 grad_sigma, + // torch::PackedTensorAccessor32 grad_sigma_count, + LossType loss_type) { + + // batch index + const auto n = blockIdx.y; + + // ray index + const auto c = blockIdx.x * blockDim.x + threadIdx.x; + + // num of rays + const auto M = points.size(1); + const auto T = sigma.size(1); + + // we allocated more threads than num_rays + if (c < M) { + // ray end point + const auto t = tindex[n][c]; + + // invalid points + // assert(t < T); + assert(T == 1 || t < T); + + // time index for sigma + // when T = 1, we have a static sigma + const auto ts = (T == 1) ? 0 : t; + + // if t < 0, it is a padded point + if (t < 0) return; + + // grid shape + const int vzsize = sigma.size(2); + const int vysize = sigma.size(3); + const int vxsize = sigma.size(4); + // assert(vzsize + vysize + vxsize <= MAX_D); + + // origin + const double xo = origin[n][t][0]; + const double yo = origin[n][t][1]; + const double zo = origin[n][t][2]; + + // end point + const double xe = points[n][c][0]; + const double ye = points[n][c][1]; + const double ze = points[n][c][2]; + + // locate the voxel where the origin resides + const int vxo = int(xo); + const int vyo = int(yo); + const int vzo = int(zo); + + // + const int vxe = int(xe); + const int vye = int(ye); + const int vze = int(ze); + + // NOTE: new + int vx = vxo; + int vy = vyo; + int vz = vzo; + + // origin to end + const double rx = xe - xo; + const double ry = ye - yo; + const double rz = ze - zo; + double gt_d = sqrt(rx * rx + ry * ry + rz * rz); + + // directional vector + const double dx = rx / gt_d; + const double dy = ry / gt_d; + const double dz = rz / gt_d; + + // In which direction the voxel ids are incremented. + const int stepX = (dx >= 0) ? 1 : -1; + const int stepY = (dy >= 0) ? 1 : -1; + const int stepZ = (dz >= 0) ? 1 : -1; + + // Distance along the ray to the next voxel border from the current position (tMaxX, tMaxY, tMaxZ). + const double next_voxel_boundary_x = vx + (stepX < 0 ? 0 : 1); + const double next_voxel_boundary_y = vy + (stepY < 0 ? 0 : 1); + const double next_voxel_boundary_z = vz + (stepZ < 0 ? 0 : 1); + + // tMaxX, tMaxY, tMaxZ -- distance until next intersection with voxel-border + // the value of t at which the ray crosses the first vertical voxel boundary + double tMaxX = (dx!=0) ? (next_voxel_boundary_x - xo)/dx : DBL_MAX; // + double tMaxY = (dy!=0) ? (next_voxel_boundary_y - yo)/dy : DBL_MAX; // + double tMaxZ = (dz!=0) ? (next_voxel_boundary_z - zo)/dz : DBL_MAX; // + + // tDeltaX, tDeltaY, tDeltaZ -- + // how far along the ray we must move for the horizontal component to equal the width of a voxel + // the direction in which we traverse the grid + // can only be FLT_MAX if we never go in that direction + const double tDeltaX = (dx!=0) ? stepX/dx : DBL_MAX; + const double tDeltaY = (dy!=0) ? stepY/dy : DBL_MAX; + const double tDeltaZ = (dz!=0) ? stepZ/dz : DBL_MAX; + + int3 path[MAX_D]; + double csd[MAX_D]; // cumulative sum of sigma times delta + double p[MAX_D]; // alpha + double d[MAX_D]; + double dt[MAX_D]; + + // forward raymarching with voxel traversal + int step = 0; // total number of voxels traversed + int count = 0; // number of voxels traversed inside the voxel grid + double last_d = 0.0; // correct initialization + + // voxel traversal raycasting + bool was_inside = false; + while (true) { + bool inside = (0 <= vx && vx < vxsize) && + (0 <= vy && vy < vysize) && + (0 <= vz && vz < vzsize); + if (inside) { // now inside + was_inside = true; + path[count] = make_int3(vx, vy, vz); + } else if (was_inside) { // was inside but no longer + // we know we are not coming back so terminate + break; + } else if (last_d > gt_d) { + break; + } /* else { // has not gone inside yet + // assert(count == 0); + // (1) when we have hit the destination but haven't gone inside the voxel grid + // (2) when we have traveled MAX_D voxels but haven't found one valid voxel + // handle intersection corner cases in case of infinite loop + // bool hit = (vx == vxe && vy == vye && vz == vze); + // if (hit || step >= MAX_D) + // break; + if (last_d >= gt_d || step >= MAX_D) break; + } */ + // _d represents the ray distance has traveled before escaping the current voxel cell + double _d = 0.0; + // voxel traversal + if (tMaxX < tMaxY) { + if (tMaxX < tMaxZ) { + _d = tMaxX; + vx += stepX; + tMaxX += tDeltaX; + } else { + _d = tMaxZ; + vz += stepZ; + tMaxZ += tDeltaZ; + } + } else { + if (tMaxY < tMaxZ) { + _d = tMaxY; + vy += stepY; + tMaxY += tDeltaY; + } else { + _d = tMaxZ; + vz += stepZ; + tMaxZ += tDeltaZ; + } + } + if (inside) { + // get sigma at the current voxel + const int3 &v = path[count]; // use the recorded index + const double _sigma = sigma[n][ts][v.z][v.y][v.x]; + const double _delta = max(0.0, _d - last_d); // THIS TURNS OUT IMPORTANT + const double sd = _sigma * _delta; + if (count == 0) { // the first voxel inside + csd[count] = sd; + p[count] = 1 - exp(-sd); + } else { + csd[count] = csd[count-1] + sd; + p[count] = exp(-csd[count-1]) - exp(-csd[count]); + } + // record the traveled distance + d[count] = _d; + dt[count] = _delta; + // count the number of voxels we have escaped + count ++; + } + last_d = _d; + step ++; + + if (step > MAX_STEP) { + break; + } + } + + // the total number of voxels visited should not exceed this number + assert(count <= MAX_D); + + // WHEN THERE IS AN INTERSECTION BETWEEN THE RAY AND THE VOXEL GRID + if (count > 0) { + // compute the expected ray distance + double exp_d = 0.0; + for (int i = 0; i < count; i ++) + exp_d += p[i] * d[i]; + + // add an imaginary sample at the end point should gt_d exceeds max_d + double p_out = exp(-csd[count-1]); + double max_d = d[count-1]; + + exp_d += (p_out * max_d); + gt_d = min(gt_d, max_d); + + // write the rendered ray distance (max_d) + pred_dist[n][c] = exp_d; + gt_dist[n][c] = gt_d; + + /* backward raymarching */ + double dd_dsigma[MAX_D]; + for (int i = count - 1; i >= 0; i --) { + // NOTE: probably need to double check again + if (i == count - 1) + dd_dsigma[i] = p_out * max_d; + else + dd_dsigma[i] = dd_dsigma[i+1] - exp(-csd[i]) * (d[i+1] - d[i]); + } + + for (int i = count - 1; i >= 0; i --) + dd_dsigma[i] *= dt[i]; + + // option 2: cap at the boundary + for (int i = count - 1; i >= 0; i --) + dd_dsigma[i] -= dt[i] * p_out * max_d; + + double dl_dd = 1.0; + if (loss_type == L1) + dl_dd = (exp_d >= gt_d) ? 1 : -1; + else if (loss_type == L2) + dl_dd = (exp_d - gt_d); + else if (loss_type == ABSREL) + dl_dd = (exp_d >= gt_d) ? (1.0/gt_d) : -(1.0/gt_d); + + // apply chain rule + for (int i = 0; i < count; i ++) { + const int3 &v = path[i]; + // NOTE: potential race conditions when writing gradients + grad_sigma[n][ts][v.z][v.y][v.x] += dl_dd * dd_dsigma[i]; + // grad_sigma_count[n][ts][v.z][v.y][v.x] += 1; + } + } + } +} + +/* + * input shape + * sigma : N x T x H x L x W + * origin : N x T x 3 + * points : N x M x 4 + * output shape + * dist : N x M + * loss : N x M + * grad_sigma : N x T x H x L x W + */ +std::vector render_cuda( + torch::Tensor sigma, + torch::Tensor origin, + torch::Tensor points, + torch::Tensor tindex, + std::string loss_name) { + + const auto N = points.size(0); // batch size + const auto M = points.size(1); // num of rays + + const auto device = sigma.device(); + + const int threads = 1024; + const dim3 blocks((M + threads - 1) / threads, N); + + // perform rendering + auto gt_dist = -torch::ones({N, M}, device); + auto pred_dist = -torch::ones({N, M}, device); + auto grad_sigma = torch::zeros_like(sigma); + // auto grad_sigma_count = torch::zeros_like(sigma); + + LossType loss_type; + if (loss_name.compare("l1") == 0) { + loss_type = L1; + } else if (loss_name.compare("l2") == 0) { + loss_type = L2; + } else if (loss_name.compare("absrel") == 0) { + loss_type = ABSREL; + } else if (loss_name.compare("bce") == 0){ + loss_type = L1; + } else { + std::cout << "UNKNOWN LOSS TYPE: " << loss_name << std::endl; + exit(1); + } + + AT_DISPATCH_FLOATING_TYPES(sigma.type(), "render_cuda", ([&] { + render_cuda_kernel<<>>( + sigma.packed_accessor32(), + origin.packed_accessor32(), + points.packed_accessor32(), + tindex.packed_accessor32(), + // occupancy.packed_accessor32(), + pred_dist.packed_accessor32(), + gt_dist.packed_accessor32(), + grad_sigma.packed_accessor32(), + // grad_sigma_count.packed_accessor32(), + loss_type); + })); + + cudaDeviceSynchronize(); + + // grad_sigma_count += (grad_sigma_count == 0); + // grad_sigma /= grad_sigma_count; + + return {pred_dist, gt_dist, grad_sigma}; +} + + +/* + * input shape + * origin : N x T x 3 + * points : N x M x 3 + * tindex : N x M + * output shape + * occupancy: N x T x H x L x W + */ +torch::Tensor init_cuda( + torch::Tensor points, + torch::Tensor tindex, + const std::vector grid) { + + const auto N = points.size(0); // batch size + const auto M = points.size(1); // num of rays + + const auto T = grid[0]; + const auto H = grid[1]; + const auto L = grid[2]; + const auto W = grid[3]; + + const auto dtype = points.dtype(); + const auto device = points.device(); + const auto options = torch::TensorOptions().dtype(dtype).device(device).requires_grad(false); + auto occupancy = torch::zeros({N, T, H, L, W}, options); + + const int threads = 1024; + const dim3 blocks((M + threads - 1) / threads, N); + + // initialize occupancy such that every voxel with one or more points is occupied + AT_DISPATCH_FLOATING_TYPES(points.type(), "init_cuda", ([&] { + init_cuda_kernel<<>>( + points.packed_accessor32(), + tindex.packed_accessor32(), + occupancy.packed_accessor32()); + })); + + // synchronize + cudaDeviceSynchronize(); + + return occupancy; +} \ No newline at end of file diff --git a/rayiou_metrics/rayiou_metrics/__init__.py b/rayiou_metrics/rayiou_metrics/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/rayiou_metrics/rayiou_metrics/ego_infos_val.pkl b/rayiou_metrics/rayiou_metrics/ego_infos_val.pkl new file mode 100644 index 0000000..2fa2aad Binary files /dev/null and b/rayiou_metrics/rayiou_metrics/ego_infos_val.pkl differ diff --git a/rayiou_metrics/rayiou_metrics/ego_pose_dataset.py b/rayiou_metrics/rayiou_metrics/ego_pose_dataset.py new file mode 100644 index 0000000..d99e7bb --- /dev/null +++ b/rayiou_metrics/rayiou_metrics/ego_pose_dataset.py @@ -0,0 +1,87 @@ +import torch +import numpy as np +from pyquaternion import Quaternion +from torch.utils.data import Dataset +np.set_printoptions(precision=3, suppress=True) + + +def trans_matrix(T, R): + tm = np.eye(4) + tm[:3, :3] = R.rotation_matrix + tm[:3, 3] = T + return tm + + +# A helper dataset for RayIoU. It is NOT used during training. +class EgoPoseDataset(Dataset): + def __init__(self, data_infos): + super(EgoPoseDataset, self).__init__() + + self.data_infos = data_infos + self.scene_frames = {} + + for info in data_infos: + scene_name = info['scene_name'] + if scene_name not in self.scene_frames: + self.scene_frames[scene_name] = [] + self.scene_frames[scene_name].append(info) + + def __len__(self): + return len(self.data_infos) + + def get_ego_from_lidar(self, info): + ego_from_lidar = trans_matrix( + np.array(info['lidar2ego_translation']), + Quaternion(info['lidar2ego_rotation'])) + return ego_from_lidar + + def get_global_pose(self, info, inverse=False): + global_from_ego = trans_matrix( + np.array(info['ego2global_translation']), + Quaternion(info['ego2global_rotation'])) + ego_from_lidar = trans_matrix( + np.array(info['lidar2ego_translation']), + Quaternion(info['lidar2ego_rotation'])) + pose = global_from_ego.dot(ego_from_lidar) + if inverse: + pose = np.linalg.inv(pose) + return pose + + def __getitem__(self, idx): + info = self.data_infos[idx] + + ref_sample_token = info['token'] + ref_lidar_from_global = self.get_global_pose(info, inverse=True) + ref_ego_from_lidar = self.get_ego_from_lidar(info) + + scene_frame = self.scene_frames[info['scene_name']] + ref_index = scene_frame.index(info) + + # NOTE: getting output frames + output_origin_list = [] + for curr_index in range(len(scene_frame)): + # if this exists a valid target + if curr_index == ref_index: + origin_tf = np.array([0.0, 0.0, 0.0], dtype=np.float32) + else: + # transform from the current lidar frame to global and then to the reference lidar frame + global_from_curr = self.get_global_pose(scene_frame[curr_index], inverse=False) + ref_from_curr = ref_lidar_from_global.dot(global_from_curr) + origin_tf = np.array(ref_from_curr[:3, 3], dtype=np.float32) + + origin_tf_pad = np.ones([4]) + origin_tf_pad[:3] = origin_tf # pad to [4] + origin_tf = np.dot(ref_ego_from_lidar[:3], origin_tf_pad.T).T # [3] + + # origin + if np.abs(origin_tf[0]) < 39 and np.abs(origin_tf[1]) < 39: + output_origin_list.append(origin_tf) + + # select 8 origins + if len(output_origin_list) > 8: + select_idx = np.round(np.linspace(0, len(output_origin_list) - 1, 8)).astype(np.int64) + output_origin_list = [output_origin_list[i] for i in select_idx] + + output_origin_tensor = torch.from_numpy(np.stack(output_origin_list)) # [T, 3] + + return (ref_sample_token, output_origin_tensor) diff --git a/rayiou_metrics/rayiou_metrics/evaluation.py b/rayiou_metrics/rayiou_metrics/evaluation.py new file mode 100644 index 0000000..df4de8e --- /dev/null +++ b/rayiou_metrics/rayiou_metrics/evaluation.py @@ -0,0 +1,72 @@ +import os +import glob +import mmcv +import numpy as np +import pkg_resources +from torch.utils.data import DataLoader +from .ray_metrics import main_rayiou +from .ego_pose_dataset import EgoPoseDataset + +openocc_class_names = [ + 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', + 'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier', + 'driveable_surface', 'other_flat', 'sidewalk', + 'terrain', 'manmade', 'vegetation', 'free' +] +occ3d_class_names = [ + 'others', 'barrier', 'bicycle', 'bus', 'car', 'construction_vehicle', + 'motorcycle', 'pedestrian', 'traffic_cone', 'trailer', 'truck', + 'driveable_surface', 'other_flat', 'sidewalk', + 'terrain', 'manmade', 'vegetation', 'free' +] + +def evaluate_metrics(data_root, pred_dir, data_type): + data_path = pkg_resources.resource_filename('rayiou_metrics', 'ego_infos_val.pkl') + data_infos = mmcv.load(data_path)['infos'] + gt_filepaths = sorted(glob.glob(os.path.join(data_root, data_type, '*/*/*.npz'))) + + # retrieve scene_name + token2scene = {} + for gt_path in gt_filepaths: + token = gt_path.split('/')[-2] + scene_name = gt_path.split('/')[-3] + token2scene[token] = scene_name + + for i in range(len(data_infos)): + scene_name = token2scene[data_infos[i]['token']] + data_infos[i]['scene_name'] = scene_name + + lidar_origins = [] + occ_gts = [] + occ_preds = [] + + for idx, batch in enumerate(DataLoader(EgoPoseDataset(data_infos), num_workers=8)): + output_origin = batch[1] + info = data_infos[idx] + + occ_path = os.path.join(data_root, data_type, info['scene_name'], info['token'], 'labels.npz') + occ_gt = np.load(occ_path, allow_pickle=True)['semantics'] + occ_gt = np.reshape(occ_gt, [200, 200, 16]).astype(np.uint8) + + occ_path = os.path.join(pred_dir, info['token'] + '.npz') + occ_pred = np.load(occ_path, allow_pickle=True)['pred'] + occ_pred = np.reshape(occ_pred, [200, 200, 16]).astype(np.uint8) + + lidar_origins.append(output_origin) + occ_gts.append(occ_gt) + occ_preds.append(occ_pred) + + if data_type == 'occ3d': + occ_class_names = occ3d_class_names + elif data_type == 'openocc_v2': + occ_class_names = openocc_class_names + else: + raise ValueError("Invalid data_type. Support ['occ3d', 'openocc_v2']") + + metrics = main_rayiou(occ_preds, occ_gts, lidar_origins, occ_class_names=occ_class_names) + + print('--- Evaluation Results ---') + for k, v in metrics.items(): + print('%s: %.4f' % (k, v)) + + return metrics diff --git a/rayiou_metrics/rayiou_metrics/ray_metrics.py b/rayiou_metrics/rayiou_metrics/ray_metrics.py new file mode 100644 index 0000000..c3f8e88 --- /dev/null +++ b/rayiou_metrics/rayiou_metrics/ray_metrics.py @@ -0,0 +1,309 @@ +# Acknowledgments: https://github.com/tarashakhurana/4d-occ-forecasting +# Modified by Haisong Liu +import functools +import math +import copy +import multiprocessing +import numpy as np +import torch +from torch.utils.cpp_extension import load +from tqdm import tqdm +from prettytable import PrettyTable +from .ray_pq import Metric_RayPQ + + +# Detect the device +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +if torch.cuda.is_available(): + import dvr_cuda_ops as dvr +else: + import dvr_cpu_ops as dvr + +_pc_range = [-40, -40, -1.0, 40, 40, 5.4] +_voxel_size = 0.4 + + +# https://github.com/tarashakhurana/4d-occ-forecasting/blob/ff986082cd6ea10e67ab7839bf0e654736b3f4e2/test_fgbg.py#L29C1-L46C16 +def get_rendered_pcds(origin, points, tindex, pred_dist): + pcds = [] + + for t in range(len(origin)): + mask = (tindex == t) + # skip the ones with no data + if not mask.any(): + continue + _pts = points[mask, :3] + # use ground truth lidar points for the raycasting direction + v = _pts - origin[t][None, :] + d = v / np.sqrt((v ** 2).sum(axis=1, keepdims=True)) + pred_pts = origin[t][None, :] + d * pred_dist[mask][:, None] + pcds.append(torch.from_numpy(pred_pts)) + + return pcds + + +def meshgrid3d(occ_size, pc_range): + W, H, D = occ_size + + xs = torch.linspace(0.5, W - 0.5, W).view(W, 1, 1).expand(W, H, D) / W + ys = torch.linspace(0.5, H - 0.5, H).view(1, H, 1).expand(W, H, D) / H + zs = torch.linspace(0.5, D - 0.5, D).view(1, 1, D).expand(W, H, D) / D + xs = xs * (pc_range[3] - pc_range[0]) + pc_range[0] + ys = ys * (pc_range[4] - pc_range[1]) + pc_range[1] + zs = zs * (pc_range[5] - pc_range[2]) + pc_range[2] + xyz = torch.stack((xs, ys, zs), -1) + + return xyz + + +def generate_lidar_rays(): + # prepare lidar ray angles + pitch_angles = [] + for k in range(10): + angle = math.pi / 2 - math.atan(k + 1) + pitch_angles.append(-angle) + + # nuscenes lidar fov: [0.2107773983152201, -0.5439104895672159] (rad) + while pitch_angles[-1] < 0.21: + delta = pitch_angles[-1] - pitch_angles[-2] + pitch_angles.append(pitch_angles[-1] + delta) + + lidar_rays = [] + for pitch_angle in pitch_angles: + for azimuth_angle in np.arange(0, 360, 1): + azimuth_angle = np.deg2rad(azimuth_angle) + + x = np.cos(pitch_angle) * np.cos(azimuth_angle) + y = np.cos(pitch_angle) * np.sin(azimuth_angle) + z = np.sin(pitch_angle) + + lidar_rays.append((x, y, z)) + + return np.array(lidar_rays, dtype=np.float32) + + +def process_one_sample(sem_pred, lidar_rays, output_origin, instance_pred=None, occ_class_names=None): + # lidar origin in ego coordinate + # lidar_origin = torch.tensor([[[0.9858, 0.0000, 1.8402]]]) + T = output_origin.shape[1] + pred_pcds_t = [] + + free_id = len(occ_class_names) - 1 + occ_pred = copy.deepcopy(sem_pred) + occ_pred[sem_pred < free_id] = 1 + occ_pred[sem_pred == free_id] = 0 + occ_pred = occ_pred.permute(2, 1, 0) + occ_pred = occ_pred[None, None, :].contiguous().float() + + offset = torch.Tensor(_pc_range[:3])[None, None, :] + scaler = torch.Tensor([_voxel_size] * 3)[None, None, :] + + lidar_tindex = torch.zeros([1, lidar_rays.shape[0]]) + + for t in range(T): + lidar_origin = output_origin[:, t:t+1, :] # [1, 1, 3] + lidar_endpts = lidar_rays[None] + lidar_origin # [1, 15840, 3] + + output_origin_render = ((lidar_origin - offset) / scaler).float() # [1, 1, 3] + output_points_render = ((lidar_endpts - offset) / scaler).float() # [1, N, 3] + output_tindex_render = lidar_tindex # [1, N], all zeros + + with torch.no_grad(): + pred_dist, _, coord_index = dvr.render_forward( + occ_pred.to(device), + output_origin_render.to(device), + output_points_render.to(device), + output_tindex_render.to(device), + [1, 16, 200, 200], + "test" + ) + pred_dist *= _voxel_size + + pred_pcds = get_rendered_pcds( + lidar_origin[0].cpu().numpy(), + lidar_endpts[0].cpu().numpy(), + lidar_tindex[0].cpu().numpy(), + pred_dist[0].cpu().numpy() + ) + coord_index = coord_index[0, :, :].int().cpu() # [N, 3] + + pred_label = sem_pred[coord_index[:, 0], coord_index[:, 1], coord_index[:, 2]][:, None] # [N, 1] + pred_dist = pred_dist[0, :, None].cpu() + + if instance_pred is not None: + pred_instance = instance_pred[coord_index[:, 0], coord_index[:, 1], coord_index[:, 2]][:, None] # [N, 1] + pred_pcds = torch.cat([pred_label.float(), pred_instance.float(), pred_dist], dim=-1) + else: + pred_pcds = torch.cat([pred_label.float(), pred_dist], dim=-1) + + pred_pcds_t.append(pred_pcds) + + pred_pcds_t = torch.cat(pred_pcds_t, dim=0) + + return pred_pcds_t.numpy() + +# for pool.starmap when device is cpu +def process_pred_gt(sem_pred, sem_gt, lidar_origins, lidar_rays, occ_class_names): + sem_pred = torch.from_numpy(np.reshape(sem_pred, [200, 200, 16])) + sem_gt = torch.from_numpy(np.reshape(sem_gt, [200, 200, 16])) + + pcd_pred = process_one_sample(sem_pred, lidar_rays, lidar_origins, occ_class_names=occ_class_names) + pcd_gt = process_one_sample(sem_gt, lidar_rays, lidar_origins, occ_class_names=occ_class_names) + + # evalute on non-free rays + valid_mask = (pcd_gt[:, 0].astype(np.int32) != len(occ_class_names) - 1) + pcd_pred = pcd_pred[valid_mask] + pcd_gt = pcd_gt[valid_mask] + + assert pcd_pred.shape == pcd_gt.shape + + return pcd_pred, pcd_gt + +def calc_rayiou(pcd_pred_list, pcd_gt_list, occ_class_names): + thresholds = [1, 2, 4] + + gt_cnt = np.zeros([len(occ_class_names)]) + pred_cnt = np.zeros([len(occ_class_names)]) + tp_cnt = np.zeros([len(thresholds), len(occ_class_names)]) + + for pcd_pred, pcd_gt in zip(pcd_pred_list, pcd_gt_list): + for j, threshold in enumerate(thresholds): + # L1 + depth_pred = pcd_pred[:, 1] + depth_gt = pcd_gt[:, 1] + l1_error = np.abs(depth_pred - depth_gt) + tp_dist_mask = (l1_error < threshold) + + for i, cls in enumerate(occ_class_names): + cls_id = occ_class_names.index(cls) + cls_mask_pred = (pcd_pred[:, 0] == cls_id) + cls_mask_gt = (pcd_gt[:, 0] == cls_id) + + gt_cnt_i = cls_mask_gt.sum() + pred_cnt_i = cls_mask_pred.sum() + if j == 0: + gt_cnt[i] += gt_cnt_i + pred_cnt[i] += pred_cnt_i + + tp_cls = cls_mask_gt & cls_mask_pred # [N] + tp_mask = np.logical_and(tp_cls, tp_dist_mask) + tp_cnt[j][i] += tp_mask.sum() + + iou_list = [] + for j, threshold in enumerate(thresholds): + iou_list.append((tp_cnt[j] / (gt_cnt + pred_cnt - tp_cnt[j]))[:-1]) + + return iou_list + + +def main_rayiou(sem_pred_list, sem_gt_list, lidar_origin_list, occ_class_names): + if device.type == 'cuda': + torch.cuda.empty_cache() + + # generate lidar rays + lidar_rays = generate_lidar_rays() + lidar_rays = torch.from_numpy(lidar_rays) + + if device.type == 'cuda': + pcd_pred_list, pcd_gt_list = [], [] + for sem_pred, sem_gt, lidar_origins in tqdm(zip(sem_pred_list, sem_gt_list, lidar_origin_list), ncols=50): + + pcd_pred, pcd_gt = process_pred_gt(sem_pred, sem_gt, lidar_origins, lidar_rays, occ_class_names) + + pcd_pred_list.append(pcd_pred) + pcd_gt_list.append(pcd_gt) + + else: + pool = multiprocessing.Pool(multiprocessing.cpu_count()) + + iterable_input = zip(sem_pred_list, sem_gt_list, lidar_origin_list) + warpped_func = functools.partial(process_pred_gt, lidar_rays=lidar_rays, occ_class_names=occ_class_names) + result = pool.starmap(warpped_func, tqdm(iterable_input, total=len(sem_gt_list), ncols=50)) + + pool.close() + pool.join() + + pcd_pred_list, pcd_gt_list = zip(*result) + + iou_list = calc_rayiou(pcd_pred_list, pcd_gt_list, occ_class_names) + rayiou = np.nanmean(iou_list) + rayiou_0 = np.nanmean(iou_list[0]) + rayiou_1 = np.nanmean(iou_list[1]) + rayiou_2 = np.nanmean(iou_list[2]) + + table = PrettyTable([ + 'Class Names', + 'RayIoU@1', 'RayIoU@2', 'RayIoU@4' + ]) + table.float_format = '.3' + + for i in range(len(occ_class_names) - 1): + table.add_row([ + occ_class_names[i], + iou_list[0][i], iou_list[1][i], iou_list[2][i] + ], divider=(i == len(occ_class_names) - 2)) + + table.add_row(['MEAN', rayiou_0, rayiou_1, rayiou_2]) + + print(table) + + if device.type == 'cuda': + torch.cuda.empty_cache() + + return { + 'RayIoU': rayiou, + 'RayIoU@1': rayiou_0, + 'RayIoU@2': rayiou_1, + 'RayIoU@4': rayiou_2, + } + + +def main_raypq(sem_pred_list, sem_gt_list, inst_pred_list, inst_gt_list, lidar_origin_list, occ_class_names): + if device.type == 'cuda': + torch.cuda.empty_cache() + + eval_metrics_pq = Metric_RayPQ( + occ_class_names=occ_class_names, + num_classes=len(occ_class_names), + thresholds=[1, 2, 4] + ) + + # generate lidar rays + lidar_rays = generate_lidar_rays() + lidar_rays = torch.from_numpy(lidar_rays) + + for sem_pred, sem_gt, inst_pred, inst_gt, lidar_origins in \ + tqdm(zip(sem_pred_list, sem_gt_list, inst_pred_list, inst_gt_list, lidar_origin_list), ncols=50): + sem_pred = torch.from_numpy(np.reshape(sem_pred, [200, 200, 16])) + sem_gt = torch.from_numpy(np.reshape(sem_gt, [200, 200, 16])) + + inst_pred = torch.from_numpy(np.reshape(inst_pred, [200, 200, 16])) + inst_gt = torch.from_numpy(np.reshape(inst_gt, [200, 200, 16])) + + pcd_pred = process_one_sample(sem_pred, lidar_rays, lidar_origins, instance_pred=inst_pred, occ_class_names=occ_class_names) + pcd_gt = process_one_sample(sem_gt, lidar_rays, lidar_origins, instance_pred=inst_gt, occ_class_names=occ_class_names) + + # evalute on non-free rays + valid_mask = (pcd_gt[:, 0].astype(np.int32) != len(occ_class_names) - 1) + pcd_pred = pcd_pred[valid_mask] + pcd_gt = pcd_gt[valid_mask] + + assert pcd_pred.shape == pcd_gt.shape + + sem_gt = pcd_gt[:, 0].astype(np.int32) + sem_pred = pcd_pred[:, 0].astype(np.int32) + + instances_gt = pcd_gt[:, 1].astype(np.int32) + instances_pred = pcd_pred[:, 1].astype(np.int32) + + # L1 + depth_gt = pcd_gt[:, 2] + depth_pred = pcd_pred[:, 2] + l1_error = np.abs(depth_pred - depth_gt) + + eval_metrics_pq.add_batch(sem_pred, sem_gt, instances_pred, instances_gt, l1_error) + + if device.type == 'cuda': + torch.cuda.empty_cache() + + return eval_metrics_pq.count_pq() diff --git a/rayiou_metrics/rayiou_metrics/ray_pq.py b/rayiou_metrics/rayiou_metrics/ray_pq.py new file mode 100644 index 0000000..de66860 --- /dev/null +++ b/rayiou_metrics/rayiou_metrics/ray_pq.py @@ -0,0 +1,193 @@ +import numpy as np +from prettytable import PrettyTable + + +class Metric_RayPQ: + def __init__(self, + occ_class_names, + num_classes=18, + thresholds=[1, 2, 4]): + """ + Args: + ignore_index (llist): Class ids that not be considered in pq counting. + """ + if num_classes == 18 or num_classes == 17: + self.class_names = occ_class_names + else: + raise ValueError + + self.num_classes = num_classes + self.id_offset = 2 ** 16 + self.eps = 1e-5 + self.thresholds = thresholds + + self.min_num_points = 10 + self.include = np.array( + [n for n in range(self.num_classes - 1)], + dtype=int) + self.cnt = 0 + + # panoptic stuff + self.pan_tp = np.zeros([len(self.thresholds), num_classes], dtype=int) + self.pan_iou = np.zeros([len(self.thresholds), num_classes], dtype=np.double) + self.pan_fp = np.zeros([len(self.thresholds), num_classes], dtype=int) + self.pan_fn = np.zeros([len(self.thresholds), num_classes], dtype=int) + + def add_batch(self,semantics_pred,semantics_gt,instances_pred,instances_gt, l1_error): + self.cnt += 1 + self.add_panoptic_sample(semantics_pred, semantics_gt, instances_pred, instances_gt, l1_error) + + def add_panoptic_sample(self, semantics_pred, semantics_gt, instances_pred, instances_gt, l1_error): + """Add one sample of panoptic predictions and ground truths for + evaluation. + + Args: + semantics_pred (np.ndarray): Semantic predictions. + semantics_gt (np.ndarray): Semantic ground truths. + instances_pred (np.ndarray): Instance predictions. + instances_gt (np.ndarray): Instance ground truths. + """ + # get instance_class_id from instance_gt + instance_class_ids = [self.num_classes - 1] + for i in range(1, instances_gt.max() + 1): + class_id = np.unique(semantics_gt[instances_gt == i]) + # assert class_id.shape[0] == 1, "each instance must belong to only one class" + if class_id.shape[0] == 1: + instance_class_ids.append(class_id[0]) + else: + instance_class_ids.append(self.num_classes - 1) + instance_class_ids = np.array(instance_class_ids) + + instance_count = 1 + final_instance_class_ids = [] + final_instances = np.zeros_like(instances_gt) # empty space has instance id "0" + + for class_id in range(self.num_classes - 1): + if np.sum(semantics_gt == class_id) == 0: + continue + + if self.class_names[class_id] in ['car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'motorcycle', 'bicycle', 'pedestrian']: + # treat as instances + for instance_id in range(len(instance_class_ids)): + if instance_class_ids[instance_id] != class_id: + continue + final_instances[instances_gt == instance_id] = instance_count + instance_count += 1 + final_instance_class_ids.append(class_id) + else: + # treat as semantics + final_instances[semantics_gt == class_id] = instance_count + instance_count += 1 + final_instance_class_ids.append(class_id) + + instances_gt = final_instances + + # avoid zero (ignored label) + instances_pred = instances_pred + 1 + instances_gt = instances_gt + 1 + + for j, threshold in enumerate(self.thresholds): + tp_dist_mask = l1_error < threshold + # for each class (except the ignored ones) + for cl in self.include: + # get a class mask + pred_inst_in_cl_mask = semantics_pred == cl + gt_inst_in_cl_mask = semantics_gt == cl + + # get instance points in class (makes outside stuff 0) + pred_inst_in_cl = instances_pred * pred_inst_in_cl_mask.astype(int) + gt_inst_in_cl = instances_gt * gt_inst_in_cl_mask.astype(int) + + # generate the areas for each unique instance prediction + unique_pred, counts_pred = np.unique( + pred_inst_in_cl[pred_inst_in_cl > 0], return_counts=True) + id2idx_pred = {id: idx for idx, id in enumerate(unique_pred)} + matched_pred = np.array([False] * unique_pred.shape[0]) + + # generate the areas for each unique instance gt_np + unique_gt, counts_gt = np.unique( + gt_inst_in_cl[gt_inst_in_cl > 0], return_counts=True) + id2idx_gt = {id: idx for idx, id in enumerate(unique_gt)} + matched_gt = np.array([False] * unique_gt.shape[0]) + + # generate intersection using offset + valid_combos = np.logical_and(pred_inst_in_cl > 0, + gt_inst_in_cl > 0) + # add dist_mask + valid_combos = np.logical_and(valid_combos, tp_dist_mask) + + id_offset_combo = pred_inst_in_cl[ + valid_combos] + self.id_offset * gt_inst_in_cl[valid_combos] + unique_combo, counts_combo = np.unique( + id_offset_combo, return_counts=True) + + # generate an intersection map + # count the intersections with over 0.5 IoU as TP + gt_labels = unique_combo // self.id_offset + pred_labels = unique_combo % self.id_offset + gt_areas = np.array([counts_gt[id2idx_gt[id]] for id in gt_labels]) + pred_areas = np.array( + [counts_pred[id2idx_pred[id]] for id in pred_labels]) + intersections = counts_combo + unions = gt_areas + pred_areas - intersections + ious = intersections.astype(float) / unions.astype(float) + + tp_indexes = ious > 0.5 + self.pan_tp[j][cl] += np.sum(tp_indexes) + self.pan_iou[j][cl] += np.sum(ious[tp_indexes]) + + matched_gt[[id2idx_gt[id] for id in gt_labels[tp_indexes]]] = True + matched_pred[[id2idx_pred[id] + for id in pred_labels[tp_indexes]]] = True + + # count the FN + if len(counts_gt) > 0: + self.pan_fn[j][cl] += np.sum( + np.logical_and(counts_gt >= self.min_num_points, + ~matched_gt)) + + # count the FP + if len(matched_pred) > 0: + self.pan_fp[j][cl] += np.sum( + np.logical_and(counts_pred >= self.min_num_points, + ~matched_pred)) + + def count_pq(self): + sq_all = self.pan_iou.astype(np.double) / np.maximum( + self.pan_tp.astype(np.double), self.eps) + rq_all = self.pan_tp.astype(np.double) / np.maximum( + self.pan_tp.astype(np.double) + 0.5 * self.pan_fp.astype(np.double) + + 0.5 * self.pan_fn.astype(np.double), self.eps) + pq_all = sq_all * rq_all + + # mask classes not occurring in dataset + mask = (self.pan_tp + self.pan_fp + self.pan_fn) > 0 + pq_all[~mask] = float('nan') + + table = PrettyTable([ + 'Class Names', + 'RayPQ@%d' % self.thresholds[0], + 'RayPQ@%d' % self.thresholds[1], + 'RayPQ@%d' % self.thresholds[2] + ]) + table.float_format = '.3' + + for i in range(len(self.class_names) - 1): + table.add_row([ + self.class_names[i], + pq_all[0][i], pq_all[1][i], pq_all[2][i], + ], divider=(i == len(self.class_names) - 2)) + + table.add_row([ + 'MEAN', + np.nanmean(pq_all[0]), np.nanmean(pq_all[1]), np.nanmean(pq_all[2]) + ]) + + print(table) + + return { + 'RayPQ': np.nanmean(pq_all), + 'RayPQ@1': np.nanmean(pq_all[0]), + 'RayPQ@2': np.nanmean(pq_all[1]), + 'RayPQ@4': np.nanmean(pq_all[2]), + } diff --git a/rayiou_metrics/setup.py b/rayiou_metrics/setup.py new file mode 100644 index 0000000..dd21913 --- /dev/null +++ b/rayiou_metrics/setup.py @@ -0,0 +1,52 @@ +from setuptools import find_packages, setup +from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CppExtension +import torch + +ext_modules = [] + +if torch.cuda.is_available(): + ext_modules.append( + CUDAExtension( + name='dvr_cuda_ops', + sources=[ + 'lib/dvr_cuda/dvr_cuda.cpp', + 'lib/dvr_cuda/dvr_render_cuda.cu' + ], + extra_compile_args={'cxx': ['-g'], 'nvcc': ['-allow-unsupported-compiler']} + ) + ) + +ext_modules.append( + CppExtension( + name='dvr_cpu_ops', + sources=[ + 'lib/dvr_cpu/dvr.cpp', + 'lib/dvr_cpu/dvr_render_cpu.cpp' + ] + ) +) + +setup( + name='rayiou_metrics', + version='1.0', + description='A package for calculating ray metrics with CPU and CUDA support.', + packages=find_packages(), + package_data={ + 'rayiou_metrics': ['ego_infos_val.pkl'] # Include .pkl in the package + }, + include_package_data=True, + ext_modules=ext_modules, + cmdclass={ + 'build_ext': BuildExtension + }, + install_requires=[ + 'torch', + 'numpy' + ], + classifiers=[ + 'Programming Language :: Python :: 3', + 'License :: OSI Approved :: MIT License', + 'Operating System :: OS Independent', + ], + python_requires='>=3.6', +)