diff --git a/notebook/demo_aligned_pointmap.ipynb b/notebook/demo_aligned_pointmap.ipynb
new file mode 100644
index 00000000..ea7166de
--- /dev/null
+++ b/notebook/demo_aligned_pointmap.ipynb
@@ -0,0 +1,264 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Copyright (c) Meta Platforms, Inc. and affiliates."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Imports and Model Loading"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import uuid\n",
+    "import imageio\n",
+    "import numpy as np\n",
+    "import torch\n",
+    "from IPython.display import Image as ImageDisplay\n",
+    "from inference import Inference, ready_gaussian_for_video_rendering, load_image, load_masks, display_image, make_scene, render_video, interactive_visualizer\n",
+    "import imageio.v3 as iio\n",
+    "from pytorch3d.transforms import quaternion_to_matrix, Transform3d\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "PATH = os.getcwd()\n",
+    "TAG = \"hf\"\n",
+    "config_path = f\"{PATH}/../checkpoints/{TAG}/pipeline.yaml\"\n",
+    "inference = Inference(config_path, compile=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Load input image to lift to 3D (multiple objects)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "IMAGE_PATH = f\"{PATH}/images/nocs_0003_0354/rgb.png\"\n",
+    "IMAGE_NAME = os.path.basename(os.path.dirname(IMAGE_PATH))\n",
+    "\n",
+    "image = load_image(IMAGE_PATH)\n",
+    "masks = load_masks(os.path.dirname(IMAGE_PATH), extension=\".png\")\n",
+    "\n",
+    "display_image(image, masks)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Generate Pointmap from Depth Image"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "depth_path = f\"{PATH}/images/nocs_0003_0354/depth.png\"\n",
+    "depth = iio.imread(depth_path).astype(np.float32)\n",
+    "depth = depth / 1000.0  # convert depth from mm to m\n",
+    "depth[depth <= 0] = np.nan\n",
+    "\n",
+    "H, W = depth.shape\n",
+    "\n",
+    "K = np.array([\n",
+    "    [591.012500, 0.0, 322.525000],\n",
+    "    [0.0, 590.167750, 244.110840],\n",
+    "    [0.0, 0.0, 1.0]\n",
+    "], dtype=np.float32)\n",
+    "\n",
+    "fx = K[0, 0]\n",
+    "fy = K[1, 1]\n",
+    "cx = K[0, 2]\n",
+    "cy = K[1, 2]\n",
+    "\n",
+    "u = np.arange(W)\n",
+    "v = np.arange(H)\n",
+    "uu, vv = np.meshgrid(u, v)\n",
+    "\n",
+    "Z = depth\n",
+    "X = (uu - cx) * Z / fx\n",
+    "Y = (vv - cy) * Z / fy\n",
+    "\n",
+    "# Convert to right-handed PyTorch3D coordinates\n",
+    "pointmap = np.stack([-X, -Y, Z], axis=-1)\n",
+    "pointmap = torch.tensor(pointmap, dtype=torch.float32)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Generate Gaussian Splats"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "outputs = [inference(image, mask, seed=42, pointmap=pointmap) for mask in masks]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. Mesh Alignment & Coordinate Frame Conversion"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "R_yup_to_zup = torch.tensor([[-1,0,0],[0,0,1],[0,1,0]], dtype=torch.float32)\n",
+    "R_flip_z = torch.tensor([[1,0,0],[0,1,0],[0,0,-1]], dtype=torch.float32)\n",
+    "R_pytorch3d_to_cam = torch.tensor([[-1,0,0],[0,-1,0],[0,0,1]], dtype=torch.float32)\n",
+    "\n",
+    "def transform_mesh_vertices(vertices, rotation, translation, scale):\n",
+    "\n",
+    "    if isinstance(vertices, np.ndarray):\n",
+    "        vertices = torch.tensor(vertices, dtype=torch.float32)\n",
+    "\n",
+    "    vertices = vertices.unsqueeze(0)  # batch dimension [1, N, 3]\n",
+    "    vertices = vertices @ R_flip_z.to(vertices.device)\n",
+    "    vertices = vertices @ R_yup_to_zup.to(vertices.device)\n",
+    "    R_mat = quaternion_to_matrix(rotation.to(vertices.device))\n",
+    "    tfm = Transform3d(dtype=vertices.dtype, device=vertices.device)\n",
+    "    tfm = (\n",
+    "        tfm.scale(scale)\n",
+    "        .rotate(R_mat)\n",
+    "        .translate(translation[0], translation[1], translation[2])\n",
+    "    )\n",
+    "    vertices_world = tfm.transform_points(vertices)\n",
+    "    vertices_world = vertices_world @ R_pytorch3d_to_cam.to(vertices_world.device)\n",
+    "\n",
+    "    return vertices_world[0]  # remove batch dimension\n",
+    "\n",
+    "\n",
+    "for i, out in enumerate(outputs):\n",
+    "    mesh = out[\"glb\"]\n",
+    "    vertices = mesh.vertices\n",
+    "    vertices_tensor = torch.tensor(vertices, dtype=torch.float32)\n",
+    "\n",
+    "    S = out[\"scale\"][0].cpu().float()\n",
+    "    T = out[\"translation\"][0].cpu().float()\n",
+    "    R = out[\"rotation\"].squeeze().cpu().float()\n",
+    "\n",
+    "    vertices_transformed = transform_mesh_vertices(vertices_tensor, R, T, S)\n",
+    "    mesh.vertices = vertices_transformed.cpu().numpy().astype(np.float32)\n",
+    "\n",
+    "    save_path = f\"{PATH}/meshes/multi/{IMAGE_NAME}/object_{i}.ply\"\n",
+    "    os.makedirs(os.path.dirname(save_path), exist_ok=True)\n",
+    "    mesh.export(save_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 6. Visualize Gaussian Splat of the Scene\n",
+    "### a. Animated GIF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scene_gs = make_scene(*outputs)\n",
+    "scene_gs = ready_gaussian_for_video_rendering(scene_gs, fix_alignment=False)\n",
+    "\n",
+    "# export gaussian splatting (as point cloud)\n",
+    "scene_gs.save_ply(f\"{PATH}/gaussians/multi/{IMAGE_NAME}.ply\")\n",
+    "\n",
+    "video = render_video(\n",
+    "    scene_gs,\n",
+    "    r=1,\n",
+    "    fov=60,\n",
+    "    resolution=512,\n",
+    ")[\"color\"]\n",
+    "\n",
+    "# save video as gif\n",
+    "imageio.mimsave(\n",
+    "    os.path.join(f\"{PATH}/gaussians/multi/{IMAGE_NAME}.gif\"),\n",
+    "    video,\n",
+    "    format=\"GIF\",\n",
+    "    duration=1000 / 30,  # 30 fps\n",
+    "    loop=0,  # 0 means loop indefinitely\n",
+    ")\n",
+    "\n",
+    "# notebook display\n",
+    "ImageDisplay(url=f\"gaussians/multi/{IMAGE_NAME}.gif?cache_invalidator={uuid.uuid4()}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### b. Interactive Visualizer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# might take a while to load (black screen)\n",
+    "interactive_visualizer(f\"{PATH}/gaussians/multi/{IMAGE_NAME}.ply\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "sam3d-objects",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebook/images/nocs_0003_0354/0.png b/notebook/images/nocs_0003_0354/0.png
new file mode 100644
index 00000000..22e7a5d3
Binary files /dev/null and b/notebook/images/nocs_0003_0354/0.png differ
diff --git a/notebook/images/nocs_0003_0354/1.png b/notebook/images/nocs_0003_0354/1.png
new file mode 100644
index 00000000..c4f37a48
Binary files /dev/null and b/notebook/images/nocs_0003_0354/1.png differ
diff --git a/notebook/images/nocs_0003_0354/2.png b/notebook/images/nocs_0003_0354/2.png
new file mode 100644
index 00000000..6c39f510
Binary files /dev/null and b/notebook/images/nocs_0003_0354/2.png differ
diff --git a/notebook/images/nocs_0003_0354/3.png b/notebook/images/nocs_0003_0354/3.png
new file mode 100644
index 00000000..5e6724eb
Binary files /dev/null and b/notebook/images/nocs_0003_0354/3.png differ
diff --git a/notebook/images/nocs_0003_0354/4.png b/notebook/images/nocs_0003_0354/4.png
new file mode 100644
index 00000000..0df04b36
Binary files /dev/null and b/notebook/images/nocs_0003_0354/4.png differ
diff --git a/notebook/images/nocs_0003_0354/5.png b/notebook/images/nocs_0003_0354/5.png
new file mode 100644
index 00000000..d1c6d73a
Binary files /dev/null and b/notebook/images/nocs_0003_0354/5.png differ
diff --git a/notebook/images/nocs_0003_0354/cam_K.txt b/notebook/images/nocs_0003_0354/cam_K.txt
new file mode 100644
index 00000000..17de6a52
--- /dev/null
+++ b/notebook/images/nocs_0003_0354/cam_K.txt
@@ -0,0 +1,3 @@
+591.012500 0.000000 322.525000
+0.000000 590.167750 244.110840
+0.000000 0.000000 1.000000
\ No newline at end of file
diff --git a/notebook/images/nocs_0003_0354/depth.png b/notebook/images/nocs_0003_0354/depth.png
new file mode 100644
index 00000000..37918f96
Binary files /dev/null and b/notebook/images/nocs_0003_0354/depth.png differ
diff --git a/notebook/images/nocs_0003_0354/label.png b/notebook/images/nocs_0003_0354/label.png
new file mode 100644
index 00000000..e256550d
Binary files /dev/null and b/notebook/images/nocs_0003_0354/label.png differ
diff --git a/notebook/images/nocs_0003_0354/rgb.png b/notebook/images/nocs_0003_0354/rgb.png
new file mode 100644
index 00000000..2c269494
Binary files /dev/null and b/notebook/images/nocs_0003_0354/rgb.png differ
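
Note (not part of the diff): the notebook hard-codes the camera intrinsics even though the same values are shipped in images/nocs_0003_0354/cam_K.txt above. Below is a minimal sketch of reading K from that file and reusing it in the Section 3 unprojection; the relative path is an assumption about the working directory, not something the PR defines.

# Sketch only: load the intrinsics from cam_K.txt instead of hard-coding them.
import numpy as np
import imageio.v3 as iio

image_dir = "notebook/images/nocs_0003_0354"  # assumed path relative to the repo root

# cam_K.txt stores the 3x3 intrinsic matrix as three whitespace-separated rows
K = np.loadtxt(f"{image_dir}/cam_K.txt", dtype=np.float32)
fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]

# Same unprojection as Section 3: depth in mm -> metric pointmap in camera space
depth = iio.imread(f"{image_dir}/depth.png").astype(np.float32) / 1000.0
depth[depth <= 0] = np.nan
H, W = depth.shape
uu, vv = np.meshgrid(np.arange(W), np.arange(H))
X = (uu - cx) * depth / fx
Y = (vv - cy) * depth / fy
pointmap = np.stack([-X, -Y, depth], axis=-1)  # flip X/Y for the PyTorch3D convention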