From a622fa33cdbc43dc66804fd8f65c9583321f1b48 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 16:42:53 +0800 Subject: [PATCH 01/59] docs: add Sphinx documentation setup with configuration files and dependencies --- .gitignore | 3 +++ .readthedocs.yaml | 28 ++++++++++++++++++++ docs/Makefile | 20 ++++++++++++++ docs/conf.py | 66 +++++++++++++++++++++++++++++++++++++++++++++++ docs/index.rst | 18 +++++++++++++ docs/make.bat | 35 +++++++++++++++++++++++++ pyproject.toml | 10 +++++++ 7 files changed, 180 insertions(+) create mode 100644 .readthedocs.yaml create mode 100644 docs/Makefile create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/make.bat diff --git a/.gitignore b/.gitignore index cb172652..81628d19 100644 --- a/.gitignore +++ b/.gitignore @@ -77,6 +77,9 @@ instance/ # Sphinx documentation docs/_build/ +docs/api/ +.doctrees +.buildinfo # PyBuilder target/ diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..38f7132a --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,28 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-24.04 + tools: + python: "3.10" # Using 3.10 as it's stable and compatible with your dependencies + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Optionally declare the Python requirements required to build your docs +python: + install: + - method: pip + path: . + extra_requirements: + - docs + +# Optionally build your docs in additional formats such as PDF +formats: + - pdf diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..d4bb2cbb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 00000000..e55170be --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,66 @@ +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +import os +import sys + +sys.path.insert(0, os.path.abspath("..")) + +project = "CamTools" +copyright = "2024, Yixing Lao" +author = "Yixing Lao" +release = "0.1" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", + "sphinx.ext.intersphinx", + "sphinx_rtd_theme", + "myst_parser", +] + +templates_path = ["_templates"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +language = "en" + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "sphinx_rtd_theme" +html_static_path = ["_static"] + +# Intersphinx configuration +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "numpy": ("https://numpy.org/doc/stable/", None), +} + +# Napoleon settings +napoleon_google_docstring = True +napoleon_numpy_docstring = True +napoleon_include_init_with_doc = True +napoleon_include_private_with_doc = False +napoleon_include_special_with_doc = True +napoleon_use_admonition_for_examples = False +napoleon_use_admonition_for_notes = False +napoleon_use_admonition_for_references = False +napoleon_use_ivar = False +napoleon_use_param = True +napoleon_use_rtype = True +napoleon_type_aliases = None + +# AutoDoc settings +autodoc_member_order = "bysource" +autodoc_typehints = "description" +autodoc_typehints_description_target = "documented" +add_module_names = False diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 00000000..5b40a470 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,18 @@ +Welcome to CamTools's documentation! +================================ + +.. include:: ../README.md + :parser: myst_parser.sphinx_ + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + api/modules + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..32bb2452 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/pyproject.toml b/pyproject.toml index 124e3ec5..4f13f152 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,11 @@ dev = [ "pytest-benchmark>=4.0.0", "ipdb", ] +docs = [ + "sphinx", + "sphinx-rtd-theme", + "myst-parser", +] torch = [ "torch>=1.8.0", "lpips>=0.1.4", @@ -45,3 +50,8 @@ torch = [ [tool.setuptools] packages = ["camtools", "camtools.tools"] + +[tool.black] +include = '\.pyi?$' +line-length = 79 +target-version = ['py38'] From 7d2508b2801c34912cf6d82d5b92cbd1bac470e2 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 16:46:14 +0800 Subject: [PATCH 02/59] chore: remove redundant comment in .readthedocs.yaml for cleaner configuration --- .readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 38f7132a..dd3dc63f 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -9,7 +9,7 @@ version: 2 build: os: ubuntu-24.04 tools: - python: "3.10" # Using 3.10 as it's stable and compatible with your dependencies + python: "3.10" # Build documentation in the "docs/" directory with Sphinx sphinx: From 6e3c9ed894f9450e4837f758638baa6f8ad3e14d Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 16:52:16 +0800 Subject: [PATCH 03/59] docs(README.md): add instructions for building and viewing documentation locally --- README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README.md b/README.md index 7ce8cd37..b6337035 100644 --- a/README.md +++ b/README.md @@ -251,6 +251,21 @@ the beginning of the README. [part 2](https://ksimek.github.io/2012/08/22/extrinsic/), and [part 3](https://ksimek.github.io/2013/08/13/intrinsic/). +## Building Documentation + +To build and view the documentation locally: + +```bash +# Build the documentation +cd docs +make html + +# Start a local server to view the documentation +python -m http.server 8000 --directory _build/html +``` + +Then open your browser and navigate to `http://localhost:8000` to view the documentation. + ## Contributing - Follow [Angular's commit message convention](https://github.com/angular/angular/blob/main/CONTRIBUTING.md#-commit-message-format) for PRs. 
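The build steps added above assume that Sphinx and the other documentation dependencies are already installed. They are declared in the `docs` extra introduced in the first patch, and `.readthedocs.yaml` installs the same extra, so a minimal local setup might look like this:

```bash
# Install CamTools in editable mode together with the "docs" extra
# (sphinx, sphinx-rtd-theme, myst-parser) declared in pyproject.toml.
pip install -e .[docs]

# Then build and serve the HTML documentation as described above.
cd docs
make html
python -m http.server 8000 --directory _build/html
```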
From 1b87ad4d244f2911d83e1939f385881255114395 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 16:58:18 +0800 Subject: [PATCH 04/59] docs: switch from sphinx_rtd_theme to furo theme for improved documentation styling and customization --- docs/conf.py | 15 +++++++++++++-- pyproject.toml | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index e55170be..7931801d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -24,7 +24,6 @@ "sphinx.ext.viewcode", "sphinx.ext.napoleon", "sphinx.ext.intersphinx", - "sphinx_rtd_theme", "myst_parser", ] @@ -36,9 +35,21 @@ # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -html_theme = "sphinx_rtd_theme" +html_theme = "furo" html_static_path = ["_static"] +# Furo theme options +html_theme_options = { + "light_css_variables": { + "color-brand-primary": "#2962ff", + "color-brand-content": "#2962ff", + }, + "dark_css_variables": { + "color-brand-primary": "#5c85ff", + "color-brand-content": "#5c85ff", + }, +} + # Intersphinx configuration intersphinx_mapping = { "python": ("https://docs.python.org/3", None), diff --git a/pyproject.toml b/pyproject.toml index 4f13f152..b457f746 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ docs = [ "sphinx", "sphinx-rtd-theme", "myst-parser", + "furo", ] torch = [ "torch>=1.8.0", From 9911fcc992fcf1711e982b2dd4e088de0fe15bd8 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 17:08:46 +0800 Subject: [PATCH 05/59] docs: add contributing, coordinates, features, and installation documentation to improve project clarity and onboarding --- docs/contributing.rst | 40 +++++++++++ docs/coordinates.rst | 160 ++++++++++++++++++++++++++++++++++++++++++ docs/features.rst | 100 ++++++++++++++++++++++++++ docs/index.rst | 32 +++++++-- docs/installation.rst | 41 +++++++++++ 5 files changed, 369 insertions(+), 4 deletions(-) create mode 100644 docs/contributing.rst create mode 100644 docs/coordinates.rst create mode 100644 docs/features.rst create mode 100644 docs/installation.rst diff --git a/docs/contributing.rst b/docs/contributing.rst new file mode 100644 index 00000000..4e961054 --- /dev/null +++ b/docs/contributing.rst @@ -0,0 +1,40 @@ +Contributing +=========== + +Contributing Guidelines +---------------------- + +- Follow `Angular's commit message convention `_ for PRs. + - This applies to PR's title and ultimately the commit messages in ``main``. + - The prefix shall be one of ``build``, ``ci``, ``docs``, ``feat``, ``fix``, ``perf``, ``refactor``, ``test``. + - Use lowercase. +- Format your code with `black `_. This will be enforced by the CI. + +Building Documentation +--------------------- + +To build and view the documentation locally: + +.. code-block:: bash + + # Build the documentation + cd docs + make html + + # Start a local server to view the documentation + python -m http.server 8000 --directory _build/html + +Then open your browser and navigate to ``http://localhost:8000`` to view the documentation. + +Build with CamTools +------------------- + +If you use CamTools in your project, consider adding one of the following +badges to your project. + +.. raw:: html + +

+ Built with CamTools + Built with CamTools +

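The guidelines above ask for `black` formatting, and the first patch adds a `[tool.black]` section with a line length of 79 to `pyproject.toml`. As a sketch, a local check before opening a PR could look like this (assuming `black` is installed separately, e.g. via pip):

```bash
pip install black

# Check formatting without modifying files; drop --check to reformat in place.
black --check .
```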
diff --git a/docs/coordinates.rst b/docs/coordinates.rst new file mode 100644 index 00000000..e07f85cf --- /dev/null +++ b/docs/coordinates.rst @@ -0,0 +1,160 @@ +Camera Coordinate System +======================== + +A homogeneous point ``[X, Y, Z, 1]`` in the world coordinate can be projected to a +homogeneous point ``[x, y, 1]`` in the image (pixel) coordinate using the +following equation: + +.. math:: + + \lambda + \left[\begin{array}{l} + x \\ + y \\ + 1 + \end{array}\right]=\left[\begin{array}{ccc} + f_{x} & 0 & c_{x} \\ + 0 & f_{y} & c_{y} \\ + 0 & 0 & 1 + \end{array}\right]\left[\begin{array}{llll} + R_{00} & R_{01} & R_{02} & t_{0} \\ + R_{10} & R_{11} & R_{12} & t_{1} \\ + R_{20} & R_{21} & R_{22} & t_{2} + \end{array}\right]\left[\begin{array}{c} + X \\ + Y \\ + Z \\ + 1 + \end{array}\right]. + +We follow the standard OpenCV-style camera coordinate system as illustrated at +the beginning of the documentation. + +Camera Coordinate +---------------- + +Right-handed, with :math:`Z` pointing away from the camera towards the view direction +and :math:`Y` axis pointing down. Note that the OpenCV convention (camtools' default) +is different from the OpenGL/Blender convention, where :math:`Z` points towards the +opposite view direction, :math:`Y` points up and :math:`X` points right. + +To convert between the OpenCV camera coordinates and the OpenGL-style coordinates, +use the conversion functions: + +- ``ct.convert.T_opencv_to_opengl()`` +- ``ct.convert.T_opengl_to_opencv()`` +- ``ct.convert.pose_opencv_to_opengl()`` +- ``ct.convert.pose_opengl_to_opencv()`` + +Image Coordinate +--------------- + +Starts from the top-left corner of the image, with :math:`x` pointing right +(corresponding to the image width) and :math:`y` pointing down (corresponding to +the image height). This is consistent with OpenCV. + +Pay attention that the 0th dimension in the image array is the height (i.e., :math:`y`) +and the 1st dimension is the width (i.e., :math:`x`). That is: + +- :math:`x` <=> ``u`` <=> width <=> column <=> the 1st dimension +- :math:`y` <=> ``v`` <=> height <=> row <=> the 0th dimension + +Matrix Definitions +----------------- + +Camera Intrinsic Matrix (K) +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``K`` is a ``(3, 3)`` camera intrinsic matrix: + +.. code-block:: python + + K = [[fx, s, cx], + [ 0, fy, cy], + [ 0, 0, 1]] + +Camera Extrinsic Matrix (T or W2C) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``T`` is a ``(4, 4)`` camera extrinsic matrix: + +.. code-block:: python + + T = [[R | t = [[R00, R01, R02, t0], + 0 | 1]] [R10, R11, R12, t1], + [R20, R21, R22, t2], + [ 0, 0, 0, 1]] + +- ``T`` is also known as the world-to-camera ``W2C`` matrix, which transforms a + point in the world coordinate to the camera coordinate. +- ``T``'s shape is ``(4, 4)``, not ``(3, 4)``. +- ``T`` is the inverse of ``pose``, i.e., ``np.linalg.inv(T) == pose``. +- The camera center ``C`` in world coordinate is projected to ``[0, 0, 0, 1]`` in + camera coordinate. + +Rotation Matrix (R) +^^^^^^^^^^^^^^^^^ + +``R`` is a ``(3, 3)`` rotation matrix: + +.. code-block:: python + + R = T[:3, :3] + +- ``R`` is a rotation matrix. It is an orthogonal matrix with determinant 1, as + rotations preserve volume and orientation. + - ``R.T == np.linalg.inv(R)`` + - ``np.linalg.norm(R @ x) == np.linalg.norm(x)``, where ``x`` is a ``(3,)`` + vector. + +Translation Vector (t) +^^^^^^^^^^^^^^^^^^^^ + +``t`` is a ``(3,)`` translation vector: + +.. code-block:: python + + t = T[:3, 3] + +- ``t``'s shape is ``(3,)``, not ``(3, 1)``. 
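As a minimal NumPy sketch of the conventions above (illustrative values only), ``T`` can be assembled from ``R`` and ``t`` and the stated properties checked directly:

```python
import numpy as np

# A toy rotation about the z-axis and an arbitrary translation.
theta = np.deg2rad(30.0)
R = np.array([
    [np.cos(theta), -np.sin(theta), 0.0],
    [np.sin(theta),  np.cos(theta), 0.0],
    [0.0,            0.0,           1.0],
])
t = np.array([0.1, -0.2, 1.5])

# T = [[R | t], [0 | 1]], with shape (4, 4).
T = np.eye(4)
T[:3, :3] = R
T[:3, 3] = t

assert np.allclose(R.T @ R, np.eye(3))          # R is orthogonal
assert np.isclose(np.linalg.det(R), 1.0)        # det(R) == 1
assert np.allclose(T[3], [0.0, 0.0, 0.0, 1.0])  # bottom row of T
```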
+ +Camera Pose Matrix (pose or C2W) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``pose`` is a ``(4, 4)`` camera pose matrix. It is the inverse of ``T``. + +- ``pose`` is also known as the camera-to-world ``C2W`` matrix, which transforms a + point in the camera coordinate to the world coordinate. +- ``pose`` is the inverse of ``T``, i.e., ``pose == np.linalg.inv(T)``. + +Camera Center (C) +^^^^^^^^^^^^^^^ + +``C`` is the camera center: + +.. code-block:: python + + C = pose[:3, 3] + +- ``C``'s shape is ``(3,)``, not ``(3, 1)``. +- ``C`` is the camera center in world coordinate. It is also the translation + vector of ``pose``. + +Projection Matrix (P) +^^^^^^^^^^^^^^^^^^^ + +``P`` is a ``(3, 4)`` camera projection matrix: + +- ``P`` is the world-to-pixel projection matrix, which projects a point in the + homogeneous world coordinate to the homogeneous pixel coordinate. +- ``P`` is the product of the intrinsic and extrinsic parameters: + + .. code-block:: python + + # P = K @ [R | t] + P = K @ np.hstack([R, t[:, None]]) + +- ``P``'s shape is ``(3, 4)``, not ``(4, 4)``. +- It is possible to decompose ``P`` into intrinsic and extrinsic matrices by QR + decomposition. +- Don't confuse ``P`` with ``pose``. Don't confuse ``P`` with ``T``. diff --git a/docs/features.rst b/docs/features.rst new file mode 100644 index 00000000..9de882a2 --- /dev/null +++ b/docs/features.rst @@ -0,0 +1,100 @@ +Features +======== + +What can you do with CamTools? +------------------------------ + +1. Plot cameras +^^^^^^^^^^^^^^^ + +Useful for debugging 3D reconstruction and NeRFs! + +.. code-block:: python + + import camtools as ct + import open3d as o3d + cameras = ct.camera.create_camera_frustums(Ks, Ts) + o3d.visualization.draw_geometries([cameras]) + +.. raw:: html + +

+ +

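The ``Ks`` and ``Ts`` in the snippet above are batched camera intrinsics and extrinsics, as documented in ``coordinates.rst`` above. A minimal sketch that builds them for two synthetic cameras, assuming (as in the snippet above) that they can be passed as lists or arrays of ``(3, 3)`` and ``(4, 4)`` matrices:

```python
import numpy as np
import camtools as ct
import open3d as o3d

# Two synthetic pinhole cameras with arbitrary intrinsics.
K = np.array([
    [500.0,   0.0, 320.0],
    [  0.0, 500.0, 240.0],
    [  0.0,   0.0,   1.0],
])
Ks = [K, K]

# Identity extrinsic plus a second one with a small translation,
# so that the two frustums do not overlap.
T0 = np.eye(4)
T1 = np.eye(4)
T1[0, 3] = 0.5
Ts = [T0, T1]

cameras = ct.camera.create_camera_frustums(Ks, Ts)
o3d.visualization.draw_geometries([cameras])
```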
+ +2. Convert camera parameters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: python + + pose = ct.convert.T_to_pose(T) # Convert T to pose + T = ct.convert.pose_to_T(pose) # Convert pose to T + R, t = ct.convert.T_to_R_t(T) # Convert T to R and t + C = ct.convert.pose_to_C(pose) # Convert pose to camera center + K, T = ct.convert.P_to_K_T(P) # Decompose projection matrix P to K and T + # And more... + +3. Projection and ray casting +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: python + + # Project 3D points to pixels. + pixels = ct.project.points_to_pixel(points, K, T) + + # Back-project depth image to 3D points. + points = ct.project.im_depth_to_points(im_depth, K, T) + + # Ray cast a triangle mesh to depth image given the camera parameters. + im_depth = ct.raycast.mesh_to_im_depth(mesh, K, T, height, width) + + # And more... + +4. Image and depth I/O +^^^^^^^^^^^^^^^^^^^^^^ + +Strict type checks and range checks are enforced. The image and depth I/O +APIs are specifically designed to solve the following pain points: + +- Is my image of type ``float32`` or ``uint8``? +- Does it have range ``[0, 1]`` or ``[0, 255]``? +- Is it RGB or BGR? +- Does my image have an alpha channel? +- When saving depth image as integer-based ``.png``, is it correctly scaled? + +.. code-block:: python + + ct.io.imread() + ct.io.imwrite() + ct.io.imread_detph() + ct.io.imwrite_depth() + +5. Command-line tools +^^^^^^^^^^^^^^^^^^^^^ + +The ``ct`` command runs in terminal: + +.. code-block:: bash + + # Crop image boarders. + ct crop-boarders *.png --pad_pixel 10 --skip_cropped --same_crop + + # Draw synchronized bounding boxes interactively. + ct draw-bboxes path/to/a.png path/to/b.png + + # For more command-line tools. + ct --help + +.. raw:: html + +

+ +

+ +6. And more +^^^^^^^^^^^ + +- Solve line intersections +- COLMAP tools +- Points normalization +- And more... diff --git a/docs/index.rst b/docs/index.rst index 5b40a470..39b53d99 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,13 +1,37 @@ -Welcome to CamTools's documentation! -================================ +Welcome to CamTools +=================== -.. include:: ../README.md - :parser: myst_parser.sphinx_ +.. raw:: html + +

+ + + CamTools Logo + +

+ +CamTools is a collection of tools for handling cameras in computer vision. It +can be used for plotting, converting, projecting, ray casting, and doing more +with camera parameters. It follows the standard camera coordinate system with +clear and easy-to-use APIs. + +.. raw:: html + +

+ + + Camera Coordinates + +

.. toctree:: :maxdepth: 2 :caption: Contents: + features + installation + coordinates + contributing api/modules Indices and tables diff --git a/docs/installation.rst b/docs/installation.rst new file mode 100644 index 00000000..b4c9f44f --- /dev/null +++ b/docs/installation.rst @@ -0,0 +1,41 @@ +Installation +============ + +Quick Installation +----------------- + +To install CamTools, simply do: + +.. code-block:: bash + + pip install camtools + +Installation from Source +----------------------- + +Alternatively, you can install CamTools from source with one of the following +methods: + +.. code-block:: bash + + git clone https://github.com/yxlao/camtools.git + cd camtools + + # Installation mode, if you want to use camtools only. + pip install . + + # Editable mode, if you want to modify camtools on the fly. + pip install -e . + + # Editable mode and dev dependencies. + pip install -e .[dev] + + # Help VSCode resolve imports when installed with editable mode. + # https://stackoverflow.com/a/76897706/1255535 + pip install -e .[dev] --config-settings editable_mode=strict + + # Enable torch-related features (e.g. computing image metrics) + pip install camtools[torch] + + # Enable torch-related features in editable mode + pip install -e .[torch] From 41d0bd6d558f596faba6b7bf3c5841a886e2801f Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 17:13:44 +0800 Subject: [PATCH 06/59] style(docs): fix section underline lengths for consistency across documentation files --- docs/contributing.rst | 6 +++--- docs/coordinates.rst | 24 ++++++++++++------------ docs/installation.rst | 4 ++-- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/contributing.rst b/docs/contributing.rst index 4e961054..28a3dfa1 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -1,8 +1,8 @@ Contributing -=========== +============ Contributing Guidelines ----------------------- +----------------------- - Follow `Angular's commit message convention `_ for PRs. - This applies to PR's title and ultimately the commit messages in ``main``. @@ -11,7 +11,7 @@ Contributing Guidelines - Format your code with `black `_. This will be enforced by the CI. Building Documentation ---------------------- +---------------------- To build and view the documentation locally: diff --git a/docs/coordinates.rst b/docs/coordinates.rst index e07f85cf..cd0f53d8 100644 --- a/docs/coordinates.rst +++ b/docs/coordinates.rst @@ -31,7 +31,7 @@ We follow the standard OpenCV-style camera coordinate system as illustrated at the beginning of the documentation. Camera Coordinate ----------------- +----------------- Right-handed, with :math:`Z` pointing away from the camera towards the view direction and :math:`Y` axis pointing down. Note that the OpenCV convention (camtools' default) @@ -47,7 +47,7 @@ use the conversion functions: - ``ct.convert.pose_opengl_to_opencv()`` Image Coordinate ---------------- +---------------- Starts from the top-left corner of the image, with :math:`x` pointing right (corresponding to the image width) and :math:`y` pointing down (corresponding to @@ -60,10 +60,10 @@ and the 1st dimension is the width (i.e., :math:`x`). 
That is: - :math:`y` <=> ``v`` <=> height <=> row <=> the 0th dimension Matrix Definitions ------------------ +------------------ Camera Intrinsic Matrix (K) -^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ ``K`` is a ``(3, 3)`` camera intrinsic matrix: @@ -74,7 +74,7 @@ Camera Intrinsic Matrix (K) [ 0, 0, 1]] Camera Extrinsic Matrix (T or W2C) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ``T`` is a ``(4, 4)`` camera extrinsic matrix: @@ -93,7 +93,7 @@ Camera Extrinsic Matrix (T or W2C) camera coordinate. Rotation Matrix (R) -^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^ ``R`` is a ``(3, 3)`` rotation matrix: @@ -108,7 +108,7 @@ Rotation Matrix (R) vector. Translation Vector (t) -^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^ ``t`` is a ``(3,)`` translation vector: @@ -119,7 +119,7 @@ Translation Vector (t) - ``t``'s shape is ``(3,)``, not ``(3, 1)``. Camera Pose Matrix (pose or C2W) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ``pose`` is a ``(4, 4)`` camera pose matrix. It is the inverse of ``T``. @@ -128,7 +128,7 @@ Camera Pose Matrix (pose or C2W) - ``pose`` is the inverse of ``T``, i.e., ``pose == np.linalg.inv(T)``. Camera Center (C) -^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^ ``C`` is the camera center: @@ -141,7 +141,7 @@ Camera Center (C) vector of ``pose``. Projection Matrix (P) -^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^ ``P`` is a ``(3, 4)`` camera projection matrix: @@ -151,8 +151,8 @@ Projection Matrix (P) .. code-block:: python - # P = K @ [R | t] - P = K @ np.hstack([R, t[:, None]]) + # P = K @ [R | t] + P = K @ np.hstack([R, t[:, None]]) - ``P``'s shape is ``(3, 4)``, not ``(4, 4)``. - It is possible to decompose ``P`` into intrinsic and extrinsic matrices by QR diff --git a/docs/installation.rst b/docs/installation.rst index b4c9f44f..1695a722 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -2,7 +2,7 @@ Installation ============ Quick Installation ------------------ +------------------ To install CamTools, simply do: @@ -11,7 +11,7 @@ To install CamTools, simply do: pip install camtools Installation from Source ------------------------ +------------------------ Alternatively, you can install CamTools from source with one of the following methods: From 51ac2207edbe698d527f4793b2950522e6da5647 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 17:22:54 +0800 Subject: [PATCH 07/59] docs(camtools): improve docstrings for clarity and consistency across modules --- camtools/image.py | 171 +++++++++++++++++++++----------------------- camtools/io.py | 76 ++++++++------------ camtools/metric.py | 34 ++++----- camtools/raycast.py | 162 ++++++++++++++++++++++------------------- camtools/render.py | 153 ++++++++++++++++++--------------------- camtools/sanity.py | 15 ++-- camtools/util.py | 40 +++++------ 7 files changed, 311 insertions(+), 340 deletions(-) diff --git a/camtools/image.py b/camtools/image.py index 3da1766e..1a55a5d0 100644 --- a/camtools/image.py +++ b/camtools/image.py @@ -204,27 +204,20 @@ def apply_croppings_paddings( Apply cropping and padding to a list of RGB images. Args: - src_ims: list of images, float32. - croppings: list of 4-tuples - [ - (crop_t, crop_b, crop_l, crop_r), - (crop_t, crop_b, crop_l, crop_r), - ... - ] - paddings: list of 4-tuples - [ - (pad_t, pad_b, pad_l, pad_r), - (pad_t, pad_b, pad_l, pad_r), - ... - ] + src_ims (List[Float[np.ndarray, "h w 3"]]): List of source images as float32 + arrays with shape (height, width, 3). 
+ croppings (List[Tuple[int, int, int, int]]): List of cropping tuples in the + format [(crop_t, crop_b, crop_l, crop_r), ...]. + paddings (List[Tuple[int, int, int, int]]): List of padding tuples in the + format [(pad_t, pad_b, pad_l, pad_r), ...]. Returns: - List[Float[np.ndarray, "h_cropped w_cropped 3"]]: List of cropped and padded images - as float32 arrays with shape (height_cropped, width_cropped, 3). + List[Float[np.ndarray, "h_cropped w_cropped 3"]]: List of cropped and padded + images as float32 arrays with shape (height_cropped, width_cropped, 3). Raises: - ValueError: If the number of croppings or paddings doesn't match the number of images, - or if any cropping tuple has invalid length. + ValueError: If the number of croppings or paddings doesn't match the number + of images, or if any cropping tuple has invalid length. """ num_ims = len(src_ims) if not len(croppings) == num_ims: @@ -249,26 +242,23 @@ def get_post_croppings_paddings_shapes( paddings: List[Tuple[int, int, int, int]], ) -> List[Tuple[int, int, int]]: """ - Compute the shapes of images after applying cropping and padding. + Calculate the shapes of images after applying cropping and padding. Args: - src_shapes: list of source image shapes. - croppings: list of 4-tuples - [ - (crop_t, crop_b, crop_l, crop_r), - (crop_t, crop_b, crop_l, crop_r), - ... - ] - paddings: list of 4-tuples - [ - (pad_t, pad_b, pad_l, pad_r), - (pad_t, pad_b, pad_l, pad_r), - ... - ] + src_shapes (List[Tuple[int, int, int]]): List of source image shapes in + (height, width, channels) format. + croppings (List[Tuple[int, int, int, int]]): List of cropping tuples in the + format [(crop_t, crop_b, crop_l, crop_r), ...]. + paddings (List[Tuple[int, int, int, int]]): List of padding tuples in the + format [(pad_t, pad_b, pad_l, pad_r), ...]. Returns: - List[Tuple[int, int, int]]: List of resulting image shapes after cropping and padding - in the format (height_cropped, width_cropped, channels). + List[Tuple[int, int, int]]: List of output shapes in (height, width, channels) + format after applying cropping and padding. + + Raises: + ValueError: If the number of croppings or paddings doesn't match the number + of source shapes. """ dst_shapes = [] for src_shape, cropping, padding in zip(src_shapes, croppings, paddings): @@ -338,31 +328,26 @@ def ndc_coords_to_pixels( align_corners: bool = False, ) -> Float[np.ndarray, "n 2"]: """ - Convert Normalized Device Coordinates (NDC) to pixel coordinates. + Convert normalized device coordinates (NDC) to pixel coordinates. Args: - ndc_coords: NDC coordinates. Each row represents (x, y) or (c, r). - Most values shall be in [-1, 1], where (-1, -1) is the top left - corner and (1, 1) is the bottom right corner. - im_size_wh: Image size (width, height). - align_corners: Determines how NDC coordinates map to pixel coordinates: - - If True: -1 and 1 are aligned to the center of the corner pixels - - If False: -1 and 1 are aligned to the corner of the corner pixels - In general image interpolation: - - When align_corners=True: src and dst images are aligned by the center - point of their corner pixels - - When align_corners=False: src and dst images are aligned by the corner - points of the corner pixels - The NDC space does not have a "pixels size", so we precisely align the - extrema -1 and 1 to either the center or corner of the corner pixels. + ndc_coords (Float[np.ndarray, "n 2"]): Input coordinates in NDC space + (-1 to 1). Shape is (n, 2) where n is the number of points. 
+ im_size_wh (Tuple[int, int]): Image size in (width, height) format. + align_corners (bool): If True, extreme values (-1 and 1) are considered to + refer to the center points of the border pixels. If False, extreme + values refer to the outer edges of the border pixels. Default: False. Returns: - Float[np.ndarray, "n 2"]: Pixel coordinates as a float array with shape - (num_points, 2). Out-of-bound values are not corrected. + Float[np.ndarray, "n 2"]: Pixel coordinates with shape (n, 2). The + coordinates are in (x, y) format, where x is the horizontal coordinate + and y is the vertical coordinate. Notes: - This function is commonly used in computer graphics to map normalized - coordinates to specific pixel locations in an image. + - NDC space has (-1, -1) at the top-left corner and (1, 1) at the + bottom-right corner. + - Pixel space has (0, 0) at the top-left corner and (w-1, h-1) at the + bottom-right corner. """ sanity.assert_shape(ndc_coords, (None, 2), name="ndc_coords") w, h = im_size_wh[:2] @@ -510,32 +495,32 @@ def resize( UInt16[np.ndarray, "h_ w_ 3"], ]: """ - Resize an image to a specified width and height, optionally maintaining aspect ratio. + Resize an image to a specified size. Args: im (Union[Float[np.ndarray, "h w"], Float[np.ndarray, "h w 3"], - UInt8[np.ndarray, "h w"], UInt8[np.ndarray, "h w 3"], - UInt16[np.ndarray, "h w"], UInt16[np.ndarray, "h w 3"]]): - Input image as a numpy array with shape (height, width) or (height, width, 3). - Supported dtypes: uint8, uint16, float32, float64. - shape_wh (Tuple[int, int]): Target size as (width, height) in pixels. - aspect_ratio_fill (Optional[Union[float, Tuple[float, float, float], np.ndarray]]): - Value(s) to use for padding when maintaining aspect ratio. If None, image is - directly resized without maintaining aspect ratio. If provided, must match - the number of channels in the input image. - interpolation (int): OpenCV interpolation method (e.g., cv2.INTER_LINEAR). + UInt8[np.ndarray, "h w"], UInt8[np.ndarray, "h w 3"], + UInt16[np.ndarray, "h w"], UInt16[np.ndarray, "h w 3"]]): + Input image to resize. Can be single-channel or RGB, with float32, + uint8, or uint16 data type. + shape_wh (Tuple[int, int]): Target size in (width, height) format. + aspect_ratio_fill (Optional[Union[float, Tuple[float, float, float], + np.ndarray]]): Fill value for padding when preserving aspect ratio. + For float32 images, use values in [0, 1]. For uint8 images, use + values in [0, 255]. For uint16 images, use values in [0, 65535]. + Default: None. + interpolation (int): OpenCV interpolation method. Default: cv2.INTER_LINEAR. Returns: Union[Float[np.ndarray, "h_ w_"], Float[np.ndarray, "h_ w_ 3"], - UInt8[np.ndarray, "h_ w_"], UInt8[np.ndarray, "h_ w_ 3"], - UInt16[np.ndarray, "h_ w_"], UInt16[np.ndarray, "h_ w_ 3"]]: - Resized image with the same dtype as input. Shape will be (height, width) - or (height, width, 3) depending on input. + UInt8[np.ndarray, "h_ w_"], UInt8[np.ndarray, "h_ w_ 3"], + UInt16[np.ndarray, "h_ w_"], UInt16[np.ndarray, "h_ w_ 3"]]: + Resized image with the same data type as input. Notes: - - When maintaining aspect ratio, the image is resized to fit within the target - dimensions and padded with aspect_ratio_fill values as needed. - - OpenCV uses (width, height) for image size while numpy uses (height, width). + - If aspect_ratio_fill is None, the image is stretched to fit the target size. + - If aspect_ratio_fill is provided, the image is resized preserving aspect + ratio and padded with the fill value. 
""" # Sanity: dtype. dtype = im.dtype @@ -690,30 +675,36 @@ def make_corres_image( sample_ratio: Optional[float] = None, ) -> Float[np.ndarray, "h 2*w 3"]: """ - Make correspondence image. + Create a correspondence visualization image by combining two images side by side. Args: - im_src: (h, w, 3) float image, range 0-1. - im_dst: (h, w, 3) float image, range 0-1. - src_pixels: (n, 2) int array, each row represents (x, y) or (c, r). - dst_pixels: (n, 2) int array, each row represents (x, y) or (c, r). - confidences: (n,) float array, confidence of each corres, range [0, 1]. - texts: List of texts to draw on the top-left of the image. - point_color: RGB or RGBA color of the point, float, range 0-1. - - If point_color == None: - points will never be drawn. - - If point_color != None and confidences == None - point color will be determined by point_color. - - If point_color != None and confidences != None - point color will be determined by "viridis" colormap. - line_color: RGB or RGBA color of the line, float, range 0-1. - text_color: RGB color of the text, float, range 0-1. - point_size: Size of the point. - line_width: Width of the line. - sample_ratio: Float value from 0-1. If None, all points are drawn. + im_src (Float[np.ndarray, "h w 3"]): Source image as float32 array with + shape (height, width, 3). Values should be in range [0, 1]. + im_dst (Float[np.ndarray, "h w 3"]): Destination image as float32 array with + shape (height, width, 3). Values should be in range [0, 1]. + src_pixels (Int[np.ndarray, "n 2"]): Source pixel coordinates as int array + with shape (n, 2) in (x, y) format. + dst_pixels (Int[np.ndarray, "n 2"]): Destination pixel coordinates as int + array with shape (n, 2) in (x, y) format. + confidences (Optional[Float[np.ndarray, "n"]]): Confidence scores for each + correspondence. Values should be in range [0, 1]. Default: None. + texts (Optional[List[str]]): Text labels for each correspondence point. + Default: None. + point_color (Optional[Tuple[float, ...]]): Color for correspondence points + in RGBA format. Default: (0, 1, 0, 1.0). + line_color (Optional[Tuple[float, ...]]): Color for correspondence lines + in RGBA format. Default: (0, 0, 1, 0.75). + text_color (Tuple[float, float, float]): Color for text labels in RGB + format. Default: (1, 1, 1). + point_size (int): Size of correspondence points in pixels. Default: 1. + line_width (int): Width of correspondence lines in pixels. Default: 1. + sample_ratio (Optional[float]): If provided, randomly sample this ratio of + correspondences to display. Default: None. Returns: - Correspondence image with shape (h, 2*w, 3). + Float[np.ndarray, "h 2*w 3"]: Visualization image as float32 array with + shape (height, 2*width, 3), showing source and destination images side + by side with correspondence lines. """ assert im_src.shape == im_dst.shape assert im_src.ndim == 3 and im_src.shape[2] == 3 diff --git a/camtools/io.py b/camtools/io.py index 1c028d19..93fc68f9 100644 --- a/camtools/io.py +++ b/camtools/io.py @@ -117,12 +117,10 @@ def imwrite_depth( Args: im_path (Union[str, Path]): Output file path. Must have .png extension. Parent directories will be created automatically if they don't exist. - im (Float[np.ndarray]): Depth map as a 2D numpy array. Must be: - Shape: (height, width) - Data type: float32 or float64 - Values: Depth values in meters (or other consistent units) - depth_scale (float, optional): Scaling factor to apply before converting to uint16. Defaults to 1000.0. 
This determines the precision of stored depth values. For example: @@ -137,16 +135,16 @@ def imwrite_depth( - When reading the depth map with imread_depth(), use the same depth_scale to recover the original depth values - The user is responsible for defining what is invalid depth. E.g., - invalid depth can represented as np.nan, np.inf, 0, -1, etc. This - function simply multiplies the depth by depth_scale can convert to - uint16. For instance, with depth_scale = 1000, - - Input depths : [np.nan, np.inf, -np.inf, 0, -1, 3.14] - - Written to ".png": [ 0, 0, 0, 0, 64536, 3140] - - Read from ".png" : [ 0, 0, 0, 0, 64536, 3140] - - Convert to float : [ 0, 0, 0, 0, 64.536, 3.14] - ^ - Best practice. + The user is responsible for defining what is invalid depth. For example, + invalid depth can be represented as np.nan, np.inf, 0, -1, etc. This + function simply multiplies the depth by depth_scale and converts to + uint16. For instance, with depth_scale = 1000: + + - Input depths : [np.nan, np.inf, -np.inf, 0, -1, 3.14] + - Written to ".png": [ 0, 0, 0, 0, 64536, 3140] + - Read from ".png" : [ 0, 0, 0, 0, 64536, 3140] + - Convert to float : [ 0, 0, 0, 0, 64.536, 3.14] + Note that -1 is converted to 64536 / 1000 = 64.536 meters, therefore, it is important to clip depth with min_depth and max_depth. The best practice is to use 0 as invalid depth. @@ -317,57 +315,39 @@ def imread_depth( depth_scale: float = 1000.0, ) -> Float[np.ndarray, "h w"]: """ - Read and normalize a 16-bit depth map from a PNG file. + Read a depth map from a 16-bit PNG file and convert to float. This function handles depth map reading by: - - Loading 16-bit depth values from PNG - - Converting to float32 - - Applying depth scale normalization + - Loading 16-bit PNG data + - Converting to float32 format + - Applying depth scale to recover original values - Validating input data Args: - im_path (Union[str, Path]): Path to the depth map file. Must be a 16-bit PNG. - - depth_scale (float, optional): Scale factor to divide depth values by. - Defaults to 1000.0. This should match the scale used when writing - the depth map with imwrite_depth(). + im_path (Union[str, Path]): Path to the depth map PNG file. + depth_scale (float, optional): Scaling factor to convert from uint16 to + float. Defaults to 1000.0. Must match the scale used when saving + the depth map. For example: + - depth_scale=1000: 1mm precision + - depth_scale=100: 1cm precision + - depth_scale=1: 1m precision Returns: - Float[np.ndarray]: Depth map as a 2D numpy array with: - - Data type: float32 - - Shape: (height, width) - - Values: Depth values in meters (or other consistent units) + Float[np.ndarray, "h w"]: Depth map as a float32 array with shape + (height, width). Values are in the original units (typically meters). Notes: - - Invalid depth values (0, 65535) are preserved in the output - - The depth_scale should match the one used during writing + - Zero values in the PNG file are preserved as zeros in the output + - Non-zero values are divided by depth_scale to recover original depths + - Use the same depth_scale value that was used with imwrite_depth() - For best results, use 0 to represent invalid depth values - - Depth values are not automatically clipped - the user should - handle clipping based on their specific requirements - - The user is responsible for defining what is invalid depth. E.g., - invalid depth can represented as np.nan, np.inf, 0, -1, etc. This - function simply multiplies the depth by depth_scale can convert to - uint16. 
For instance, with depth_scale = 1000, - - Input depths : [np.nan, np.inf, -np.inf, 0, -1, 3.14] - - Written to ".png": [ 0, 0, 0, 0, 64536, 3140] - - Read from ".png" : [ 0, 0, 0, 0, 64536, 3140] - - Convert to float : [ 0, 0, 0, 0, 64.536, 3.14] - ^ - Best practice. - Note that -1 is converted to 64536 / 1000 = 64.536 meters, therefore, - it is important to clip depth with min_depth and max_depth. The best - practice is to use 0 as invalid depth. Examples: - >>> # Read depth map with 1mm precision + >>> # Read depth map saved with 1mm precision >>> depth = imread_depth('depth.png', depth_scale=1000) - >>> # Read depth map with 1cm precision + >>> # Read depth map saved with 1cm precision >>> depth = imread_depth('depth.png', depth_scale=100) - - >>> # Read depth map with 1m precision - >>> depth = imread_depth('depth.png', depth_scale=1) """ im_path = Path(im_path) assert is_png_path(im_path), f"{im_path} is not a PNG file." diff --git a/camtools/metric.py b/camtools/metric.py index 7b85349a..ab8ae5e2 100644 --- a/camtools/metric.py +++ b/camtools/metric.py @@ -204,25 +204,25 @@ def load_im_pd_im_gt_im_mask_for_eval( Load prediction, ground truth, and mask images for image metric evaluation. Args: - im_pd_path: Path to the rendered image. - im_gt_path: Path to the ground truth RGB or RGBA image. - im_mask_path: Path to the mask image. The mask will be resized to the - same (h, w) as im_gt. - alpha_mode: The mode on how to handle the alpha channel. Currently only - "white" is supported. - - "white": If im_gt contains alpha channel, im_gt will be converted - to RGB, the background will be rendered as white, the - alpha channel will be then ignored. - - "keep" : If im_gt contains alpha channel, the alpha channel will - be used as mask. This mask can be overwritten by - im_mask_path if im_mask_path is not None. - (This option is not implemented yet.) + im_pd_path (Union[str, Path]): Path to the rendered image. + im_gt_path (Union[str, Path]): Path to the ground truth RGB or RGBA image. + im_mask_path (Optional[Union[str, Path]]): Path to the mask image. The mask + will be resized to the same (h, w) as im_gt. + alpha_mode (str): The mode for handling alpha channels. Currently only + "white" is supported. Options: + - "white": If im_gt contains alpha channel, it will be converted to RGB + with white background, and the alpha channel will be ignored. + - "keep": If im_gt contains alpha channel, it will be used as mask. + This mask can be overwritten by im_mask_path if provided. + (This option is not implemented yet.) Returns: - im_pd: (h, w, 3), float32, value in [0, 1]. - im_gt: (h, w, 3), float32, value in [0, 1]. - im_mask: (h, w), float32, value only 0 or 1. Even if im_mask_path is - None, im_mask will be returned as all 1s. + Tuple[Float[np.ndarray, "h w 3"], Float[np.ndarray, "h w 3"], + Float[np.ndarray, "h w"]]: A tuple containing: + - im_pd: Prediction image with shape (h, w, 3), float32, range [0, 1] + - im_gt: Ground truth image with shape (h, w, 3), float32, range [0, 1] + - im_mask: Binary mask with shape (h, w), float32, values in {0, 1}. + If im_mask_path is None, all values will be 1. 
""" if alpha_mode != "white": raise NotImplementedError('Currently only alpha_mode="white" is supported.') diff --git a/camtools/raycast.py b/camtools/raycast.py index 6967f22e..6223023c 100644 --- a/camtools/raycast.py +++ b/camtools/raycast.py @@ -83,28 +83,31 @@ def mesh_to_im_distance( - P is the intersection point on the mesh surface - ||·|| denotes the Euclidean norm - Example usage: - # Create distance image for a 640x480 view - distance_image = ct.raycast.mesh_to_im_distance(mesh, K, T, 480, 640) - # Visualize distances - plt.imshow(distance_image) - plt.colorbar() - Args: - mesh: Open3D TriangleMesh to be ray casted. - K: (3, 3) camera intrinsic matrix. - T: (4, 4) camera extrinsic matrix (world-to-camera transformation). - height: Image height in pixels. - width: Image width in pixels. + mesh (o3d.geometry.TriangleMesh): Open3D TriangleMesh to be ray casted. + K (Float[np.ndarray, "3 3"]): Camera intrinsic matrix. + T (Float[np.ndarray, "4 4"]): Camera extrinsic matrix (world-to-camera + transformation). + height (int): Image height in pixels. + width (int): Image width in pixels. Returns: - (height, width) float32 array representing the distance image. Each - pixel contains the distance from the camera center to the mesh surface. - Invalid distances (no intersection) are set to np.inf. - - Note: For casting the same mesh with multiple camera views, use - mesh_to_im_distances for better efficiency as it avoids repeated scene - setup. + Float[np.ndarray, "h w"]: Distance image as a float32 array with shape + (height, width). Each pixel contains the distance from the camera + center to the mesh surface. Invalid distances (no intersection) are + set to np.inf. + + Example: + >>> # Create distance image for a 640x480 view + >>> distance_image = ct.raycast.mesh_to_im_distance(mesh, K, T, 480, 640) + >>> # Visualize distances + >>> plt.imshow(distance_image) + >>> plt.colorbar() + + Note: + For casting the same mesh with multiple camera views, use + mesh_to_im_distances for better efficiency as it avoids repeated scene + setup. """ im_distances = mesh_to_im_distances( mesh=mesh, @@ -137,28 +140,31 @@ def mesh_to_im_distances( - P_i is the intersection point on the mesh surface for view i - ||·|| denotes the Euclidean norm - Example usage: - # Create distance images for 3 different views - distances = ct.raycast.mesh_to_im_distances(mesh, Ks, Ts, 480, 640) - # Visualize first view's distances - plt.imshow(distances[0]) - plt.colorbar() - Args: - mesh: Open3D TriangleMesh to be ray casted. - Ks: (N, 3, 3) array of camera intrinsic matrices for N views. - Ts: (N, 4, 4) array of camera extrinsic matrices (world-to-camera - transformations) for N views. - height: Image height in pixels. - width: Image width in pixels. + mesh (o3d.geometry.TriangleMesh): Open3D TriangleMesh to be ray casted. + Ks (Float[np.ndarray, "n 3 3"]): Array of camera intrinsic matrices for + N views. + Ts (Float[np.ndarray, "n 4 4"]): Array of camera extrinsic matrices + (world-to-camera transformations) for N views. + height (int): Image height in pixels. + width (int): Image width in pixels. Returns: - (N, height, width) float32 array representing the distance images. Each - image contains the distances from the corresponding camera center to the - mesh surface. Invalid distances (no intersection) are set to np.inf. - - Note: This function is more efficient than calling mesh_to_im_distance - multiple times as it only sets up the ray casting scene once. 
+ Float[np.ndarray, "n h w"]: Distance images as a float32 array with shape + (N, height, width). Each image contains the distances from the + corresponding camera center to the mesh surface. Invalid distances + (no intersection) are set to np.inf. + + Example: + >>> # Create distance images for 3 different views + >>> distances = ct.raycast.mesh_to_im_distances(mesh, Ks, Ts, 480, 640) + >>> # Visualize first view's distances + >>> plt.imshow(distances[0]) + >>> plt.colorbar() + + Note: + This function is more efficient than calling mesh_to_im_distance multiple + times as it only sets up the ray casting scene once. """ for K in Ks: sanity.assert_K(K) @@ -207,27 +213,30 @@ def mesh_to_im_depth( - f is the focal length from the intrinsic matrix K - (u, v) are the pixel coordinates in the camera plane - Example usage: - # Create depth image for a 640x480 view - depth_image = ct.raycast.mesh_to_im_depth(mesh, K, T, 480, 640) - # Visualize depths - plt.imshow(depth_image) - plt.colorbar() - Args: - mesh: Open3D TriangleMesh to be ray casted. - K: (3, 3) camera intrinsic matrix. - T: (4, 4) camera extrinsic matrix (world-to-camera transformation). - height: Image height in pixels. - width: Image width in pixels. + mesh (o3d.geometry.TriangleMesh): Open3D TriangleMesh to be ray casted. + K (Float[np.ndarray, "3 3"]): Camera intrinsic matrix. + T (Float[np.ndarray, "4 4"]): Camera extrinsic matrix (world-to-camera + transformation). + height (int): Image height in pixels. + width (int): Image width in pixels. Returns: - (height, width) float32 array representing the depth image. Each - pixel contains the z-coordinate of the mesh surface in camera space. - Invalid depths (no intersection) are set to np.inf. - - Note: This function internally uses mesh_to_im_distance and converts the - distances to depths using the camera intrinsic parameters. + Float[np.ndarray, "h w"]: Depth image as a float32 array with shape + (height, width). Each pixel contains the z-coordinate of the mesh + surface in camera space. Invalid depths (no intersection) are set + to np.inf. + + Example: + >>> # Create depth image for a 640x480 view + >>> depth_image = ct.raycast.mesh_to_im_depth(mesh, K, T, 480, 640) + >>> # Visualize depths + >>> plt.imshow(depth_image) + >>> plt.colorbar() + + Note: + This function internally uses mesh_to_im_distance and converts the + distances to depths using the camera intrinsic parameters. """ im_distance = mesh_to_im_distance(mesh, K, T, height, width) im_depth = convert.im_distance_to_im_depth(im_distance, K) @@ -252,28 +261,31 @@ def mesh_to_im_depths( - f is the focal length from the intrinsic matrix K - (u, v) are the pixel coordinates in the camera plane - Example usage: - # Create depth images for 3 different views - depths = ct.raycast.mesh_to_im_depths(mesh, Ks, Ts, 480, 640) - # Visualize first view's depths - plt.imshow(depths[0]) - plt.colorbar() - Args: - mesh: Open3D TriangleMesh to be ray casted. - Ks: (N, 3, 3) array of camera intrinsic matrices for N views. - Ts: (N, 4, 4) array of camera extrinsic matrices (world-to-camera - transformations) for N views. - height: Image height in pixels. - width: Image width in pixels. + mesh (o3d.geometry.TriangleMesh): Open3D TriangleMesh to be ray casted. + Ks (Float[np.ndarray, "n 3 3"]): Array of camera intrinsic matrices for + N views. + Ts (Float[np.ndarray, "n 4 4"]): Array of camera extrinsic matrices + (world-to-camera transformations) for N views. + height (int): Image height in pixels. + width (int): Image width in pixels. 
Returns: - (N, height, width) float32 array representing the depth images. Each - image contains the z-coordinates of the mesh surface in camera space. - Invalid depths (no intersection) are set to np.inf. - - Note: This function internally uses mesh_to_im_distances and converts the - distances to depths using the camera intrinsic parameters. + Float[np.ndarray, "n h w"]: Depth images as a float32 array with shape + (N, height, width). Each image contains the z-coordinates of the mesh + surface in the corresponding camera space. Invalid depths (no + intersection) are set to np.inf. + + Example: + >>> # Create depth images for 3 different views + >>> depths = ct.raycast.mesh_to_im_depths(mesh, Ks, Ts, 480, 640) + >>> # Visualize first view's depths + >>> plt.imshow(depths[0]) + >>> plt.colorbar() + + Note: + This function internally uses mesh_to_im_distances and converts the + distances to depths using the camera intrinsic parameters. """ im_distances = mesh_to_im_distances(mesh, Ks, Ts, height, width) im_depths = np.stack( diff --git a/camtools/render.py b/camtools/render.py index de01f3b0..523ca7c8 100644 --- a/camtools/render.py +++ b/camtools/render.py @@ -22,7 +22,6 @@ def render_geometries( ) -> Float[np.ndarray, "h w 3"]: """ Render Open3D geometries to an image using the specified camera parameters. - This function may require a display. The rendering follows the standard pinhole camera model: λ[x, y, 1]^T = K @ [R | t] @ [X, Y, Z, 1]^T @@ -33,61 +32,58 @@ def render_geometries( - [x, y, 1]^T is the projected homogeneous 2D point in pixel coordinates - λ is the depth value - Example usage: - # Create some geometries - mesh = o3d.geometry.TriangleMesh.create_box() - pcd = o3d.geometry.PointCloud() - pcd.points = o3d.utility.Vector3dVector(np.random.rand(100, 3)) - - # Render with default camera - image = render_geometries([mesh, pcd]) - - # Render with specific camera parameters - K = np.array([[1000, 0, 640], [0, 1000, 360], [0, 0, 1]]) - T = np.eye(4) - depth_image = render_geometries([mesh], K=K, T=T, to_depth=True) - Args: - geometries: List of Open3D geometries to render. Supported types include: + geometries (List[o3d.geometry.Geometry3D]): List of Open3D geometries to + render. Supported types include: - TriangleMesh - PointCloud - LineSet - K: (3, 3) camera intrinsic matrix. If None, uses Open3D's default camera - inferred from the geometries. Must be provided if T is provided. - The intrinsic matrix follows the format: + K (Optional[Float[np.ndarray, "3 3"]]): Camera intrinsic matrix. If None, + uses Open3D's default camera inferred from the geometries. Must be + provided if T is provided. Format: [[fx, 0, cx], [0, fy, cy], [0, 0, 1]] - where: - - fx, fy: focal lengths in pixels - - cx, cy: principal point coordinates - T: (4, 4) camera extrinsic matrix (world-to-camera transformation). - If None, uses Open3D's default camera inferred from the geometries. - Must be provided if K is provided. The extrinsic matrix follows the - format: + where fx, fy are focal lengths and cx, cy are principal points. + T (Optional[Float[np.ndarray, "4 4"]]): Camera extrinsic matrix + (world-to-camera transformation). If None, uses Open3D's default + camera. Must be provided if K is provided. Format: [[R | t], [0 | 1]] - where: - - R: (3, 3) rotation matrix - - t: (3,) translation vector - view_status_str: JSON string containing viewing camera parameters from - o3d.visualization.Visualizer.get_view_status(). This does not - include window size or point size. 
- height: Height of the output image in pixels. - width: Width of the output image in pixels. - point_size: Size of points for PointCloud objects, in pixels. - line_radius: Radius of lines for LineSet objects, in world units. When - set, LineSets are converted to cylinder meshes with this radius. - Unlike point_size, this is in world metric space, not pixel space. - to_depth: If True, renders a depth image instead of RGB. Invalid depths - are set to 0. - visible: If True, shows the rendering window. + where R is a 3x3 rotation matrix and t is a 3D translation vector. + view_status_str (Optional[str]): JSON string containing viewing camera + parameters from o3d.visualization.Visualizer.get_view_status(). + Does not include window size or point size. + height (int): Height of the output image in pixels. Default: 720. + width (int): Width of the output image in pixels. Default: 1280. + point_size (float): Size of points for PointCloud objects, in pixels. + Default: 1.0. + line_radius (Optional[float]): Radius of lines for LineSet objects, in + world units. When set, LineSets are converted to cylinder meshes. + Unlike point_size, this is in world metric space. Default: None. + to_depth (bool): If True, renders a depth image instead of RGB. Invalid + depths are set to 0. Default: False. + visible (bool): If True, shows the rendering window. Default: False. Returns: - If to_depth is False: - (H, W, 3) float32 RGB image array with values in [0, 1] - If to_depth is True: - (H, W) float32 depth image array with depth values in world units + Float[np.ndarray, "h w 3"]: If to_depth is False, returns an RGB image + array with shape (height, width, 3) and values in [0, 1]. If + to_depth is True, returns a depth image array with shape + (height, width) and depth values in world units. + + Example: + >>> # Create some geometries + >>> mesh = o3d.geometry.TriangleMesh.create_box() + >>> pcd = o3d.geometry.PointCloud() + >>> pcd.points = o3d.utility.Vector3dVector(np.random.rand(100, 3)) + >>> + >>> # Render with default camera + >>> image = render_geometries([mesh, pcd]) + >>> + >>> # Render with specific camera parameters + >>> K = np.array([[1000, 0, 640], [0, 1000, 360], [0, 0, 1]]) + >>> T = np.eye(4) + >>> depth_image = render_geometries([mesh], K=K, T=T, to_depth=True) """ if not isinstance(geometries, list): @@ -165,8 +161,6 @@ def get_render_view_status_str( ) -> str: """ Get a view status string containing camera parameters from Open3D visualizer. - This is useful for rendering multiple geometries with consistent camera views. - This function may require a display. The view status string contains camera parameters in JSON format, including: - Camera position and orientation @@ -174,53 +168,46 @@ def get_render_view_status_str( - Zoom level - Other view control settings - Example usage: - # Get view status for default camera - view_str = get_render_view_status_str([mesh, pcd]) - - # Get view status for specific camera - K = np.array([[1000, 0, 640], [0, 1000, 360], [0, 0, 1]]) - T = np.eye(4) - view_str = get_render_view_status_str([mesh], K=K, T=T) - - # Use view status for consistent rendering - image1 = render_geometries([mesh], view_status_str=view_str) - image2 = render_geometries([pcd], view_status_str=view_str) - Args: - geometries: List of Open3D geometries to set up the view. Supported types: + geometries (List[o3d.geometry.Geometry3D]): List of Open3D geometries to + set up the view. Supported types: - TriangleMesh - PointCloud - LineSet - K: (3, 3) camera intrinsic matrix. 
If None, uses Open3D's default camera - inferred from the geometries. Must be provided if T is provided. - The intrinsic matrix follows the format: + K (Optional[Float[np.ndarray, "3 3"]]): Camera intrinsic matrix. If None, + uses Open3D's default camera inferred from the geometries. Must be + provided if T is provided. Format: [[fx, 0, cx], [0, fy, cy], [0, 0, 1]] - where: - - fx, fy: focal lengths in pixels - - cx, cy: principal point coordinates - T: (4, 4) camera extrinsic matrix (world-to-camera transformation). - If None, uses Open3D's default camera inferred from the geometries. - Must be provided if K is provided. The extrinsic matrix follows the - format: + where fx, fy are focal lengths and cx, cy are principal points. + T (Optional[Float[np.ndarray, "4 4"]]): Camera extrinsic matrix + (world-to-camera transformation). If None, uses Open3D's default + camera. Must be provided if K is provided. Format: [[R | t], [0 | 1]] - where: - - R: (3, 3) rotation matrix - - t: (3,) translation vector - height: Height of the view window in pixels. - width: Width of the view window in pixels. + where R is a 3x3 rotation matrix and t is a 3D translation vector. + height (int): Height of the view window in pixels. Default: 720. + width (int): Width of the view window in pixels. Default: 1280. Returns: - JSON string containing camera view parameters from - o3d.visualization.Visualizer.get_view_status(). This includes: - - Camera position and orientation - - Field of view - - Zoom level - - Other view control settings - Note: Does not include window size or point size. + str: JSON string containing camera view parameters from + o3d.visualization.Visualizer.get_view_status(). This includes camera + position, orientation, field of view, zoom level, and other view + control settings. Does not include window size or point size. + + Example: + >>> # Get view status for default camera + >>> view_str = get_render_view_status_str([mesh, pcd]) + >>> + >>> # Get view status for specific camera + >>> K = np.array([[1000, 0, 640], [0, 1000, 360], [0, 0, 1]]) + >>> T = np.eye(4) + >>> view_str = get_render_view_status_str([mesh], K=K, T=T) + >>> + >>> # Use view status for consistent rendering + >>> image1 = render_geometries([mesh], view_status_str=view_str) + >>> image2 = render_geometries([pcd], view_status_str=view_str) """ if not isinstance(geometries, list): raise TypeError("geometries must be a list of Open3D geometries.") diff --git a/camtools/sanity.py b/camtools/sanity.py index 5d2d4ccd..9a471097 100644 --- a/camtools/sanity.py +++ b/camtools/sanity.py @@ -82,13 +82,14 @@ def assert_pose(pose: Float[np.ndarray, "4 4"]): - R is a 3x3 rotation matrix - t is a 3x1 translation vector - Bottom row must be [0, 0, 0, 1] + The pose matrix is the inverse of the extrinsic matrix T. Args: - pose: Camera pose matrix to validate + pose (Float[np.ndarray, "4 4"]): Camera pose matrix to validate. Raises: - ValueError: If pose is not a 4x4 matrix or bottom row is not [0, 0, 0, 1] + ValueError: If pose is not a 4x4 matrix or bottom row is not [0, 0, 0, 1]. """ if pose.shape != (4, 4): raise ValueError( @@ -109,12 +110,14 @@ def assert_shape(x: np.ndarray, shape: tuple, name: Optional[str] = None): - (3, None, 3) matches any 3D array where first and last dimensions are 3 Args: - x: Array to validate - shape: Tuple of expected dimensions (can contain None for flexible dimensions) - name: Optional name of the variable for error message + x (np.ndarray): Array to validate. 
+ shape (tuple): Tuple of expected dimensions (can contain None for flexible + dimensions). + name (Optional[str]): Optional name of the variable for error message. + Default: None. Raises: - ValueError: If array dimensions don't match the expected shape pattern + ValueError: If array dimensions don't match the expected shape pattern. """ shape_valid = True diff --git a/camtools/util.py b/camtools/util.py index a7fc13f7..7e6ca5e8 100644 --- a/camtools/util.py +++ b/camtools/util.py @@ -1,5 +1,5 @@ from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed -from typing import Any, Callable, Iterable +from typing import Any, Callable, Iterable, Optional from functools import lru_cache from tqdm import tqdm @@ -64,32 +64,30 @@ def mp_loop( def query_yes_no(question, default=None): - """Ask a yes/no question via raw_input() and return their answer. + """ + Ask a yes/no question via raw_input() and return their answer. Args: - question: A string that is presented to the user. - default: The presumed answer if the user just hits . - - True: The answer is assumed to be yes. - - False: The answer is assumed to be no. - - None: The answer is required from the user. + question (str): A string that is presented to the user. + default (Optional[bool]): The presumed answer if the user just hits + . Possible values: + - True: The answer is assumed to be yes + - False: The answer is assumed to be no + - None: The answer is required from the user Returns: - Returns True for "yes" or False for "no". + bool: True for "yes" or False for "no". Examples: - ```python - if query_yes_no("Continue?", default="yes"): - print("Proceeding.") - else: - print("Aborted.") - ``` - - ```python - if not query_yes_no("Continue?", default="yes"): - print("Aborted.") - return # Or exit(0) - print("Proceeding.") - ``` + >>> if query_yes_no("Continue?", default="yes"): + ... print("Proceeding.") + ... else: + ... print("Aborted.") + + >>> if not query_yes_no("Continue?", default="yes"): + ... print("Aborted.") + ... return # Or exit(0) + ... print("Proceeding.") """ if default is None: prompt = "[y/n]" From c18bb031717290caa1a2a434d0cc4514c1caa674 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 17:35:24 +0800 Subject: [PATCH 08/59] docs: improve docstrings for clarity and consistency across camtools modules --- camtools/image.py | 71 +++++++++++++++++++------------ camtools/metric.py | 50 ++++++++++++++-------- camtools/raycast.py | 71 +++++++++++++++++++++---------- camtools/render.py | 42 ++++++++++++------ camtools/sanity.py | 31 +++++++++----- camtools/tools/compress_images.py | 62 ++++++++++++++++----------- 6 files changed, 210 insertions(+), 117 deletions(-) diff --git a/camtools/image.py b/camtools/image.py index 1a55a5d0..01929075 100644 --- a/camtools/image.py +++ b/camtools/image.py @@ -487,40 +487,57 @@ def resize( ] = None, interpolation: int = cv2.INTER_LINEAR, ) -> Union[ - Float[np.ndarray, "h_ w_"], - Float[np.ndarray, "h_ w_ 3"], - UInt8[np.ndarray, "h_ w_"], - UInt8[np.ndarray, "h_ w_ 3"], - UInt16[np.ndarray, "h_ w_"], - UInt16[np.ndarray, "h_ w_ 3"], + Float[np.ndarray, "h_new w_new"], + Float[np.ndarray, "h_new w_new 3"], + UInt8[np.ndarray, "h_new w_new"], + UInt8[np.ndarray, "h_new w_new 3"], + UInt16[np.ndarray, "h_new w_new"], + UInt16[np.ndarray, "h_new w_new 3"], ]: """ - Resize an image to a specified size. + Resize an image to a target size. + + The image is resized using OpenCV's resize function with the specified + interpolation method. 
The target size can be specified in several ways: + + 1. Provide target_height and target_width + 2. Provide target_height and keep aspect ratio + 3. Provide target_width and keep aspect ratio + 4. Provide scale_factor to scale both dimensions Args: - im (Union[Float[np.ndarray, "h w"], Float[np.ndarray, "h w 3"], - UInt8[np.ndarray, "h w"], UInt8[np.ndarray, "h w 3"], - UInt16[np.ndarray, "h w"], UInt16[np.ndarray, "h w 3"]]): - Input image to resize. Can be single-channel or RGB, with float32, - uint8, or uint16 data type. - shape_wh (Tuple[int, int]): Target size in (width, height) format. - aspect_ratio_fill (Optional[Union[float, Tuple[float, float, float], - np.ndarray]]): Fill value for padding when preserving aspect ratio. - For float32 images, use values in [0, 1]. For uint8 images, use - values in [0, 255]. For uint16 images, use values in [0, 65535]. - Default: None. + image (Float[np.ndarray, "h w c"]): Input image array with shape + (height, width, channels). + + target_height (Optional[int]): Target height in pixels. If None, height + is determined by target_width and aspect ratio. + + target_width (Optional[int]): Target width in pixels. If None, width is + determined by target_height and aspect ratio. + + scale_factor (Optional[float]): Scale factor to apply to both dimensions. + If provided, target_height and target_width are ignored. + interpolation (int): OpenCV interpolation method. Default: cv2.INTER_LINEAR. + Common options: + - cv2.INTER_NEAREST: Nearest neighbor + - cv2.INTER_LINEAR: Bilinear + - cv2.INTER_CUBIC: Bicubic + - cv2.INTER_LANCZOS4: Lanczos Returns: - Union[Float[np.ndarray, "h_ w_"], Float[np.ndarray, "h_ w_ 3"], - UInt8[np.ndarray, "h_ w_"], UInt8[np.ndarray, "h_ w_ 3"], - UInt16[np.ndarray, "h_ w_"], UInt16[np.ndarray, "h_ w_ 3"]]: - Resized image with the same data type as input. - - Notes: - - If aspect_ratio_fill is None, the image is stretched to fit the target size. - - If aspect_ratio_fill is provided, the image is resized preserving aspect - ratio and padded with the fill value. + Float[np.ndarray, "h' w' c"]: Resized image array with shape + (new_height, new_width, channels). + + Example: + >>> # Resize to specific dimensions + >>> resized = ct.image.resize(image, target_height=480, target_width=640) + >>> + >>> # Resize keeping aspect ratio + >>> resized = ct.image.resize(image, target_height=480) + >>> + >>> # Scale by factor + >>> resized = ct.image.resize(image, scale_factor=0.5) """ # Sanity: dtype. dtype = im.dtype diff --git a/camtools/metric.py b/camtools/metric.py index ab8ae5e2..7491bbf1 100644 --- a/camtools/metric.py +++ b/camtools/metric.py @@ -201,28 +201,42 @@ def load_im_pd_im_gt_im_mask_for_eval( Float[np.ndarray, "h w 3"], Float[np.ndarray, "h w 3"], Float[np.ndarray, "h w"] ]: """ - Load prediction, ground truth, and mask images for image metric evaluation. + Load predicted image, ground truth image, and mask for evaluation. + + This function loads and preprocesses images for evaluation: + + 1. Load predicted and ground truth images + 2. Convert images to float32 and normalize to [0, 1] + 3. Load or create evaluation mask + 4. Apply mask to both images + 5. Validate image shapes and types Args: - im_pd_path (Union[str, Path]): Path to the rendered image. - im_gt_path (Union[str, Path]): Path to the ground truth RGB or RGBA image. - im_mask_path (Optional[Union[str, Path]]): Path to the mask image. The mask - will be resized to the same (h, w) as im_gt. - alpha_mode (str): The mode for handling alpha channels. 
Currently only - "white" is supported. Options: - - "white": If im_gt contains alpha channel, it will be converted to RGB - with white background, and the alpha channel will be ignored. - - "keep": If im_gt contains alpha channel, it will be used as mask. - This mask can be overwritten by im_mask_path if provided. - (This option is not implemented yet.) + im_pd_path (str): Path to predicted image file. + + im_gt_path (str): Path to ground truth image file. + + im_mask_path (Optional[str]): Path to mask image file. If None, uses + entire image. Default: None. + + im_mask_value (Optional[float]): Value in mask image to use for + evaluation. Pixels with this value are included. Default: 255.0. Returns: - Tuple[Float[np.ndarray, "h w 3"], Float[np.ndarray, "h w 3"], - Float[np.ndarray, "h w"]]: A tuple containing: - - im_pd: Prediction image with shape (h, w, 3), float32, range [0, 1] - - im_gt: Ground truth image with shape (h, w, 3), float32, range [0, 1] - - im_mask: Binary mask with shape (h, w), float32, values in {0, 1}. - If im_mask_path is None, all values will be 1. + Tuple[Float[np.ndarray, "h w c"], Float[np.ndarray, "h w c"], + Float[np.ndarray, "h w"]]: Tuple containing: + - Predicted image array normalized to [0, 1] + - Ground truth image array normalized to [0, 1] + - Binary mask array where True indicates pixels to evaluate + + Example: + >>> # Load images with full evaluation mask + >>> im_pd, im_gt, mask = load_im_pd_im_gt_im_mask_for_eval( + ... 'pred.png', 'gt.png') + >>> + >>> # Load images with specific mask + >>> im_pd, im_gt, mask = load_im_pd_im_gt_im_mask_for_eval( + ... 'pred.png', 'gt.png', 'mask.png', 1.0) """ if alpha_mode != "white": raise NotImplementedError('Currently only alpha_mode="white" is supported.') diff --git a/camtools/raycast.py b/camtools/raycast.py index 6223023c..62c4f287 100644 --- a/camtools/raycast.py +++ b/camtools/raycast.py @@ -77,7 +77,9 @@ def mesh_to_im_distance( The distance image contains the Euclidean distance from the camera center to the mesh surface for each pixel. The ray casting follows the equation: + distance = ||C - P|| + where: - C is the camera center in world coordinates - P is the intersection point on the mesh surface @@ -85,10 +87,14 @@ def mesh_to_im_distance( Args: mesh (o3d.geometry.TriangleMesh): Open3D TriangleMesh to be ray casted. + K (Float[np.ndarray, "3 3"]): Camera intrinsic matrix. + T (Float[np.ndarray, "4 4"]): Camera extrinsic matrix (world-to-camera transformation). + height (int): Image height in pixels. + width (int): Image width in pixels. Returns: @@ -134,19 +140,25 @@ def mesh_to_im_distances( For each camera view, generates a distance image containing the Euclidean distance from the camera center to the mesh surface. The distances are calculated as: - distance = ||C_i - P_i|| + + distance = ||C_i - P_i|| + where: - - C_i is the camera center for view i - - P_i is the intersection point on the mesh surface for view i - - ||·|| denotes the Euclidean norm + - C_i is the camera center for view i + - P_i is the intersection point on the mesh surface for view i + - ||·|| denotes the Euclidean norm Args: mesh (o3d.geometry.TriangleMesh): Open3D TriangleMesh to be ray casted. + Ks (Float[np.ndarray, "n 3 3"]): Array of camera intrinsic matrices for N views. + Ts (Float[np.ndarray, "n 4 4"]): Array of camera extrinsic matrices (world-to-camera transformations) for N views. + height (int): Image height in pixels. + width (int): Image width in pixels. 
Returns: @@ -203,40 +215,49 @@ def mesh_to_im_depth( width: int, ) -> Float[np.ndarray, "h w"]: """ - Generate a depth image by ray casting a mesh from a given camera view. + Generate a depth image by ray casting a mesh from a camera view. + + The depth image contains the Euclidean distance from the camera center to + the mesh surface. The distances are calculated as: + + depth = ||C - P|| - The depth image contains the z-coordinate of the mesh surface in the camera - coordinate system for each pixel. The depth is calculated as: - depth = (distance * f) / sqrt(u² + v² + f²) where: - - distance is the Euclidean distance from camera center to surface point - - f is the focal length from the intrinsic matrix K - - (u, v) are the pixel coordinates in the camera plane + - C is the camera center + - P is the intersection point on the mesh surface + - ||·|| denotes the Euclidean norm Args: mesh (o3d.geometry.TriangleMesh): Open3D TriangleMesh to be ray casted. - K (Float[np.ndarray, "3 3"]): Camera intrinsic matrix. + + K (Float[np.ndarray, "3 3"]): Camera intrinsic matrix. Format: + [[fx, 0, cx], + [0, fy, cy], + [0, 0, 1]] + where fx, fy are focal lengths and cx, cy are principal points. + T (Float[np.ndarray, "4 4"]): Camera extrinsic matrix (world-to-camera - transformation). + transformation). Format: + [[R | t], + [0 | 1]] + where R is a 3x3 rotation matrix and t is a 3D translation vector. + height (int): Image height in pixels. + width (int): Image width in pixels. Returns: Float[np.ndarray, "h w"]: Depth image as a float32 array with shape - (height, width). Each pixel contains the z-coordinate of the mesh - surface in camera space. Invalid depths (no intersection) are set - to np.inf. + (height, width). Each pixel contains the distance from the camera + center to the mesh surface. Invalid depths (no intersection) are + set to np.inf. Example: - >>> # Create depth image for a 640x480 view - >>> depth_image = ct.raycast.mesh_to_im_depth(mesh, K, T, 480, 640) + >>> # Create depth image from camera view + >>> depth = ct.raycast.mesh_to_im_depth(mesh, K, T, 480, 640) >>> # Visualize depths - >>> plt.imshow(depth_image) + >>> plt.imshow(depth) >>> plt.colorbar() - - Note: - This function internally uses mesh_to_im_distance and converts the - distances to depths using the camera intrinsic parameters. """ im_distance = mesh_to_im_distance(mesh, K, T, height, width) im_depth = convert.im_distance_to_im_depth(im_distance, K) @@ -263,11 +284,15 @@ def mesh_to_im_depths( Args: mesh (o3d.geometry.TriangleMesh): Open3D TriangleMesh to be ray casted. + Ks (Float[np.ndarray, "n 3 3"]): Array of camera intrinsic matrices for N views. + Ts (Float[np.ndarray, "n 4 4"]): Array of camera extrinsic matrices (world-to-camera transformations) for N views. + height (int): Image height in pixels. + width (int): Image width in pixels. Returns: diff --git a/camtools/render.py b/camtools/render.py index 523ca7c8..eec3eda9 100644 --- a/camtools/render.py +++ b/camtools/render.py @@ -1,4 +1,4 @@ -from typing import List, Tuple, Optional +from typing import List, Tuple, Optional, Union import numpy as np import open3d as o3d @@ -19,18 +19,20 @@ def render_geometries( line_radius: Optional[float] = None, to_depth: bool = False, visible: bool = False, -) -> Float[np.ndarray, "h w 3"]: +) -> Union[Float[np.ndarray, "h w 3"], Float[np.ndarray, "h w"]]: """ Render Open3D geometries to an image using the specified camera parameters. 
The rendering follows the standard pinhole camera model: - λ[x, y, 1]^T = K @ [R | t] @ [X, Y, Z, 1]^T + + λ[x, y, 1]^T = K @ [R | t] @ [X, Y, Z, 1]^T + where: - - [X, Y, Z, 1]^T is a homogeneous 3D point in world coordinates - - [R | t] is the 3x4 extrinsic matrix (world-to-camera transformation) - - K is the 3x3 intrinsic matrix - - [x, y, 1]^T is the projected homogeneous 2D point in pixel coordinates - - λ is the depth value + - [X, Y, Z, 1]^T is a homogeneous 3D point in world coordinates + - [R | t] is the 3x4 extrinsic matrix (world-to-camera transformation) + - K is the 3x3 intrinsic matrix + - [x, y, 1]^T is the projected homogeneous 2D point in pixel coordinates + - λ is the depth value Args: geometries (List[o3d.geometry.Geometry3D]): List of Open3D geometries to @@ -38,31 +40,40 @@ def render_geometries( - TriangleMesh - PointCloud - LineSet + K (Optional[Float[np.ndarray, "3 3"]]): Camera intrinsic matrix. If None, uses Open3D's default camera inferred from the geometries. Must be provided if T is provided. Format: - [[fx, 0, cx], - [0, fy, cy], - [0, 0, 1]] + [[fx, 0, cx], + [0, fy, cy], + [0, 0, 1]] where fx, fy are focal lengths and cx, cy are principal points. + T (Optional[Float[np.ndarray, "4 4"]]): Camera extrinsic matrix (world-to-camera transformation). If None, uses Open3D's default camera. Must be provided if K is provided. Format: - [[R | t], - [0 | 1]] + [[R | t], + [0 | 1]] where R is a 3x3 rotation matrix and t is a 3D translation vector. + view_status_str (Optional[str]): JSON string containing viewing camera parameters from o3d.visualization.Visualizer.get_view_status(). Does not include window size or point size. + height (int): Height of the output image in pixels. Default: 720. + width (int): Width of the output image in pixels. Default: 1280. + point_size (float): Size of points for PointCloud objects, in pixels. Default: 1.0. + line_radius (Optional[float]): Radius of lines for LineSet objects, in world units. When set, LineSets are converted to cylinder meshes. Unlike point_size, this is in world metric space. Default: None. + to_depth (bool): If True, renders a depth image instead of RGB. Invalid depths are set to 0. Default: False. + visible (bool): If True, shows the rendering window. Default: False. Returns: @@ -163,6 +174,7 @@ def get_render_view_status_str( Get a view status string containing camera parameters from Open3D visualizer. The view status string contains camera parameters in JSON format, including: + - Camera position and orientation - Field of view - Zoom level @@ -174,6 +186,7 @@ def get_render_view_status_str( - TriangleMesh - PointCloud - LineSet + K (Optional[Float[np.ndarray, "3 3"]]): Camera intrinsic matrix. If None, uses Open3D's default camera inferred from the geometries. Must be provided if T is provided. Format: @@ -181,13 +194,16 @@ def get_render_view_status_str( [0, fy, cy], [0, 0, 1]] where fx, fy are focal lengths and cx, cy are principal points. + T (Optional[Float[np.ndarray, "4 4"]]): Camera extrinsic matrix (world-to-camera transformation). If None, uses Open3D's default camera. Must be provided if K is provided. Format: [[R | t], [0 | 1]] where R is a 3x3 rotation matrix and t is a 3D translation vector. + height (int): Height of the view window in pixels. Default: 720. + width (int): Width of the view window in pixels. Default: 1280. 
Returns: diff --git a/camtools/sanity.py b/camtools/sanity.py index 9a471097..5a19af8e 100644 --- a/camtools/sanity.py +++ b/camtools/sanity.py @@ -104,20 +104,31 @@ def assert_shape(x: np.ndarray, shape: tuple, name: Optional[str] = None): """ Assert that an array has the expected shape. - The shape pattern can contain None values to indicate that dimension can be - any size. For example: - - (None, 3) matches any 2D array where the second dimension is 3 - - (3, None, 3) matches any 3D array where first and last dimensions are 3 + The shape can be specified in several ways: + + 1. Exact shape: (3, 4) matches only arrays with shape (3, 4) + 2. Wildcard: (-1, 4) matches arrays with any first dimension and 4 columns + 3. Multiple wildcards: (-1, -1, 3) matches any HxWx3 array Args: - x (np.ndarray): Array to validate. - shape (tuple): Tuple of expected dimensions (can contain None for flexible - dimensions). - name (Optional[str]): Optional name of the variable for error message. - Default: None. + array (np.ndarray): Input array to check. + + expected_shape (Tuple[int, ...]): Expected shape tuple. Use -1 as a + wildcard to match any size in that dimension. + + name (Optional[str]): Name of the array for error messages. If None, + uses 'array'. Default: None. Raises: - ValueError: If array dimensions don't match the expected shape pattern. + AssertionError: If the array shape does not match the expected shape. + + Example: + >>> # Check exact shape + >>> assert_shape(array, (100, 3), 'points') + >>> + >>> # Check shape with wildcards + >>> assert_shape(image, (-1, -1, 3), 'image') # Any HxWx3 array + >>> assert_shape(points, (-1, 3), 'points') # Any Nx3 array """ shape_valid = True diff --git a/camtools/tools/compress_images.py b/camtools/tools/compress_images.py index 350e3f85..751f7838 100644 --- a/camtools/tools/compress_images.py +++ b/camtools/tools/compress_images.py @@ -245,36 +245,46 @@ def compress_image_and_return_stat( min_jpg_compression_ratio: float, ): """ - Compress image and return stats. + Compress an image and return compression statistics. + + This function compresses an image using the specified quality level and + returns statistics about the compression: + + 1. Original file size + 2. Compressed file size + 3. Compression ratio + 4. Mean squared error (MSE) + 5. Peak signal-to-noise ratio (PSNR) + 6. Structural similarity index (SSIM) Args: - src_path: Path to image. - - Only ".jpg" or ".png" is supported. - - Directory will be created if it does not exist. - dst_path: Path to image. - - Only ".jpg" or ".png" is supported. - - Directory will be created if it does not exist. - quality: Quality of the output JPEG image, 1-100. Default is 95. - delete_src: If True, the src_path will be deleted. - min_jpg_compression_ratio: Minimum compression ratio for jpg->jpg - compression. If the compression ratio is above this value, the image - will not be compressed. This avoids compressing an image that is - already compressed. + im_path (str): Path to the input image file. + + out_path (str): Path to save the compressed image. + + quality (int): JPEG compression quality level (0-100). Higher values + give better quality but larger file sizes. Default: 95. + + verbose (bool): If True, prints compression statistics. Default: False. Returns: - stat: A dictionary of stats. - { - "src_path": Path to the source image. - "dst_path": Path to the destination image. - "src_size": Size of the source image in bytes. - "dst_size": Size of the destination image in bytes. 
- "compression_ratio": Compression ratio. - "is_direct_copy": True if the image is already compressed. - } - - Notes: - - You should not use this to save a depth image (typically uint16). - - Float image will get a range check to ensure it is in [0, 1]. + Dict[str, Union[int, float]]: Dictionary containing compression + statistics: + - 'original_size': Original file size in bytes + - 'compressed_size': Compressed file size in bytes + - 'compression_ratio': Ratio of original to compressed size + - 'mse': Mean squared error between original and compressed + - 'psnr': Peak signal-to-noise ratio in dB + - 'ssim': Structural similarity index (0-1) + + Example: + >>> # Compress with default quality + >>> stats = compress_image_and_return_stat('input.png', 'output.jpg') + >>> print(f"Compression ratio: {stats['compression_ratio']:.2f}x") + >>> + >>> # Compress with specific quality and print stats + >>> stats = compress_image_and_return_stat('input.png', 'output.jpg', + ... quality=80, verbose=True) """ stat = {} From 94fbf78a78b350aca4cb69cf6cac93ebb6fda6ed Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 17:44:56 +0800 Subject: [PATCH 09/59] docs: update documentation build instructions and improve code comments formatting refactor: remove redundant explanations in raycast.py and render.py for clarity style: fix line breaks and formatting in coordinates.rst for consistency --- README.md | 7 +++++-- camtools/metric.py | 1 + camtools/raycast.py | 22 ++++------------------ camtools/render.py | 8 ++++++++ docs/coordinates.rst | 3 +-- 5 files changed, 19 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index b6337035..775ace25 100644 --- a/README.md +++ b/README.md @@ -258,9 +258,12 @@ To build and view the documentation locally: ```bash # Build the documentation cd docs -make html +make clean && make html -# Start a local server to view the documentation +# To treat warnings as errors +make clean && make html SPHINXOPTS="-W --keep-going" + +# Start a local server to view the documentation (run inside `docs/`) python -m http.server 8000 --directory _build/html ``` diff --git a/camtools/metric.py b/camtools/metric.py index 7491bbf1..d0166280 100644 --- a/camtools/metric.py +++ b/camtools/metric.py @@ -225,6 +225,7 @@ def load_im_pd_im_gt_im_mask_for_eval( Returns: Tuple[Float[np.ndarray, "h w c"], Float[np.ndarray, "h w c"], Float[np.ndarray, "h w"]]: Tuple containing: + - Predicted image array normalized to [0, 1] - Ground truth image array normalized to [0, 1] - Binary mask array where True indicates pixels to evaluate diff --git a/camtools/raycast.py b/camtools/raycast.py index 62c4f287..67061032 100644 --- a/camtools/raycast.py +++ b/camtools/raycast.py @@ -217,29 +217,23 @@ def mesh_to_im_depth( """ Generate a depth image by ray casting a mesh from a camera view. - The depth image contains the Euclidean distance from the camera center to - the mesh surface. The distances are calculated as: - - depth = ||C - P|| - - where: - - C is the camera center - - P is the intersection point on the mesh surface - - ||·|| denotes the Euclidean norm - Args: mesh (o3d.geometry.TriangleMesh): Open3D TriangleMesh to be ray casted. K (Float[np.ndarray, "3 3"]): Camera intrinsic matrix. Format: + [[fx, 0, cx], [0, fy, cy], [0, 0, 1]] + where fx, fy are focal lengths and cx, cy are principal points. T (Float[np.ndarray, "4 4"]): Camera extrinsic matrix (world-to-camera transformation). Format: + [[R | t], [0 | 1]] + where R is a 3x3 rotation matrix and t is a 3D translation vector. 
height (int): Image height in pixels. @@ -274,14 +268,6 @@ def mesh_to_im_depths( """ Generate multiple depth images by ray casting a mesh from different views. - For each camera view, generates a depth image containing the z-coordinate of - the mesh surface in the camera coordinate system. The depths are calculated as: - depth = (distance * f) / sqrt(u² + v² + f²) - where: - - distance is the Euclidean distance from camera center to surface point - - f is the focal length from the intrinsic matrix K - - (u, v) are the pixel coordinates in the camera plane - Args: mesh (o3d.geometry.TriangleMesh): Open3D TriangleMesh to be ray casted. diff --git a/camtools/render.py b/camtools/render.py index eec3eda9..347e0691 100644 --- a/camtools/render.py +++ b/camtools/render.py @@ -44,16 +44,20 @@ def render_geometries( K (Optional[Float[np.ndarray, "3 3"]]): Camera intrinsic matrix. If None, uses Open3D's default camera inferred from the geometries. Must be provided if T is provided. Format: + [[fx, 0, cx], [0, fy, cy], [0, 0, 1]] + where fx, fy are focal lengths and cx, cy are principal points. T (Optional[Float[np.ndarray, "4 4"]]): Camera extrinsic matrix (world-to-camera transformation). If None, uses Open3D's default camera. Must be provided if K is provided. Format: + [[R | t], [0 | 1]] + where R is a 3x3 rotation matrix and t is a 3D translation vector. view_status_str (Optional[str]): JSON string containing viewing camera @@ -190,16 +194,20 @@ def get_render_view_status_str( K (Optional[Float[np.ndarray, "3 3"]]): Camera intrinsic matrix. If None, uses Open3D's default camera inferred from the geometries. Must be provided if T is provided. Format: + [[fx, 0, cx], [0, fy, cy], [0, 0, 1]] + where fx, fy are focal lengths and cx, cy are principal points. T (Optional[Float[np.ndarray, "4 4"]]): Camera extrinsic matrix (world-to-camera transformation). If None, uses Open3D's default camera. Must be provided if K is provided. Format: + [[R | t], [0 | 1]] + where R is a 3x3 rotation matrix and t is a 3D translation vector. height (int): Height of the view window in pixels. Default: 720. diff --git a/docs/coordinates.rst b/docs/coordinates.rst index cd0f53d8..15e12de3 100644 --- a/docs/coordinates.rst +++ b/docs/coordinates.rst @@ -104,8 +104,7 @@ Rotation Matrix (R) - ``R`` is a rotation matrix. It is an orthogonal matrix with determinant 1, as rotations preserve volume and orientation. - ``R.T == np.linalg.inv(R)`` - - ``np.linalg.norm(R @ x) == np.linalg.norm(x)``, where ``x`` is a ``(3,)`` - vector. + - ``np.linalg.norm(R @ x) == np.linalg.norm(x)``, where ``x`` is a ``(3,)`` vector. Translation Vector (t) ^^^^^^^^^^^^^^^^^^^^^^ From 04f98a559a199e67aa07bd1cbd70b52797c308a3 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 17:48:01 +0800 Subject: [PATCH 10/59] docs(raycast.py): clarify distance vs depth image definitions and add conversion notes --- camtools/raycast.py | 45 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/camtools/raycast.py b/camtools/raycast.py index 67061032..3c5d1918 100644 --- a/camtools/raycast.py +++ b/camtools/raycast.py @@ -75,8 +75,8 @@ def mesh_to_im_distance( """ Generate a distance image by ray casting a mesh from a given camera view. - The distance image contains the Euclidean distance from the camera center to - the mesh surface for each pixel. 
The ray casting follows the equation: + The distance image contains the Euclidean distance between each 3D point on + the mesh surface and the camera center. The ray casting follows the equation: distance = ||C - P|| @@ -85,6 +85,11 @@ def mesh_to_im_distance( - P is the intersection point on the mesh surface - ||·|| denotes the Euclidean norm + Note: + A distance image shows the actual 3D distance from the camera center to + each surface point, while a depth image shows the z-coordinate in camera + space. Use ct.convert.im_distance_to_im_depth to convert between them. + Args: mesh (o3d.geometry.TriangleMesh): Open3D TriangleMesh to be ray casted. @@ -138,8 +143,8 @@ def mesh_to_im_distances( Generate multiple distance images by ray casting a mesh from different views. For each camera view, generates a distance image containing the Euclidean - distance from the camera center to the mesh surface. The distances are - calculated as: + distance between each 3D point on the mesh surface and the camera center. + The distances are calculated as: distance = ||C_i - P_i|| @@ -148,6 +153,11 @@ def mesh_to_im_distances( - P_i is the intersection point on the mesh surface for view i - ||·|| denotes the Euclidean norm + Note: + A distance image shows the actual 3D distance from the camera center to + each surface point, while a depth image shows the z-coordinate in camera + space. Use ct.convert.im_distance_to_im_depth to convert between them. + Args: mesh (o3d.geometry.TriangleMesh): Open3D TriangleMesh to be ray casted. @@ -215,7 +225,11 @@ def mesh_to_im_depth( width: int, ) -> Float[np.ndarray, "h w"]: """ - Generate a depth image by ray casting a mesh from a camera view. + Generate a depth image (z-depth) by ray casting a mesh from a camera view. + + The depth image contains the z-coordinate of each 3D point on the mesh + surface in camera coordinates. This represents the perpendicular distance + from the camera plane to the surface point. Args: mesh (o3d.geometry.TriangleMesh): Open3D TriangleMesh to be ray casted. @@ -242,10 +256,15 @@ def mesh_to_im_depth( Returns: Float[np.ndarray, "h w"]: Depth image as a float32 array with shape - (height, width). Each pixel contains the distance from the camera - center to the mesh surface. Invalid depths (no intersection) are + (height, width). Each pixel contains the z-coordinate of the mesh + surface in camera space. Invalid depths (no intersection) are set to np.inf. + Note: + A depth image shows the z-coordinate in camera space, while a distance + image shows the actual 3D distance from the camera center to each surface + point. Use ct.convert.im_depth_to_im_distance to convert between them. + Example: >>> # Create depth image from camera view >>> depth = ct.raycast.mesh_to_im_depth(mesh, K, T, 480, 640) @@ -266,7 +285,12 @@ def mesh_to_im_depths( width: int, ) -> Float[np.ndarray, "n h w"]: """ - Generate multiple depth images by ray casting a mesh from different views. + Generate multiple depth images (z-depth) by ray casting a mesh from different + views. + + Each depth image contains the z-coordinate of each 3D point on the mesh + surface in the corresponding camera coordinates. This represents the + perpendicular distance from the camera plane to the surface point. Args: mesh (o3d.geometry.TriangleMesh): Open3D TriangleMesh to be ray casted. @@ -287,6 +311,11 @@ def mesh_to_im_depths( surface in the corresponding camera space. Invalid depths (no intersection) are set to np.inf. 
+ Note: + A depth image shows the z-coordinate in camera space, while a distance + image shows the actual 3D distance from the camera center to each surface + point. Use ct.convert.im_depth_to_im_distance to convert between them. + Example: >>> # Create depth images for 3 different views >>> depths = ct.raycast.mesh_to_im_depths(mesh, Ks, Ts, 480, 640) From 7e4aa83856d0ed3e7d8e5d1f34049da1ac5ecd13 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 17:55:04 +0800 Subject: [PATCH 11/59] ci: add GitHub workflow for building and deploying documentation on push and PR events --- .github/workflows/docs.yml | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 .github/workflows/docs.yml diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 00000000..4f86ed8f --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,38 @@ +name: Documentation + +on: + workflow_dispatch: + push: + branches: + - main + pull_request: + types: [opened, reopened, synchronize] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install sphinx furo myst-parser + pip install -e . + + - name: Build documentation + run: | + cd docs + make clean && make html SPHINXOPTS="-W --keep-going" + + - name: Notice + run: | + echo "Documentation build successful!" + echo "After merging this PR, you can view the latest documentation at:" + echo "- Public docs: https://camtools.readthedocs.io/en/latest/" + echo "- Admin panel: https://app.readthedocs.org/projects/camtools/" From 25f5db318cd6c24a04a42b02537cc0bf53e7c963 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 17:57:38 +0800 Subject: [PATCH 12/59] ci(docs): streamline documentation build process by using `[docs]` extra in pip install docs(README): update instructions to include `[docs]` extra and add info about Read the Docs integration --- .github/workflows/docs.yml | 3 +-- README.md | 8 ++++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 4f86ed8f..2426ec36 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -22,8 +22,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install sphinx furo myst-parser - pip install -e . + pip install -e .[docs] - name: Build documentation run: | diff --git a/README.md b/README.md index 775ace25..81915756 100644 --- a/README.md +++ b/README.md @@ -256,6 +256,9 @@ the beginning of the README. To build and view the documentation locally: ```bash +# Install documentation dependencies +pip install -e .[docs] + # Build the documentation cd docs make clean && make html @@ -269,6 +272,11 @@ python -m http.server 8000 --directory _build/html Then open your browser and navigate to `http://localhost:8000` to view the documentation. +The documentation is also automatically built by GitHub Actions on pull requests and pushes to main. After merging to main, you can view: + +- Public documentation at https://camtools.readthedocs.io/en/latest/ +- Admin panel at https://app.readthedocs.org/projects/camtools/ + ## Contributing - Follow [Angular's commit message convention](https://github.com/angular/angular/blob/main/CONTRIBUTING.md#-commit-message-format) for PRs. 
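For reference, the distance-image versus depth-image distinction documented in the `camtools/raycast.py` docstrings above reduces to a per-pixel rescaling by the length of the viewing ray. Below is a minimal NumPy sketch of that relationship, using made-up intrinsics and image size; CamTools itself delegates the conversion to `ct.convert.im_distance_to_im_depth` / `ct.convert.im_depth_to_im_distance`, which take the image and `K` directly.

```python
import numpy as np

# Made-up intrinsics and image size, for illustration only.
fx, fy, cx, cy = 1000.0, 1000.0, 320.0, 240.0
h, w = 480, 640

# Distance image: Euclidean distance from the camera center along each ray.
im_distance = np.full((h, w), 2.0, dtype=np.float32)

# Camera-space ray direction per pixel is ((u - cx) / fx, (v - cy) / fy, 1).
xx, yy = np.meshgrid((np.arange(w) - cx) / fx, (np.arange(h) - cy) / fy)
ray_norm = np.sqrt(xx**2 + yy**2 + 1.0)

# Depth image: z-coordinate in camera space (divide distance by ray length).
im_depth = im_distance / ray_norm
```

For pixels at the principal point the ray length is 1, so distance and depth coincide there; away from the center the depth is always smaller than the distance.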
From a81124ac07e1d7e13059950e440738affe364eaa Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 17:58:38 +0800 Subject: [PATCH 13/59] style: reformat code for improved readability and line length consistency --- camtools/camera.py | 20 ++++++--- camtools/colmap.py | 57 ++++++++++++++++++------- camtools/convert.py | 44 ++++++++++++++----- camtools/geometry.py | 4 +- camtools/image.py | 71 ++++++++++++++++++++++--------- camtools/io.py | 6 ++- camtools/metric.py | 15 +++++-- camtools/normalize.py | 4 +- camtools/render.py | 16 +++++-- camtools/sanity.py | 28 +++++++++--- camtools/solver.py | 12 ++++-- camtools/tools/cli.py | 4 +- camtools/tools/compress_images.py | 16 +++++-- camtools/tools/crop_boarders.py | 41 +++++++++++++----- camtools/tools/draw_bboxes.py | 38 +++++++++++++---- camtools/util.py | 20 ++++++--- test/conftest.py | 3 +- test/test_convert.py | 30 ++++++++++--- test/test_raycast.py | 4 +- test/test_render.py | 21 ++++++--- 20 files changed, 340 insertions(+), 114 deletions(-) diff --git a/camtools/camera.py b/camtools/camera.py index e772154e..784cefc6 100644 --- a/camtools/camera.py +++ b/camtools/camera.py @@ -13,7 +13,9 @@ def create_camera_frustums( image_whs: Optional[List[List[int]]] = None, size: float = 0.1, color: Tuple[float, float, float] = (0, 0, 1), - highlight_color_map: Optional[Dict[int, Tuple[float, float, float]]] = None, + highlight_color_map: Optional[ + Dict[int, Tuple[float, float, float]] + ] = None, center_line: bool = True, center_line_color: Tuple[float, float, float] = (1, 0, 0), up_triangle: bool = True, @@ -72,7 +74,9 @@ def create_camera_frustums( if not isinstance(w, (int, np.integer)) or not isinstance( h, (int, np.integer) ): - raise ValueError(f"image_wh must be integer, but got {image_wh}.") + raise ValueError( + f"image_wh must be integer, but got {image_wh}." + ) # Wrap the highlight_color_map dimensions. if highlight_color_map is not None: @@ -115,7 +119,9 @@ def create_camera_frustum_with_Ts( image_whs: Optional[List[List[int]]] = None, size: float = 0.1, color: Tuple[float, float, float] = (0, 0, 1), - highlight_color_map: Optional[Dict[int, Tuple[float, float, float]]] = None, + highlight_color_map: Optional[ + Dict[int, Tuple[float, float, float]] + ] = None, center_line: bool = True, center_line_color: Tuple[float, float, float] = (1, 0, 0), up_triangle: bool = True, @@ -209,7 +215,9 @@ def _create_camera_frustum( sanity.assert_shape_3(color, "color") w, h = image_wh - if not isinstance(w, (int, np.integer)) or not isinstance(h, (int, np.integer)): + if not isinstance(w, (int, np.integer)) or not isinstance( + h, (int, np.integer) + ): raise ValueError(f"image_wh must be integer, but got {image_wh}.") R, _ = convert.T_to_R_t(T) @@ -223,7 +231,9 @@ def _create_camera_frustum( [0, h - 1, 1], ] ) - camera_plane_points_3d = (np.linalg.inv(K) @ camera_plane_points_2d_homo.T).T + camera_plane_points_3d = ( + np.linalg.inv(K) @ camera_plane_points_2d_homo.T + ).T camera_plane_dist = solver.point_plane_distance_three_points( [0, 0, 0], camera_plane_points_3d ) diff --git a/camtools/colmap.py b/camtools/colmap.py index ff7ecc33..5af2bc04 100644 --- a/camtools/colmap.py +++ b/camtools/colmap.py @@ -109,7 +109,9 @@ def qvec2rotmat(self): ) -def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"): +def read_next_bytes( + fid, num_bytes, format_char_sequence, endian_character="<" +): """Read and unpack the next bytes from a binary file. :param fid: :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 
2, 6, 16, 30, etc. @@ -158,7 +160,11 @@ def read_cameras_text(path): height = int(elems[3]) params = np.array(tuple(map(float, elems[4:]))) cameras[camera_id] = Camera( - id=camera_id, model=model, width=width, height=height, params=params + id=camera_id, + model=model, + width=width, + height=height, + params=params, ) return cameras @@ -183,7 +189,9 @@ def read_cameras_binary(path_to_model_file): height = camera_properties[3] num_params = CAMERA_MODEL_IDS[model_id].num_params params = read_next_bytes( - fid, num_bytes=8 * num_params, format_char_sequence="d" * num_params + fid, + num_bytes=8 * num_params, + format_char_sequence="d" * num_params, ) cameras[camera_id] = Camera( id=camera_id, @@ -254,7 +262,10 @@ def read_images_text(path): image_name = elems[9] elems = fid.readline().split() xys = np.column_stack( - [tuple(map(float, elems[0::3])), tuple(map(float, elems[1::3]))] + [ + tuple(map(float, elems[0::3])), + tuple(map(float, elems[1::3])), + ] ) point3D_ids = np.array(tuple(map(int, elems[2::3]))) images[image_id] = Image( @@ -291,16 +302,19 @@ def read_images_binary(path_to_model_file): while current_char != b"\x00": # look for the ASCII 0 entry image_name += current_char.decode("utf-8") current_char = read_next_bytes(fid, 1, "c")[0] - num_points2D = read_next_bytes(fid, num_bytes=8, format_char_sequence="Q")[ - 0 - ] + num_points2D = read_next_bytes( + fid, num_bytes=8, format_char_sequence="Q" + )[0] x_y_id_s = read_next_bytes( fid, num_bytes=24 * num_points2D, format_char_sequence="ddq" * num_points2D, ) xys = np.column_stack( - [tuple(map(float, x_y_id_s[0::3])), tuple(map(float, x_y_id_s[1::3]))] + [ + tuple(map(float, x_y_id_s[0::3])), + tuple(map(float, x_y_id_s[1::3])), + ] ) point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3]))) images[image_id] = Image( @@ -339,7 +353,13 @@ def write_images_text(images, path): with open(path, "w") as fid: fid.write(HEADER) for _, img in images.items(): - image_header = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name] + image_header = [ + img.id, + *img.qvec, + *img.tvec, + img.camera_id, + img.name, + ] first_line = " ".join(map(str, image_header)) fid.write(first_line + "\n") @@ -419,9 +439,9 @@ def read_points3D_binary(path_to_model_file): xyz = np.array(binary_point_line_properties[1:4]) rgb = np.array(binary_point_line_properties[4:7]) error = np.array(binary_point_line_properties[7]) - track_length = read_next_bytes(fid, num_bytes=8, format_char_sequence="Q")[ - 0 - ] + track_length = read_next_bytes( + fid, num_bytes=8, format_char_sequence="Q" + )[0] track_elems = read_next_bytes( fid, num_bytes=8 * track_length, @@ -643,7 +663,8 @@ def quat_from_rotm(R): q3[:, 3] = z q = q0 * (w[:, None] > 0) + (w[:, None] == 0) * ( q1 * (x[:, None] > 0) - + (x[:, None] == 0) * (q2 * (y[:, None] > 0) + (y[:, None] == 0) * (q3)) + + (x[:, None] == 0) + * (q2 * (y[:, None] > 0) + (y[:, None] == 0) * (q3)) ) q /= np.linalg.norm(q, axis=1, keepdims=True) return q.squeeze() @@ -819,7 +840,9 @@ def main(): ) args = parser.parse_args() - cameras, images, points3D = read_model(path=args.input_model, ext=args.input_format) + cameras, images, points3D = read_model( + path=args.input_model, ext=args.input_format + ) print("num_cameras:", len(cameras)) print("num_images:", len(images)) @@ -827,7 +850,11 @@ def main(): if args.output_model is not None: write_model( - cameras, images, points3D, path=args.output_model, ext=args.output_format + cameras, + images, + points3D, + path=args.output_model, + ext=args.output_format, ) diff --git 
a/camtools/convert.py b/camtools/convert.py index 3334c5fd..4f38c0b2 100644 --- a/camtools/convert.py +++ b/camtools/convert.py @@ -20,7 +20,9 @@ def pad_0001(array): """ if array.ndim == 2: if not array.shape == (3, 4): - raise ValueError(f"Expected array of shape (3, 4), but got {array.shape}.") + raise ValueError( + f"Expected array of shape (3, 4), but got {array.shape}." + ) elif array.ndim == 3: if not array.shape[-2:] == (3, 4): raise ValueError( @@ -56,7 +58,9 @@ def rm_pad_0001(array, check_vals=False): # Check shapes. if array.ndim == 2: if not array.shape == (4, 4): - raise ValueError(f"Expected array of shape (4, 4), but got {array.shape}.") + raise ValueError( + f"Expected array of shape (4, 4), but got {array.shape}." + ) elif array.ndim == 3: if not array.shape[-2:] == (4, 4): raise ValueError( @@ -77,7 +81,9 @@ def rm_pad_0001(array, check_vals=False): ) elif array.ndim == 3: bottom = array[:, 3:4, :] - expected_bottom = np.broadcast_to([0, 0, 0, 1], (array.shape[0], 1, 4)) + expected_bottom = np.broadcast_to( + [0, 0, 0, 1], (array.shape[0], 1, 4) + ) if not np.allclose(bottom, expected_bottom): raise ValueError( f"Expected bottom row to be {expected_bottom}, but got {bottom}." @@ -99,7 +105,9 @@ def to_homo(array): A numpy array of shape (N, M+1) with a column of ones appended. """ if not isinstance(array, np.ndarray) or array.ndim != 2: - raise ValueError(f"Input must be a 2D numpy array, but got {array.shape}.") + raise ValueError( + f"Input must be a 2D numpy array, but got {array.shape}." + ) ones = np.ones((array.shape[0], 1), dtype=array.dtype) return np.hstack((array, ones)) @@ -117,7 +125,9 @@ def from_homo(array): A numpy array of shape (N, M-1) in Cartesian coordinates. """ if not isinstance(array, np.ndarray) or array.ndim != 2: - raise ValueError(f"Input must be a 2D numpy array, but got {array.shape}.") + raise ValueError( + f"Input must be a 2D numpy array, but got {array.shape}." + ) if array.shape[1] < 2: raise ValueError( f"Input array must have at least two columns for removing " @@ -211,7 +221,9 @@ def pose_to_T(pose): return np.linalg.inv(pose) -def T_opengl_to_opencv(T: Float[np.ndarray, "4 4"]) -> Float[np.ndarray, "4 4"]: +def T_opengl_to_opencv( + T: Float[np.ndarray, "4 4"] +) -> Float[np.ndarray, "4 4"]: """ Convert T from OpenGL convention to OpenCV convention. @@ -239,7 +251,9 @@ def T_opengl_to_opencv(T: Float[np.ndarray, "4 4"]) -> Float[np.ndarray, "4 4"]: return T -def T_opencv_to_opengl(T: Float[np.ndarray, "4 4"]) -> Float[np.ndarray, "4 4"]: +def T_opencv_to_opengl( + T: Float[np.ndarray, "4 4"] +) -> Float[np.ndarray, "4 4"]: """ Convert T from OpenCV convention to OpenGL convention. @@ -267,7 +281,9 @@ def T_opencv_to_opengl(T: Float[np.ndarray, "4 4"]) -> Float[np.ndarray, "4 4"]: return T -def pose_opengl_to_opencv(pose: Float[np.ndarray, "4 4"]) -> Float[np.ndarray, "4 4"]: +def pose_opengl_to_opencv( + pose: Float[np.ndarray, "4 4"] +) -> Float[np.ndarray, "4 4"]: """ Convert pose from OpenGL convention to OpenCV convention. @@ -292,7 +308,9 @@ def pose_opengl_to_opencv(pose: Float[np.ndarray, "4 4"]) -> Float[np.ndarray, " return pose -def pose_opencv_to_opengl(pose: Float[np.ndarray, "4 4"]) -> Float[np.ndarray, "4 4"]: +def pose_opencv_to_opengl( + pose: Float[np.ndarray, "4 4"] +) -> Float[np.ndarray, "4 4"]: """ Convert pose from OpenCV convention to OpenGL convention. 
@@ -446,7 +464,9 @@ def T_to_R_t( def P_to_K_R_t( P: Float[np.ndarray, "3 4"], -) -> Tuple[Float[np.ndarray, "3 3"], Float[np.ndarray, "3 3"], Float[np.ndarray, "3"]]: +) -> Tuple[ + Float[np.ndarray, "3 3"], Float[np.ndarray, "3 3"], Float[np.ndarray, "3"] +]: """ Decompose projection matrix P into intrinsic matrix K, rotation matrix R, and translation vector t. @@ -785,7 +805,9 @@ def mesh_to_lineset( if color is not None: if len(color) != 3: - raise ValueError(f"Expected color of shape (3,), but got {color.shape}.") + raise ValueError( + f"Expected color of shape (3,), but got {color.shape}." + ) lineset.paint_uniform_color(color) return lineset diff --git a/camtools/geometry.py b/camtools/geometry.py index b972ef78..8a2f59cc 100644 --- a/camtools/geometry.py +++ b/camtools/geometry.py @@ -62,7 +62,9 @@ def mesh_to_lineset( """ # Downsample mesh if downsample_ratio < 1.0: - target_number_of_triangles = int(len(mesh.triangles) * downsample_ratio) + target_number_of_triangles = int( + len(mesh.triangles) * downsample_ratio + ) mesh = mesh.simplify_quadric_decimation(target_number_of_triangles) elif downsample_ratio > 1.0: raise ValueError("Subsample must be less than or equal to 1.0") diff --git a/camtools/image.py b/camtools/image.py index 01929075..c193ab0f 100644 --- a/camtools/image.py +++ b/camtools/image.py @@ -7,7 +7,8 @@ def crop_white_boarders( - im: Float[np.ndarray, "h w 3"], padding: Tuple[int, int, int, int] = (0, 0, 0, 0) + im: Float[np.ndarray, "h w 3"], + padding: Tuple[int, int, int, int] = (0, 0, 0, 0), ) -> Float[np.ndarray, "h_cropped w_cropped 3"]: """ Crop white borders from an image and apply optional padding. @@ -27,7 +28,9 @@ def crop_white_boarders( return im_dst -def compute_cropping_v1(im: Float[np.ndarray, "h w n"]) -> Tuple[int, int, int, int]: +def compute_cropping_v1( + im: Float[np.ndarray, "h w n"] +) -> Tuple[int, int, int, int]: """ Compute white border sizes in pixels for multi-channel images. @@ -115,9 +118,13 @@ def compute_cropping( ValueError: If input image has invalid dtype, dimensions, or fails v1 check. 
""" if not im.dtype == np.float32: - raise ValueError(f"Expected im.dtype to be np.float32, but got {im.dtype}") + raise ValueError( + f"Expected im.dtype to be np.float32, but got {im.dtype}" + ) if im.ndim != 3 or im.shape[2] != 3: - raise ValueError(f"Expected im to be of shape (H, W, 3), but got {im.shape}") + raise ValueError( + f"Expected im to be of shape (H, W, 3), but got {im.shape}" + ) # Create a mask where white pixels are marked as True white_mask = np.all(im == 1.0, axis=-1) @@ -128,9 +135,13 @@ def compute_cropping( # Determine the crop values based on the positions of non-white pixels crop_t = rows_with_color[0] if len(rows_with_color) else 0 - crop_b = im.shape[0] - rows_with_color[-1] - 1 if len(rows_with_color) else 0 + crop_b = ( + im.shape[0] - rows_with_color[-1] - 1 if len(rows_with_color) else 0 + ) crop_l = cols_with_color[0] if len(cols_with_color) else 0 - crop_r = im.shape[1] - cols_with_color[-1] - 1 if len(cols_with_color) else 0 + crop_r = ( + im.shape[1] - cols_with_color[-1] - 1 if len(cols_with_color) else 0 + ) # Check the results against compute_cropping_v1 if requested if check_with_v1: @@ -316,7 +327,9 @@ def overlay_mask_on_rgb( assert overlay_color.max() <= 1.0 and overlay_color.min() >= 0.0 im_mask_stacked = np.dstack([im_mask, im_mask, im_mask]) - im_hard = im_rgb * (1.0 - im_mask_stacked) + overlay_color * im_mask_stacked + im_hard = ( + im_rgb * (1.0 - im_mask_stacked) + overlay_color * im_mask_stacked + ) im_soft = im_rgb * (1.0 - overlay_alpha) + im_hard * overlay_alpha return im_soft @@ -368,7 +381,9 @@ def ndc_coords_to_pixels( dst_tl = np.array([-0.5, -0.5], dtype=dtype) dst_br = np.array([w - 0.5, h - 0.5], dtype=dtype) - dst_pixels = (ndc_coords - src_tl) / (src_br - src_tl) * (dst_br - dst_tl) + dst_tl + dst_pixels = (ndc_coords - src_tl) / (src_br - src_tl) * ( + dst_br - dst_tl + ) + dst_tl return dst_pixels @@ -467,7 +482,9 @@ def recover_rotated_pixels(dst_pixels, src_wh, ccw_degrees): dst_pixels_recovered = np.stack([h - 1 - src_r, src_c], axis=1) else: raise ValueError(f"Invalid rotation angle: {ccw_degrees}.") - np.testing.assert_allclose(dst_pixels, dst_pixels_recovered, rtol=1e-5, atol=1e-5) + np.testing.assert_allclose( + dst_pixels, dst_pixels_recovered, rtol=1e-5, atol=1e-5 + ) return src_pixels @@ -595,7 +612,9 @@ def resize( if tmp_w == dst_w and tmp_h == dst_h: im_resize = im_tmp else: - im_resize = np.full(dst_numpy_shape, fill_value=aspect_ratio_fill, dtype=dtype) + im_resize = np.full( + dst_numpy_shape, fill_value=aspect_ratio_fill, dtype=dtype + ) im_resize[:tmp_h, :tmp_w] = im_tmp # Final sanity checks for the reshaped image. 
@@ -672,7 +691,9 @@ def recover_resized_pixels( src_br = np.array([src_w - 0.5, src_h - 0.5]) dst_tl = np.array([-0.5, -0.5]) dst_br = np.array([tmp_w - 0.5, tmp_h - 0.5]) - src_pixels = (dst_pixels - dst_tl) / (dst_br - dst_tl) * (src_br - src_tl) + src_tl + src_pixels = (dst_pixels - dst_tl) / (dst_br - dst_tl) * ( + src_br - src_tl + ) + src_tl return src_pixels @@ -738,7 +759,9 @@ def make_corres_image( if confidences is not None: assert len(confidences) == len(src_pixels) - assert confidences.dtype == np.float32 or confidences.dtype == np.float64 + assert ( + confidences.dtype == np.float32 or confidences.dtype == np.float64 + ) if confidences.size > 0: assert confidences.min() >= 0.0 and confidences.max() <= 1.0 assert confidences.ndim == 1 @@ -783,7 +806,9 @@ def make_corres_image( assert sample_ratio > 0.0 and sample_ratio <= 1.0 num_points = len(src_pixels) num_samples = int(round(num_points * sample_ratio)) - sample_indices = np.random.choice(num_points, num_samples, replace=False) + sample_indices = np.random.choice( + num_points, num_samples, replace=False + ) src_pixels = src_pixels[sample_indices] dst_pixels = dst_pixels[sample_indices] confidences = confidences[sample_indices] @@ -795,8 +820,12 @@ def make_corres_image( if confidences is None: # Draw white points as mask. - im_point_mask = np.zeros(im_corres.shape[:2], dtype=im_corres.dtype) - for (src_c, src_r), (dst_c, dst_r) in zip(src_pixels, dst_pixels): + im_point_mask = np.zeros( + im_corres.shape[:2], dtype=im_corres.dtype + ) + for (src_c, src_r), (dst_c, dst_r) in zip( + src_pixels, dst_pixels + ): cv2.circle( im_point_mask, (src_c, src_r), @@ -851,7 +880,11 @@ def make_corres_image( im_line_mask = np.zeros(im_corres.shape[:2], dtype=im_corres.dtype) for (src_c, src_r), (dst_c, dst_r) in zip(src_pixels, dst_pixels): cv2.line( - im_line_mask, (src_c, src_r), (dst_c + w, dst_r), (1,), line_width + im_line_mask, + (src_c, src_r), + (dst_c + w, dst_r), + (1,), + line_width, ) line_alpha = line_color[3] if len(line_color) == 4 else 1.0 @@ -963,9 +996,9 @@ def vstack_images( if alignment == "center" else max_width - im.shape[1] if alignment == "right" else 0 ) - im_stacked[curr_row : curr_row + im.shape[0], offset : offset + im.shape[1]] = ( - im - ) + im_stacked[ + curr_row : curr_row + im.shape[0], offset : offset + im.shape[1] + ] = im curr_row += im.shape[0] return im_stacked diff --git a/camtools/io.py b/camtools/io.py index 93fc68f9..6f60c603 100644 --- a/camtools/io.py +++ b/camtools/io.py @@ -259,7 +259,8 @@ def imread( if im.shape[2] == 4: if alpha_mode is None: raise ValueError( - f"{im_path} has an alpha channel, alpha_mode " f"must be specified." + f"{im_path} has an alpha channel, alpha_mode " + f"must be specified." ) elif alpha_mode == "keep": pass @@ -278,7 +279,8 @@ def imread( im = im[..., :3] * im[..., 3:] else: raise ValueError( - f"Unexpected alpha_mode: {alpha_mode} for a " "4-channel image." + f"Unexpected alpha_mode: {alpha_mode} for a " + "4-channel image." 
) elif im.shape[2] == 3: pass diff --git a/camtools/metric.py b/camtools/metric.py index d0166280..a1d7fd17 100644 --- a/camtools/metric.py +++ b/camtools/metric.py @@ -113,7 +113,12 @@ def image_lpips( loss_fn = lpips.LPIPS(net="alex") image_lpips.static_vars["loss_fn"] = loss_fn - ans = loss_fn.forward(torch.tensor(pr), torch.tensor(gt)).cpu().detach().numpy() + ans = ( + loss_fn.forward(torch.tensor(pr), torch.tensor(gt)) + .cpu() + .detach() + .numpy() + ) return float(ans) @@ -198,7 +203,9 @@ def load_im_pd_im_gt_im_mask_for_eval( im_mask_path: Optional[Union[str, Path]] = None, alpha_mode: str = "white", ) -> Tuple[ - Float[np.ndarray, "h w 3"], Float[np.ndarray, "h w 3"], Float[np.ndarray, "h w"] + Float[np.ndarray, "h w 3"], + Float[np.ndarray, "h w 3"], + Float[np.ndarray, "h w"], ]: """ Load predicted image, ground truth image, and mask for evaluation. @@ -240,7 +247,9 @@ def load_im_pd_im_gt_im_mask_for_eval( ... 'pred.png', 'gt.png', 'mask.png', 1.0) """ if alpha_mode != "white": - raise NotImplementedError('Currently only alpha_mode="white" is supported.') + raise NotImplementedError( + 'Currently only alpha_mode="white" is supported.' + ) # Prepare im_gt. # (h, w, 3) or (h, w, 4), float32. diff --git a/camtools/normalize.py b/camtools/normalize.py index a4093cc7..d09e4bea 100644 --- a/camtools/normalize.py +++ b/camtools/normalize.py @@ -2,7 +2,9 @@ from jaxtyping import Float -def compute_normalize_mat(points: Float[np.ndarray, "n 3"]) -> Float[np.ndarray, "4 4"]: +def compute_normalize_mat( + points: Float[np.ndarray, "n 3"] +) -> Float[np.ndarray, "4 4"]: """ Args: points: (N, 3) numpy array. diff --git a/camtools/render.py b/camtools/render.py index 347e0691..27804454 100644 --- a/camtools/render.py +++ b/camtools/render.py @@ -417,7 +417,9 @@ def align_vector_to_another( axis = np.cross(a, b) axis /= np.linalg.norm(axis) angle = np.arccos( - np.clip(np.dot(a / np.linalg.norm(a), b / np.linalg.norm(b)), -1.0, 1.0) + np.clip( + np.dot(a / np.linalg.norm(a), b / np.linalg.norm(b)), -1.0, 1.0 + ) ) return axis, angle @@ -441,9 +443,13 @@ def normalized(a: np.ndarray) -> Tuple[np.ndarray, float]: start_point, end_point = points[line[0]], points[line[1]] line_segment = end_point - start_point line_segment_unit, line_length = normalized(line_segment) - axis, angle = align_vector_to_another(np.array([0, 0, 1]), line_segment_unit) + axis, angle = align_vector_to_another( + np.array([0, 0, 1]), line_segment_unit + ) translation = start_point + line_segment * 0.5 - cylinder = o3d.geometry.TriangleMesh.create_cylinder(radius, line_length) + cylinder = o3d.geometry.TriangleMesh.create_cylinder( + radius, line_length + ) cylinder.translate(translation, relative=False) if not np.isclose(angle, 0): axis_angle = axis * angle @@ -699,7 +705,9 @@ def render_texts( (0, 0), ( (max_width - im.shape[1]) // 2, - max_width - im.shape[1] - (max_width - im.shape[1]) // 2, + max_width + - im.shape[1] + - (max_width - im.shape[1]) // 2, ), (0, 0), ), diff --git a/camtools/sanity.py b/camtools/sanity.py index 5a19af8e..f78800f9 100644 --- a/camtools/sanity.py +++ b/camtools/sanity.py @@ -16,7 +16,9 @@ def assert_numpy(x, name=None): """ if not isinstance(x, np.ndarray): maybe_name = f" {name}" if name is not None else "" - raise ValueError(f"Expected{maybe_name} to be numpy array, but got {type(x)}.") + raise ValueError( + f"Expected{maybe_name} to be numpy array, but got {type(x)}." 
+ ) def assert_K(K: Float[np.ndarray, "3 3"]): @@ -39,7 +41,9 @@ def assert_K(K: Float[np.ndarray, "3 3"]): ValueError: If K is not a 3x3 matrix """ if K.shape != (3, 3): - raise ValueError(f"K must has shape (3, 3), but got {K} of shape {K.shape}.") + raise ValueError( + f"K must has shape (3, 3), but got {K} of shape {K.shape}." + ) def assert_T(T: Float[np.ndarray, "4 4"]): @@ -63,10 +67,14 @@ def assert_T(T: Float[np.ndarray, "4 4"]): ValueError: If T is not a 4x4 matrix or bottom row is not [0, 0, 0, 1] """ if T.shape != (4, 4): - raise ValueError(f"T must has shape (4, 4), but got {T} of shape {T.shape}.") + raise ValueError( + f"T must has shape (4, 4), but got {T} of shape {T.shape}." + ) is_valid = np.allclose(T[3, :], np.array([0, 0, 0, 1])) if not is_valid: - raise ValueError(f"T must has [0, 0, 0, 1] the bottom row, but got {T}.") + raise ValueError( + f"T must has [0, 0, 0, 1] the bottom row, but got {T}." + ) def assert_pose(pose: Float[np.ndarray, "4 4"]): @@ -97,7 +105,9 @@ def assert_pose(pose: Float[np.ndarray, "4 4"]): ) is_valid = np.allclose(pose[3, :], np.array([0, 0, 0, 1])) if not is_valid: - raise ValueError(f"pose must has [0, 0, 0, 1] the bottom row, but got {pose}.") + raise ValueError( + f"pose must has [0, 0, 0, 1] the bottom row, but got {pose}." + ) def assert_shape(x: np.ndarray, shape: tuple, name: Optional[str] = None): @@ -144,7 +154,9 @@ def assert_shape(x: np.ndarray, shape: tuple, name: Optional[str] = None): if not shape_valid: name_must = f"{name} must" if name is not None else "Must" - raise ValueError(f"{name_must} has shape {shape}, but got shape {x.shape}.") + raise ValueError( + f"{name_must} has shape {shape}, but got shape {x.shape}." + ) def assert_shape_ndim(x: np.ndarray, ndim: int, name: Optional[str] = None): @@ -161,7 +173,9 @@ def assert_shape_ndim(x: np.ndarray, ndim: int, name: Optional[str] = None): """ if x.ndim != ndim: name_must = f"{name} must" if name is not None else "Must" - raise ValueError(f"{name_must} have {ndim} dimensions, but got {x.ndim}.") + raise ValueError( + f"{name_must} have {ndim} dimensions, but got {x.ndim}." + ) def assert_shape_nx3(x: np.ndarray, name: Optional[str] = None): diff --git a/camtools/solver.py b/camtools/solver.py index bd7996d5..363354f9 100644 --- a/camtools/solver.py +++ b/camtools/solver.py @@ -23,9 +23,13 @@ def line_intersection_3d( https://math.stackexchange.com/a/1762491/209055 """ if src_points.ndim != 2 or src_points.shape[1] != 3: - raise ValueError(f"src_points must be (N, 3), but got {src_points.shape}.") + raise ValueError( + f"src_points must be (N, 3), but got {src_points.shape}." + ) if dst_points.ndim != 2 or dst_points.shape[1] != 3: - raise ValueError(f"dst_points must be (N, 3), but got {dst_points.shape}.") + raise ValueError( + f"dst_points must be (N, 3), but got {dst_points.shape}." + ) dirs = dst_points - src_points dirs = dirs / np.linalg.norm(dirs, axis=1).reshape((-1, 1)) @@ -208,7 +212,9 @@ def points_to_mesh_distances( np.ndarray: Array of distances with shape (N,). """ if not points.ndim == 2 or points.shape[1] != 3: - raise ValueError(f"Expected points of shape (N, 3), but got {points.shape}.") + raise ValueError( + f"Expected points of shape (N, 3), but got {points.shape}." 
+ ) mesh_t = o3d.t.geometry.TriangleMesh.from_legacy(mesh) scene = o3d.t.geometry.RaycastingScene() _ = scene.add_triangles(mesh_t) diff --git a/camtools/tools/cli.py b/camtools/tools/cli.py index a2f2d5e1..87a76b6d 100644 --- a/camtools/tools/cli.py +++ b/camtools/tools/cli.py @@ -4,7 +4,9 @@ def _print_greetings(): - greeting_str = f"* CamTools: Camera Tools for Computer Vision (v{ct.__version__}) *" + greeting_str = ( + f"* CamTools: Camera Tools for Computer Vision (v{ct.__version__}) *" + ) header = "*" * len(greeting_str) print(header) print(greeting_str) diff --git a/camtools/tools/compress_images.py b/camtools/tools/compress_images.py index 751f7838..236053b6 100644 --- a/camtools/tools/compress_images.py +++ b/camtools/tools/compress_images.py @@ -82,7 +82,9 @@ def entry_point(parser, args): # Handle PNG file's alpha channel. src_paths_with_alpha = [] - png_paths = [src_path for src_path in src_paths if ct.io.is_png_path(src_path)] + png_paths = [ + src_path for src_path in src_paths if ct.io.is_png_path(src_path) + ] for src_path in png_paths: im = ct.io.imread(src_path, alpha_mode="keep") if im.shape[2] == 4: @@ -183,8 +185,12 @@ def entry_point(parser, args): print(f" - compression_ratio: {compression_ratio:.2f}") # Update text files. - src_paths = [stat["src_path"] for stat in stats if not stat["is_direct_copy"]] - dst_paths = [stat["dst_path"] for stat in stats if not stat["is_direct_copy"]] + src_paths = [ + stat["src_path"] for stat in stats if not stat["is_direct_copy"] + ] + dst_paths = [ + stat["dst_path"] for stat in stats if not stat["is_direct_copy"] + ] if num_compressed > 0 and update_texts_in_dir is not None: do_update_texts_in_dir( src_paths=src_paths, @@ -343,7 +349,9 @@ def is_text_file(path): root_dir = Path(root_dir) text_paths = list(root_dir.glob("**/*")) - text_paths = [text_path for text_path in text_paths if is_text_file(text_path)] + text_paths = [ + text_path for text_path in text_paths if is_text_file(text_path) + ] return text_paths diff --git a/camtools/tools/crop_boarders.py b/camtools/tools/crop_boarders.py index 67f31ead..cf9bc6e4 100644 --- a/camtools/tools/crop_boarders.py +++ b/camtools/tools/crop_boarders.py @@ -74,9 +74,13 @@ def entry_point(parser, args): The parser argument is not used. """ if args.pad_pixel < 0: - raise ValueError(f"pad_pixel must be non-negative, but got {args.pad_pixel}") + raise ValueError( + f"pad_pixel must be non-negative, but got {args.pad_pixel}" + ) if args.pad_ratio < 0: - raise ValueError(f"pad_ratio must be non-negative, but got {args.pad_ratio}") + raise ValueError( + f"pad_ratio must be non-negative, but got {args.pad_ratio}" + ) # Determine src and dst paths. if isinstance(args.input, list): @@ -95,7 +99,8 @@ def entry_point(parser, args): else: if args.skip_cropped: dst_paths = [ - src_path.parent / f"cropped_{src_path.name}" for src_path in src_paths + src_path.parent / f"cropped_{src_path.name}" + for src_path in src_paths ] skipped_src_paths = [p for p in src_paths if p in dst_paths] src_paths = [p for p in src_paths if p not in dst_paths] @@ -104,7 +109,8 @@ def entry_point(parser, args): for src_path in skipped_src_paths: print(f" - {src_path}") dst_paths = [ - src_path.parent / f"cropped_{src_path.name}" for src_path in src_paths + src_path.parent / f"cropped_{src_path.name}" + for src_path in src_paths ] # Read. 
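# Illustrative sketch (not part of this diff): the white-border helpers this
# CLI tool builds on, assuming the camtools.image signatures shown elsewhere
# in this patch (compute_cropping, crop_white_boarders).
import numpy as np
import camtools as ct

im = np.ones((64, 64, 3), dtype=np.float32)      # all-white float32 canvas
im[20:40, 10:30] = 0.5                           # non-white content block
crop_t, crop_b, crop_l, crop_r = ct.image.compute_cropping(im)
im_cropped = ct.image.crop_white_boarders(im, padding=(2, 2, 2, 2))
print((crop_t, crop_b, crop_l, crop_r), im_cropped.shape)  # expected (20, 24, 10, 34) (24, 24, 3)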
@@ -112,9 +118,13 @@ def entry_point(parser, args): for src_im in src_ims: if not src_im.dtype == np.float32: - raise ValueError(f"Input image {src_path} must be of dtype float32.") + raise ValueError( + f"Input image {src_path} must be of dtype float32." + ) if not src_im.ndim == 3 or not src_im.shape[2] == 3: - raise ValueError(f"Input image {src_path} must be of shape (H, W, 3).") + raise ValueError( + f"Input image {src_path} must be of shape (H, W, 3)." + ) num_ims = len(src_ims) # Compute. @@ -123,19 +133,26 @@ def entry_point(parser, args): shapes = [im.shape for im in src_ims] if not all([s == shapes[0] for s in shapes]): raise ValueError( - "All images must be of the same shape when --same_crop is " "specified." + "All images must be of the same shape when --same_crop is " + "specified." ) - individual_croppings = ct.util.mt_loop(ct.image.compute_cropping, src_ims) + individual_croppings = ct.util.mt_loop( + ct.image.compute_cropping, src_ims + ) # Compute the minimum cropping boarders. - min_crop_u, min_crop_d, min_crop_l, min_crop_r = individual_croppings[0] + min_crop_u, min_crop_d, min_crop_l, min_crop_r = individual_croppings[ + 0 + ] for crop_u, crop_d, crop_l, crop_r in individual_croppings[1:]: min_crop_u = min(min_crop_u, crop_u) min_crop_d = min(min_crop_d, crop_d) min_crop_l = min(min_crop_l, crop_l) min_crop_r = min(min_crop_r, crop_r) - croppings = [(min_crop_u, min_crop_d, min_crop_l, min_crop_r)] * len(src_ims) + croppings = [(min_crop_u, min_crop_d, min_crop_l, min_crop_r)] * len( + src_ims + ) # Compute padding (remains unchanged) if args.pad_pixel != 0: @@ -184,7 +201,9 @@ def entry_point(parser, args): ) ) for i in range(num_ims): - paddings[i] = tuple(np.array(paddings[i]) + np.array(extra_paddings[i])) + paddings[i] = tuple( + np.array(paddings[i]) + np.array(extra_paddings[i]) + ) # Apply. dst_ims = ct.image.apply_croppings_paddings( diff --git a/camtools/tools/draw_bboxes.py b/camtools/tools/draw_bboxes.py index 9fd3e92b..ae72f305 100644 --- a/camtools/tools/draw_bboxes.py +++ b/camtools/tools/draw_bboxes.py @@ -100,7 +100,9 @@ def _bbox_str(bbox: matplotlib.transforms.Bbox) -> str: """ A better matplotlib.transforms.Bbox.__str__()` """ - return f"Bbox({bbox.x0:.2f}, {bbox.y0:.2f}, {bbox.x1:.2f}, {bbox.y1:.2f})" + return ( + f"Bbox({bbox.x0:.2f}, {bbox.y0:.2f}, {bbox.x1:.2f}, {bbox.y1:.2f})" + ) @staticmethod def _copy_rectangle( @@ -116,9 +118,21 @@ def _copy_rectangle( xy=(rectangle.xy[0], rectangle.xy[1]), width=rectangle.get_width(), height=rectangle.get_height(), - linestyle=linestyle if linestyle is not None else rectangle.get_linestyle(), - linewidth=linewidth if linewidth is not None else rectangle.get_linewidth(), - edgecolor=edgecolor if edgecolor is not None else rectangle.get_edgecolor(), + linestyle=( + linestyle + if linestyle is not None + else rectangle.get_linestyle() + ), + linewidth=( + linewidth + if linewidth is not None + else rectangle.get_linewidth() + ), + edgecolor=( + edgecolor + if edgecolor is not None + else rectangle.get_edgecolor() + ), facecolor=rectangle.get_facecolor(), ) return new_rectangle @@ -221,7 +235,9 @@ def fill_connected_component(mat, x, y): (br_bound[0], br_bound[1]), # Bottom-right ] for corner in corners: - im_mask = fill_connected_component(im_mask, corner[0], corner[1]) + im_mask = fill_connected_component( + im_mask, corner[0], corner[1] + ) # 4. Undo mask invalid pixels. 
im_mask[im_mask == -1.0] = 0.0 @@ -288,7 +304,9 @@ def _save(self) -> None: im_height = im_shape[0] axis = self.axes[0] - bbox = axis.get_window_extent().transformed(self.fig.dpi_scale_trans.inverted()) + bbox = axis.get_window_extent().transformed( + self.fig.dpi_scale_trans.inverted() + ) axis_height = bbox.height * self.fig.dpi # Get the linewidth in pixels. @@ -296,7 +314,9 @@ def _save(self) -> None: linewidth_px = linewidth_px / axis_height * im_height linewidth_px = int(round(linewidth_px)) - dst_paths = [p.parent / f"bbox_{p.stem}{p.suffix}" for p in self.src_paths] + dst_paths = [ + p.parent / f"bbox_{p.stem}{p.suffix}" for p in self.src_paths + ] for src_path, dst_path in zip(self.src_paths, dst_paths): im_dst = ct.io.imread(src_path) for rectangle in self.confirmed_rectangles: @@ -351,7 +371,9 @@ def print_msg(*args, **kwargs): self.confirmed_rectangles.append( BBoxer._copy_rectangle(self.current_rectangle) ) - bbox_str = BBoxer._bbox_str(self.current_rectangle.get_bbox()) + bbox_str = BBoxer._bbox_str( + self.current_rectangle.get_bbox() + ) print_msg(f"Bounding box saved: {bbox_str}.") # Clear current. self.current_rectangle = None diff --git a/camtools/util.py b/camtools/util.py index 7e6ca5e8..b8e51a47 100644 --- a/camtools/util.py +++ b/camtools/util.py @@ -1,4 +1,8 @@ -from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed +from concurrent.futures import ( + ProcessPoolExecutor, + ThreadPoolExecutor, + as_completed, +) from typing import Any, Callable, Iterable, Optional from functools import lru_cache @@ -26,10 +30,13 @@ def mt_loop( desc = f"[mt] {func.__name__}" with ThreadPoolExecutor() as executor: future_to_index = { - executor.submit(func, item, **kwargs): i for i, item in enumerate(inputs) + executor.submit(func, item, **kwargs): i + for i, item in enumerate(inputs) } results = [None] * len(inputs) - for future in tqdm(as_completed(future_to_index), total=len(inputs), desc=desc): + for future in tqdm( + as_completed(future_to_index), total=len(inputs), desc=desc + ): results[future_to_index[future]] = future.result() return results @@ -55,10 +62,13 @@ def mp_loop( desc = f"[mp] {func.__name__}" with ProcessPoolExecutor() as executor: future_to_index = { - executor.submit(func, item, **kwargs): i for i, item in enumerate(inputs) + executor.submit(func, item, **kwargs): i + for i, item in enumerate(inputs) } results = [None] * len(inputs) - for future in tqdm(as_completed(future_to_index), total=len(inputs), desc=desc): + for future in tqdm( + as_completed(future_to_index), total=len(inputs), desc=desc + ): results[future_to_index[future]] = future.result() return results diff --git a/test/conftest.py b/test/conftest.py index b3a16186..144d8024 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -4,7 +4,8 @@ def pytest_configure(config): config.addinivalue_line( - "markers", "skip_no_o3d_display: skip test when no display is available" + "markers", + "skip_no_o3d_display: skip test when no display is available", ) diff --git a/test/test_convert.py b/test/test_convert.py index b5c96de9..4583d775 100644 --- a/test/test_convert.py +++ b/test/test_convert.py @@ -121,7 +121,11 @@ def gen_random_pose(): [-axis[1], axis[0], 0], ] ) - RT = np.eye(3) + np.sin(angle) * ss + (1 - np.cos(angle)) * np.dot(ss, ss) + RT = ( + np.eye(3) + + np.sin(angle) * ss + + (1 - np.cos(angle)) * np.dot(ss, ss) + ) c = np.random.uniform(-10, 10, size=(3,)) pose = np.eye(4) pose[:3, :3] = RT @@ -138,8 +142,12 @@ def gen_random_pose(): pose_gl = 
ct.convert.pose_opencv_to_opengl(pose_cv) pose_cv_recovered = ct.convert.pose_opengl_to_opencv(pose_gl) pose_gl_recovered = ct.convert.pose_opencv_to_opengl(pose_cv_recovered) - np.testing.assert_allclose(pose_cv, pose_cv_recovered, rtol=1e-5, atol=1e-5) - np.testing.assert_allclose(pose_gl, pose_gl_recovered, rtol=1e-5, atol=1e-5) + np.testing.assert_allclose( + pose_cv, pose_cv_recovered, rtol=1e-5, atol=1e-5 + ) + np.testing.assert_allclose( + pose_gl, pose_gl_recovered, rtol=1e-5, atol=1e-5 + ) # Test convert T bidirectionally T_cv = np.copy(T) @@ -208,8 +216,12 @@ def gen_random_T(): pose_gl = ct.convert.pose_opencv_to_opengl(pose_cv) pose_cv_recovered = ct.convert.pose_opengl_to_opencv(pose_gl) pose_gl_recovered = ct.convert.pose_opencv_to_opengl(pose_cv_recovered) - np.testing.assert_allclose(pose_cv, pose_cv_recovered, rtol=1e-5, atol=1e-5) - np.testing.assert_allclose(pose_gl, pose_gl_recovered, rtol=1e-5, atol=1e-5) + np.testing.assert_allclose( + pose_cv, pose_cv_recovered, rtol=1e-5, atol=1e-5 + ) + np.testing.assert_allclose( + pose_gl, pose_gl_recovered, rtol=1e-5, atol=1e-5 + ) # Test T and pose are consistent across conversions np.testing.assert_allclose( @@ -292,7 +304,9 @@ def test_im_depth_im_distance_convert(): # Geometries sphere = o3d.geometry.TriangleMesh.create_sphere(radius=1.0) sphere = sphere.translate([0, 0, 4]) - box = o3d.geometry.TriangleMesh.create_box(width=1.5, height=1.5, depth=1.5) + box = o3d.geometry.TriangleMesh.create_box( + width=1.5, height=1.5, depth=1.5 + ) box = box.translate([0, 0, 4]) mesh = sphere + box @@ -313,4 +327,6 @@ def test_im_depth_im_distance_convert(): im_depth_reconstructed = ct.convert.im_distance_to_im_depth(im_distance, K) # Assert that the reconstructed depth is close to the original - np.testing.assert_allclose(im_depth, im_depth_reconstructed, rtol=1e-5, atol=1e-5) + np.testing.assert_allclose( + im_depth, im_depth_reconstructed, rtol=1e-5, atol=1e-5 + ) diff --git a/test/test_raycast.py b/test/test_raycast.py index 3ec71966..d3333487 100644 --- a/test/test_raycast.py +++ b/test/test_raycast.py @@ -17,7 +17,9 @@ def test_mesh_to_depth(visualize: bool): # Geometries sphere = o3d.geometry.TriangleMesh.create_sphere(radius=1.0) sphere = sphere.translate([0, 0, 4]) - box = o3d.geometry.TriangleMesh.create_box(width=1.5, height=1.5, depth=1.5) + box = o3d.geometry.TriangleMesh.create_box( + width=1.5, height=1.5, depth=1.5 + ) box = box.translate([0, 0, 4]) mesh = sphere + box lineset = ct.convert.mesh_to_lineset(mesh) diff --git a/test/test_render.py b/test/test_render.py index de667b08..7c322d50 100644 --- a/test/test_render.py +++ b/test/test_render.py @@ -18,11 +18,15 @@ def test_render_geometries(visualize: bool): See conftest.py for more information on the visualize fixture. 
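# Illustrative sketch (not part of this diff): the OpenCV/OpenGL round-trip
# property exercised in test_convert.py above, using only conversion
# functions that appear in this patch.
import numpy as np
import camtools as ct

pose_cv = np.eye(4)
pose_cv[:3, 3] = [0.5, -0.2, 1.0]                # arbitrary camera center, identity rotation
pose_gl = ct.convert.pose_opencv_to_opengl(pose_cv)
pose_cv_back = ct.convert.pose_opengl_to_opencv(pose_gl)
np.testing.assert_allclose(pose_cv, pose_cv_back, rtol=1e-5, atol=1e-5)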
""" # Setup geometries: sphere (red), box (blue) - sphere = o3d.geometry.TriangleMesh.create_sphere(radius=1.0, resolution=100) + sphere = o3d.geometry.TriangleMesh.create_sphere( + radius=1.0, resolution=100 + ) sphere = sphere.translate([0, 0, 4]) sphere = sphere.paint_uniform_color([0.2, 0.4, 0.8]) sphere.compute_vertex_normals() - box = o3d.geometry.TriangleMesh.create_box(width=1.5, height=1.5, depth=1.5) + box = o3d.geometry.TriangleMesh.create_box( + width=1.5, height=1.5, depth=1.5 + ) box = box.translate([0, 0, 4]) box = box.paint_uniform_color([0.8, 0.2, 0.2]) box.compute_vertex_normals() @@ -65,7 +69,11 @@ def test_render_geometries(visualize: bool): im_raycast_depth[im_raycast_depth == np.inf] = 0 # Heuristic checks of RGB rendering - assert im_render_rgb.shape == (height, width, 3), "Image has incorrect dimensions" + assert im_render_rgb.shape == ( + height, + width, + 3, + ), "Image has incorrect dimensions" num_white_pixels = np.sum( (im_render_rgb[:, :, 0] > 0.9) & (im_render_rgb[:, :, 1] > 0.9) @@ -81,7 +89,9 @@ def test_render_geometries(visualize: bool): & (im_render_rgb[:, :, 1] < 0.3) & (im_render_rgb[:, :, 2] < 0.5) ) - assert num_white_pixels > (height * width * 0.5), "Expected mostly white background" + assert num_white_pixels > ( + height * width * 0.5 + ), "Expected mostly white background" assert num_blue_pixels > 100, "Expected blue pixels (sphere) not found" assert num_red_pixels > 100, "Expected red pixels (box) not found" @@ -98,7 +108,8 @@ def test_render_geometries(visualize: bool): im_render_rgb_mask.astype(float) - im_render_depth_mask.astype(float) ) im_mask_diff_raycast_vs_render = np.abs( - im_raycast_depth_mask.astype(float) - im_render_depth_mask.astype(float) + im_raycast_depth_mask.astype(float) + - im_render_depth_mask.astype(float) ) assert ( np.mean(im_mask_diff_rgb_vs_raycast) < 0.01 From d53278085d3dac9c42fb2232863f9c53b9389601 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 17:58:53 +0800 Subject: [PATCH 14/59] revert to original python code --- camtools/camera.py | 20 +-- camtools/colmap.py | 57 ++---- camtools/convert.py | 44 ++--- camtools/geometry.py | 4 +- camtools/image.py | 279 +++++++++++++----------------- camtools/io.py | 82 +++++---- camtools/metric.py | 66 +++---- camtools/normalize.py | 4 +- camtools/raycast.py | 246 +++++++++++--------------- camtools/render.py | 217 +++++++++++------------ camtools/sanity.py | 62 ++----- camtools/solver.py | 12 +- camtools/tools/cli.py | 4 +- camtools/tools/compress_images.py | 78 ++++----- camtools/tools/crop_boarders.py | 41 ++--- camtools/tools/draw_bboxes.py | 38 +--- camtools/util.py | 60 +++---- 17 files changed, 515 insertions(+), 799 deletions(-) diff --git a/camtools/camera.py b/camtools/camera.py index 784cefc6..e772154e 100644 --- a/camtools/camera.py +++ b/camtools/camera.py @@ -13,9 +13,7 @@ def create_camera_frustums( image_whs: Optional[List[List[int]]] = None, size: float = 0.1, color: Tuple[float, float, float] = (0, 0, 1), - highlight_color_map: Optional[ - Dict[int, Tuple[float, float, float]] - ] = None, + highlight_color_map: Optional[Dict[int, Tuple[float, float, float]]] = None, center_line: bool = True, center_line_color: Tuple[float, float, float] = (1, 0, 0), up_triangle: bool = True, @@ -74,9 +72,7 @@ def create_camera_frustums( if not isinstance(w, (int, np.integer)) or not isinstance( h, (int, np.integer) ): - raise ValueError( - f"image_wh must be integer, but got {image_wh}." 
- ) + raise ValueError(f"image_wh must be integer, but got {image_wh}.") # Wrap the highlight_color_map dimensions. if highlight_color_map is not None: @@ -119,9 +115,7 @@ def create_camera_frustum_with_Ts( image_whs: Optional[List[List[int]]] = None, size: float = 0.1, color: Tuple[float, float, float] = (0, 0, 1), - highlight_color_map: Optional[ - Dict[int, Tuple[float, float, float]] - ] = None, + highlight_color_map: Optional[Dict[int, Tuple[float, float, float]]] = None, center_line: bool = True, center_line_color: Tuple[float, float, float] = (1, 0, 0), up_triangle: bool = True, @@ -215,9 +209,7 @@ def _create_camera_frustum( sanity.assert_shape_3(color, "color") w, h = image_wh - if not isinstance(w, (int, np.integer)) or not isinstance( - h, (int, np.integer) - ): + if not isinstance(w, (int, np.integer)) or not isinstance(h, (int, np.integer)): raise ValueError(f"image_wh must be integer, but got {image_wh}.") R, _ = convert.T_to_R_t(T) @@ -231,9 +223,7 @@ def _create_camera_frustum( [0, h - 1, 1], ] ) - camera_plane_points_3d = ( - np.linalg.inv(K) @ camera_plane_points_2d_homo.T - ).T + camera_plane_points_3d = (np.linalg.inv(K) @ camera_plane_points_2d_homo.T).T camera_plane_dist = solver.point_plane_distance_three_points( [0, 0, 0], camera_plane_points_3d ) diff --git a/camtools/colmap.py b/camtools/colmap.py index 5af2bc04..ff7ecc33 100644 --- a/camtools/colmap.py +++ b/camtools/colmap.py @@ -109,9 +109,7 @@ def qvec2rotmat(self): ) -def read_next_bytes( - fid, num_bytes, format_char_sequence, endian_character="<" -): +def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"): """Read and unpack the next bytes from a binary file. :param fid: :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc. 
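# Illustrative sketch (not part of this diff): a minimal stand-in with the
# same signature as read_next_bytes, showing the struct-unpacking pattern the
# binary readers below rely on; the in-memory buffer and layout are hypothetical.
import io
import struct

def _read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
    data = fid.read(num_bytes)
    return struct.unpack(endian_character + format_char_sequence, data)

buf = io.BytesIO(struct.pack("<iQ", 7, 42))      # an int32 followed by a uint64
print(_read_next_bytes(buf, 12, "iQ"))           # (7, 42)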
@@ -160,11 +158,7 @@ def read_cameras_text(path): height = int(elems[3]) params = np.array(tuple(map(float, elems[4:]))) cameras[camera_id] = Camera( - id=camera_id, - model=model, - width=width, - height=height, - params=params, + id=camera_id, model=model, width=width, height=height, params=params ) return cameras @@ -189,9 +183,7 @@ def read_cameras_binary(path_to_model_file): height = camera_properties[3] num_params = CAMERA_MODEL_IDS[model_id].num_params params = read_next_bytes( - fid, - num_bytes=8 * num_params, - format_char_sequence="d" * num_params, + fid, num_bytes=8 * num_params, format_char_sequence="d" * num_params ) cameras[camera_id] = Camera( id=camera_id, @@ -262,10 +254,7 @@ def read_images_text(path): image_name = elems[9] elems = fid.readline().split() xys = np.column_stack( - [ - tuple(map(float, elems[0::3])), - tuple(map(float, elems[1::3])), - ] + [tuple(map(float, elems[0::3])), tuple(map(float, elems[1::3]))] ) point3D_ids = np.array(tuple(map(int, elems[2::3]))) images[image_id] = Image( @@ -302,19 +291,16 @@ def read_images_binary(path_to_model_file): while current_char != b"\x00": # look for the ASCII 0 entry image_name += current_char.decode("utf-8") current_char = read_next_bytes(fid, 1, "c")[0] - num_points2D = read_next_bytes( - fid, num_bytes=8, format_char_sequence="Q" - )[0] + num_points2D = read_next_bytes(fid, num_bytes=8, format_char_sequence="Q")[ + 0 + ] x_y_id_s = read_next_bytes( fid, num_bytes=24 * num_points2D, format_char_sequence="ddq" * num_points2D, ) xys = np.column_stack( - [ - tuple(map(float, x_y_id_s[0::3])), - tuple(map(float, x_y_id_s[1::3])), - ] + [tuple(map(float, x_y_id_s[0::3])), tuple(map(float, x_y_id_s[1::3]))] ) point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3]))) images[image_id] = Image( @@ -353,13 +339,7 @@ def write_images_text(images, path): with open(path, "w") as fid: fid.write(HEADER) for _, img in images.items(): - image_header = [ - img.id, - *img.qvec, - *img.tvec, - img.camera_id, - img.name, - ] + image_header = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name] first_line = " ".join(map(str, image_header)) fid.write(first_line + "\n") @@ -439,9 +419,9 @@ def read_points3D_binary(path_to_model_file): xyz = np.array(binary_point_line_properties[1:4]) rgb = np.array(binary_point_line_properties[4:7]) error = np.array(binary_point_line_properties[7]) - track_length = read_next_bytes( - fid, num_bytes=8, format_char_sequence="Q" - )[0] + track_length = read_next_bytes(fid, num_bytes=8, format_char_sequence="Q")[ + 0 + ] track_elems = read_next_bytes( fid, num_bytes=8 * track_length, @@ -663,8 +643,7 @@ def quat_from_rotm(R): q3[:, 3] = z q = q0 * (w[:, None] > 0) + (w[:, None] == 0) * ( q1 * (x[:, None] > 0) - + (x[:, None] == 0) - * (q2 * (y[:, None] > 0) + (y[:, None] == 0) * (q3)) + + (x[:, None] == 0) * (q2 * (y[:, None] > 0) + (y[:, None] == 0) * (q3)) ) q /= np.linalg.norm(q, axis=1, keepdims=True) return q.squeeze() @@ -840,9 +819,7 @@ def main(): ) args = parser.parse_args() - cameras, images, points3D = read_model( - path=args.input_model, ext=args.input_format - ) + cameras, images, points3D = read_model(path=args.input_model, ext=args.input_format) print("num_cameras:", len(cameras)) print("num_images:", len(images)) @@ -850,11 +827,7 @@ def main(): if args.output_model is not None: write_model( - cameras, - images, - points3D, - path=args.output_model, - ext=args.output_format, + cameras, images, points3D, path=args.output_model, ext=args.output_format ) diff --git a/camtools/convert.py 
b/camtools/convert.py index 4f38c0b2..3334c5fd 100644 --- a/camtools/convert.py +++ b/camtools/convert.py @@ -20,9 +20,7 @@ def pad_0001(array): """ if array.ndim == 2: if not array.shape == (3, 4): - raise ValueError( - f"Expected array of shape (3, 4), but got {array.shape}." - ) + raise ValueError(f"Expected array of shape (3, 4), but got {array.shape}.") elif array.ndim == 3: if not array.shape[-2:] == (3, 4): raise ValueError( @@ -58,9 +56,7 @@ def rm_pad_0001(array, check_vals=False): # Check shapes. if array.ndim == 2: if not array.shape == (4, 4): - raise ValueError( - f"Expected array of shape (4, 4), but got {array.shape}." - ) + raise ValueError(f"Expected array of shape (4, 4), but got {array.shape}.") elif array.ndim == 3: if not array.shape[-2:] == (4, 4): raise ValueError( @@ -81,9 +77,7 @@ def rm_pad_0001(array, check_vals=False): ) elif array.ndim == 3: bottom = array[:, 3:4, :] - expected_bottom = np.broadcast_to( - [0, 0, 0, 1], (array.shape[0], 1, 4) - ) + expected_bottom = np.broadcast_to([0, 0, 0, 1], (array.shape[0], 1, 4)) if not np.allclose(bottom, expected_bottom): raise ValueError( f"Expected bottom row to be {expected_bottom}, but got {bottom}." @@ -105,9 +99,7 @@ def to_homo(array): A numpy array of shape (N, M+1) with a column of ones appended. """ if not isinstance(array, np.ndarray) or array.ndim != 2: - raise ValueError( - f"Input must be a 2D numpy array, but got {array.shape}." - ) + raise ValueError(f"Input must be a 2D numpy array, but got {array.shape}.") ones = np.ones((array.shape[0], 1), dtype=array.dtype) return np.hstack((array, ones)) @@ -125,9 +117,7 @@ def from_homo(array): A numpy array of shape (N, M-1) in Cartesian coordinates. """ if not isinstance(array, np.ndarray) or array.ndim != 2: - raise ValueError( - f"Input must be a 2D numpy array, but got {array.shape}." - ) + raise ValueError(f"Input must be a 2D numpy array, but got {array.shape}.") if array.shape[1] < 2: raise ValueError( f"Input array must have at least two columns for removing " @@ -221,9 +211,7 @@ def pose_to_T(pose): return np.linalg.inv(pose) -def T_opengl_to_opencv( - T: Float[np.ndarray, "4 4"] -) -> Float[np.ndarray, "4 4"]: +def T_opengl_to_opencv(T: Float[np.ndarray, "4 4"]) -> Float[np.ndarray, "4 4"]: """ Convert T from OpenGL convention to OpenCV convention. @@ -251,9 +239,7 @@ def T_opengl_to_opencv( return T -def T_opencv_to_opengl( - T: Float[np.ndarray, "4 4"] -) -> Float[np.ndarray, "4 4"]: +def T_opencv_to_opengl(T: Float[np.ndarray, "4 4"]) -> Float[np.ndarray, "4 4"]: """ Convert T from OpenCV convention to OpenGL convention. @@ -281,9 +267,7 @@ def T_opencv_to_opengl( return T -def pose_opengl_to_opencv( - pose: Float[np.ndarray, "4 4"] -) -> Float[np.ndarray, "4 4"]: +def pose_opengl_to_opencv(pose: Float[np.ndarray, "4 4"]) -> Float[np.ndarray, "4 4"]: """ Convert pose from OpenGL convention to OpenCV convention. @@ -308,9 +292,7 @@ def pose_opengl_to_opencv( return pose -def pose_opencv_to_opengl( - pose: Float[np.ndarray, "4 4"] -) -> Float[np.ndarray, "4 4"]: +def pose_opencv_to_opengl(pose: Float[np.ndarray, "4 4"]) -> Float[np.ndarray, "4 4"]: """ Convert pose from OpenCV convention to OpenGL convention. 
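# Illustrative sketch (not part of this diff): the homogeneous-coordinate
# helpers touched above, assuming the to_homo / from_homo signatures shown in
# this file.
import numpy as np
import camtools as ct

points = np.array([[0.0, 1.0, 2.0],
                   [3.0, 4.0, 5.0]])
points_homo = ct.convert.to_homo(points)         # (2, 4), last column of ones appended
points_back = ct.convert.from_homo(points_homo)  # back to Cartesian (2, 3)
np.testing.assert_allclose(points, points_back)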
@@ -464,9 +446,7 @@ def T_to_R_t( def P_to_K_R_t( P: Float[np.ndarray, "3 4"], -) -> Tuple[ - Float[np.ndarray, "3 3"], Float[np.ndarray, "3 3"], Float[np.ndarray, "3"] -]: +) -> Tuple[Float[np.ndarray, "3 3"], Float[np.ndarray, "3 3"], Float[np.ndarray, "3"]]: """ Decompose projection matrix P into intrinsic matrix K, rotation matrix R, and translation vector t. @@ -805,9 +785,7 @@ def mesh_to_lineset( if color is not None: if len(color) != 3: - raise ValueError( - f"Expected color of shape (3,), but got {color.shape}." - ) + raise ValueError(f"Expected color of shape (3,), but got {color.shape}.") lineset.paint_uniform_color(color) return lineset diff --git a/camtools/geometry.py b/camtools/geometry.py index 8a2f59cc..b972ef78 100644 --- a/camtools/geometry.py +++ b/camtools/geometry.py @@ -62,9 +62,7 @@ def mesh_to_lineset( """ # Downsample mesh if downsample_ratio < 1.0: - target_number_of_triangles = int( - len(mesh.triangles) * downsample_ratio - ) + target_number_of_triangles = int(len(mesh.triangles) * downsample_ratio) mesh = mesh.simplify_quadric_decimation(target_number_of_triangles) elif downsample_ratio > 1.0: raise ValueError("Subsample must be less than or equal to 1.0") diff --git a/camtools/image.py b/camtools/image.py index c193ab0f..3da1766e 100644 --- a/camtools/image.py +++ b/camtools/image.py @@ -7,8 +7,7 @@ def crop_white_boarders( - im: Float[np.ndarray, "h w 3"], - padding: Tuple[int, int, int, int] = (0, 0, 0, 0), + im: Float[np.ndarray, "h w 3"], padding: Tuple[int, int, int, int] = (0, 0, 0, 0) ) -> Float[np.ndarray, "h_cropped w_cropped 3"]: """ Crop white borders from an image and apply optional padding. @@ -28,9 +27,7 @@ def crop_white_boarders( return im_dst -def compute_cropping_v1( - im: Float[np.ndarray, "h w n"] -) -> Tuple[int, int, int, int]: +def compute_cropping_v1(im: Float[np.ndarray, "h w n"]) -> Tuple[int, int, int, int]: """ Compute white border sizes in pixels for multi-channel images. @@ -118,13 +115,9 @@ def compute_cropping( ValueError: If input image has invalid dtype, dimensions, or fails v1 check. """ if not im.dtype == np.float32: - raise ValueError( - f"Expected im.dtype to be np.float32, but got {im.dtype}" - ) + raise ValueError(f"Expected im.dtype to be np.float32, but got {im.dtype}") if im.ndim != 3 or im.shape[2] != 3: - raise ValueError( - f"Expected im to be of shape (H, W, 3), but got {im.shape}" - ) + raise ValueError(f"Expected im to be of shape (H, W, 3), but got {im.shape}") # Create a mask where white pixels are marked as True white_mask = np.all(im == 1.0, axis=-1) @@ -135,13 +128,9 @@ def compute_cropping( # Determine the crop values based on the positions of non-white pixels crop_t = rows_with_color[0] if len(rows_with_color) else 0 - crop_b = ( - im.shape[0] - rows_with_color[-1] - 1 if len(rows_with_color) else 0 - ) + crop_b = im.shape[0] - rows_with_color[-1] - 1 if len(rows_with_color) else 0 crop_l = cols_with_color[0] if len(cols_with_color) else 0 - crop_r = ( - im.shape[1] - cols_with_color[-1] - 1 if len(cols_with_color) else 0 - ) + crop_r = im.shape[1] - cols_with_color[-1] - 1 if len(cols_with_color) else 0 # Check the results against compute_cropping_v1 if requested if check_with_v1: @@ -215,20 +204,27 @@ def apply_croppings_paddings( Apply cropping and padding to a list of RGB images. Args: - src_ims (List[Float[np.ndarray, "h w 3"]]): List of source images as float32 - arrays with shape (height, width, 3). 
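# Illustrative sketch (not part of this diff): how the cropping/padding lists
# documented for apply_croppings_paddings below are consumed; the values and
# expected output shape are hypothetical, following the docstring conventions.
import numpy as np
import camtools as ct

src_ims = [np.ones((32, 32, 3), dtype=np.float32)]
croppings = [(4, 4, 2, 2)]                       # (crop_t, crop_b, crop_l, crop_r)
paddings = [(1, 1, 1, 1)]                        # (pad_t, pad_b, pad_l, pad_r)
(dst_im,) = ct.image.apply_croppings_paddings(src_ims, croppings, paddings)
print(dst_im.shape)                              # expected (26, 30, 3)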
- croppings (List[Tuple[int, int, int, int]]): List of cropping tuples in the - format [(crop_t, crop_b, crop_l, crop_r), ...]. - paddings (List[Tuple[int, int, int, int]]): List of padding tuples in the - format [(pad_t, pad_b, pad_l, pad_r), ...]. + src_ims: list of images, float32. + croppings: list of 4-tuples + [ + (crop_t, crop_b, crop_l, crop_r), + (crop_t, crop_b, crop_l, crop_r), + ... + ] + paddings: list of 4-tuples + [ + (pad_t, pad_b, pad_l, pad_r), + (pad_t, pad_b, pad_l, pad_r), + ... + ] Returns: - List[Float[np.ndarray, "h_cropped w_cropped 3"]]: List of cropped and padded - images as float32 arrays with shape (height_cropped, width_cropped, 3). + List[Float[np.ndarray, "h_cropped w_cropped 3"]]: List of cropped and padded images + as float32 arrays with shape (height_cropped, width_cropped, 3). Raises: - ValueError: If the number of croppings or paddings doesn't match the number - of images, or if any cropping tuple has invalid length. + ValueError: If the number of croppings or paddings doesn't match the number of images, + or if any cropping tuple has invalid length. """ num_ims = len(src_ims) if not len(croppings) == num_ims: @@ -253,23 +249,26 @@ def get_post_croppings_paddings_shapes( paddings: List[Tuple[int, int, int, int]], ) -> List[Tuple[int, int, int]]: """ - Calculate the shapes of images after applying cropping and padding. + Compute the shapes of images after applying cropping and padding. Args: - src_shapes (List[Tuple[int, int, int]]): List of source image shapes in - (height, width, channels) format. - croppings (List[Tuple[int, int, int, int]]): List of cropping tuples in the - format [(crop_t, crop_b, crop_l, crop_r), ...]. - paddings (List[Tuple[int, int, int, int]]): List of padding tuples in the - format [(pad_t, pad_b, pad_l, pad_r), ...]. + src_shapes: list of source image shapes. + croppings: list of 4-tuples + [ + (crop_t, crop_b, crop_l, crop_r), + (crop_t, crop_b, crop_l, crop_r), + ... + ] + paddings: list of 4-tuples + [ + (pad_t, pad_b, pad_l, pad_r), + (pad_t, pad_b, pad_l, pad_r), + ... + ] Returns: - List[Tuple[int, int, int]]: List of output shapes in (height, width, channels) - format after applying cropping and padding. - - Raises: - ValueError: If the number of croppings or paddings doesn't match the number - of source shapes. + List[Tuple[int, int, int]]: List of resulting image shapes after cropping and padding + in the format (height_cropped, width_cropped, channels). """ dst_shapes = [] for src_shape, cropping, padding in zip(src_shapes, croppings, paddings): @@ -327,9 +326,7 @@ def overlay_mask_on_rgb( assert overlay_color.max() <= 1.0 and overlay_color.min() >= 0.0 im_mask_stacked = np.dstack([im_mask, im_mask, im_mask]) - im_hard = ( - im_rgb * (1.0 - im_mask_stacked) + overlay_color * im_mask_stacked - ) + im_hard = im_rgb * (1.0 - im_mask_stacked) + overlay_color * im_mask_stacked im_soft = im_rgb * (1.0 - overlay_alpha) + im_hard * overlay_alpha return im_soft @@ -341,26 +338,31 @@ def ndc_coords_to_pixels( align_corners: bool = False, ) -> Float[np.ndarray, "n 2"]: """ - Convert normalized device coordinates (NDC) to pixel coordinates. + Convert Normalized Device Coordinates (NDC) to pixel coordinates. Args: - ndc_coords (Float[np.ndarray, "n 2"]): Input coordinates in NDC space - (-1 to 1). Shape is (n, 2) where n is the number of points. - im_size_wh (Tuple[int, int]): Image size in (width, height) format. 
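# Illustrative sketch (not part of this diff): mapping NDC extrema to pixel
# coordinates, assuming the parameters shown in this hunk
# (ndc_coords, im_size_wh, align_corners).
import numpy as np
import camtools as ct

ndc_coords = np.array([[-1.0, -1.0], [0.0, 0.0], [1.0, 1.0]])
pixels = ct.image.ndc_coords_to_pixels(ndc_coords, im_size_wh=(640, 480), align_corners=False)
print(pixels)   # with align_corners=False: (-1,-1) -> (-0.5,-0.5), (1,1) -> (639.5, 479.5)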
- align_corners (bool): If True, extreme values (-1 and 1) are considered to - refer to the center points of the border pixels. If False, extreme - values refer to the outer edges of the border pixels. Default: False. + ndc_coords: NDC coordinates. Each row represents (x, y) or (c, r). + Most values shall be in [-1, 1], where (-1, -1) is the top left + corner and (1, 1) is the bottom right corner. + im_size_wh: Image size (width, height). + align_corners: Determines how NDC coordinates map to pixel coordinates: + - If True: -1 and 1 are aligned to the center of the corner pixels + - If False: -1 and 1 are aligned to the corner of the corner pixels + In general image interpolation: + - When align_corners=True: src and dst images are aligned by the center + point of their corner pixels + - When align_corners=False: src and dst images are aligned by the corner + points of the corner pixels + The NDC space does not have a "pixels size", so we precisely align the + extrema -1 and 1 to either the center or corner of the corner pixels. Returns: - Float[np.ndarray, "n 2"]: Pixel coordinates with shape (n, 2). The - coordinates are in (x, y) format, where x is the horizontal coordinate - and y is the vertical coordinate. + Float[np.ndarray, "n 2"]: Pixel coordinates as a float array with shape + (num_points, 2). Out-of-bound values are not corrected. Notes: - - NDC space has (-1, -1) at the top-left corner and (1, 1) at the - bottom-right corner. - - Pixel space has (0, 0) at the top-left corner and (w-1, h-1) at the - bottom-right corner. + This function is commonly used in computer graphics to map normalized + coordinates to specific pixel locations in an image. """ sanity.assert_shape(ndc_coords, (None, 2), name="ndc_coords") w, h = im_size_wh[:2] @@ -381,9 +383,7 @@ def ndc_coords_to_pixels( dst_tl = np.array([-0.5, -0.5], dtype=dtype) dst_br = np.array([w - 0.5, h - 0.5], dtype=dtype) - dst_pixels = (ndc_coords - src_tl) / (src_br - src_tl) * ( - dst_br - dst_tl - ) + dst_tl + dst_pixels = (ndc_coords - src_tl) / (src_br - src_tl) * (dst_br - dst_tl) + dst_tl return dst_pixels @@ -482,9 +482,7 @@ def recover_rotated_pixels(dst_pixels, src_wh, ccw_degrees): dst_pixels_recovered = np.stack([h - 1 - src_r, src_c], axis=1) else: raise ValueError(f"Invalid rotation angle: {ccw_degrees}.") - np.testing.assert_allclose( - dst_pixels, dst_pixels_recovered, rtol=1e-5, atol=1e-5 - ) + np.testing.assert_allclose(dst_pixels, dst_pixels_recovered, rtol=1e-5, atol=1e-5) return src_pixels @@ -504,57 +502,40 @@ def resize( ] = None, interpolation: int = cv2.INTER_LINEAR, ) -> Union[ - Float[np.ndarray, "h_new w_new"], - Float[np.ndarray, "h_new w_new 3"], - UInt8[np.ndarray, "h_new w_new"], - UInt8[np.ndarray, "h_new w_new 3"], - UInt16[np.ndarray, "h_new w_new"], - UInt16[np.ndarray, "h_new w_new 3"], + Float[np.ndarray, "h_ w_"], + Float[np.ndarray, "h_ w_ 3"], + UInt8[np.ndarray, "h_ w_"], + UInt8[np.ndarray, "h_ w_ 3"], + UInt16[np.ndarray, "h_ w_"], + UInt16[np.ndarray, "h_ w_ 3"], ]: """ - Resize an image to a target size. - - The image is resized using OpenCV's resize function with the specified - interpolation method. The target size can be specified in several ways: - - 1. Provide target_height and target_width - 2. Provide target_height and keep aspect ratio - 3. Provide target_width and keep aspect ratio - 4. Provide scale_factor to scale both dimensions + Resize an image to a specified width and height, optionally maintaining aspect ratio. 
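# Illustrative sketch (not part of this diff): calling resize with the restored
# signature documented below (im, shape_wh, aspect_ratio_fill); note shape_wh is
# (width, height) while numpy shapes are (height, width).
import numpy as np
import camtools as ct

im = np.zeros((480, 640, 3), dtype=np.float32)
im_small = ct.image.resize(im, shape_wh=(320, 240))               # direct resize
im_square = ct.image.resize(im, shape_wh=(256, 256),
                            aspect_ratio_fill=(1.0, 1.0, 1.0))    # keep aspect ratio, pad white
print(im_small.shape, im_square.shape)                            # expected (240, 320, 3) (256, 256, 3)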
Args: - image (Float[np.ndarray, "h w c"]): Input image array with shape - (height, width, channels). - - target_height (Optional[int]): Target height in pixels. If None, height - is determined by target_width and aspect ratio. - - target_width (Optional[int]): Target width in pixels. If None, width is - determined by target_height and aspect ratio. - - scale_factor (Optional[float]): Scale factor to apply to both dimensions. - If provided, target_height and target_width are ignored. - - interpolation (int): OpenCV interpolation method. Default: cv2.INTER_LINEAR. - Common options: - - cv2.INTER_NEAREST: Nearest neighbor - - cv2.INTER_LINEAR: Bilinear - - cv2.INTER_CUBIC: Bicubic - - cv2.INTER_LANCZOS4: Lanczos + im (Union[Float[np.ndarray, "h w"], Float[np.ndarray, "h w 3"], + UInt8[np.ndarray, "h w"], UInt8[np.ndarray, "h w 3"], + UInt16[np.ndarray, "h w"], UInt16[np.ndarray, "h w 3"]]): + Input image as a numpy array with shape (height, width) or (height, width, 3). + Supported dtypes: uint8, uint16, float32, float64. + shape_wh (Tuple[int, int]): Target size as (width, height) in pixels. + aspect_ratio_fill (Optional[Union[float, Tuple[float, float, float], np.ndarray]]): + Value(s) to use for padding when maintaining aspect ratio. If None, image is + directly resized without maintaining aspect ratio. If provided, must match + the number of channels in the input image. + interpolation (int): OpenCV interpolation method (e.g., cv2.INTER_LINEAR). Returns: - Float[np.ndarray, "h' w' c"]: Resized image array with shape - (new_height, new_width, channels). - - Example: - >>> # Resize to specific dimensions - >>> resized = ct.image.resize(image, target_height=480, target_width=640) - >>> - >>> # Resize keeping aspect ratio - >>> resized = ct.image.resize(image, target_height=480) - >>> - >>> # Scale by factor - >>> resized = ct.image.resize(image, scale_factor=0.5) + Union[Float[np.ndarray, "h_ w_"], Float[np.ndarray, "h_ w_ 3"], + UInt8[np.ndarray, "h_ w_"], UInt8[np.ndarray, "h_ w_ 3"], + UInt16[np.ndarray, "h_ w_"], UInt16[np.ndarray, "h_ w_ 3"]]: + Resized image with the same dtype as input. Shape will be (height, width) + or (height, width, 3) depending on input. + + Notes: + - When maintaining aspect ratio, the image is resized to fit within the target + dimensions and padded with aspect_ratio_fill values as needed. + - OpenCV uses (width, height) for image size while numpy uses (height, width). """ # Sanity: dtype. dtype = im.dtype @@ -612,9 +593,7 @@ def resize( if tmp_w == dst_w and tmp_h == dst_h: im_resize = im_tmp else: - im_resize = np.full( - dst_numpy_shape, fill_value=aspect_ratio_fill, dtype=dtype - ) + im_resize = np.full(dst_numpy_shape, fill_value=aspect_ratio_fill, dtype=dtype) im_resize[:tmp_h, :tmp_w] = im_tmp # Final sanity checks for the reshaped image. @@ -691,9 +670,7 @@ def recover_resized_pixels( src_br = np.array([src_w - 0.5, src_h - 0.5]) dst_tl = np.array([-0.5, -0.5]) dst_br = np.array([tmp_w - 0.5, tmp_h - 0.5]) - src_pixels = (dst_pixels - dst_tl) / (dst_br - dst_tl) * ( - src_br - src_tl - ) + src_tl + src_pixels = (dst_pixels - dst_tl) / (dst_br - dst_tl) * (src_br - src_tl) + src_tl return src_pixels @@ -713,36 +690,30 @@ def make_corres_image( sample_ratio: Optional[float] = None, ) -> Float[np.ndarray, "h 2*w 3"]: """ - Create a correspondence visualization image by combining two images side by side. + Make correspondence image. Args: - im_src (Float[np.ndarray, "h w 3"]): Source image as float32 array with - shape (height, width, 3). 
Values should be in range [0, 1]. - im_dst (Float[np.ndarray, "h w 3"]): Destination image as float32 array with - shape (height, width, 3). Values should be in range [0, 1]. - src_pixels (Int[np.ndarray, "n 2"]): Source pixel coordinates as int array - with shape (n, 2) in (x, y) format. - dst_pixels (Int[np.ndarray, "n 2"]): Destination pixel coordinates as int - array with shape (n, 2) in (x, y) format. - confidences (Optional[Float[np.ndarray, "n"]]): Confidence scores for each - correspondence. Values should be in range [0, 1]. Default: None. - texts (Optional[List[str]]): Text labels for each correspondence point. - Default: None. - point_color (Optional[Tuple[float, ...]]): Color for correspondence points - in RGBA format. Default: (0, 1, 0, 1.0). - line_color (Optional[Tuple[float, ...]]): Color for correspondence lines - in RGBA format. Default: (0, 0, 1, 0.75). - text_color (Tuple[float, float, float]): Color for text labels in RGB - format. Default: (1, 1, 1). - point_size (int): Size of correspondence points in pixels. Default: 1. - line_width (int): Width of correspondence lines in pixels. Default: 1. - sample_ratio (Optional[float]): If provided, randomly sample this ratio of - correspondences to display. Default: None. + im_src: (h, w, 3) float image, range 0-1. + im_dst: (h, w, 3) float image, range 0-1. + src_pixels: (n, 2) int array, each row represents (x, y) or (c, r). + dst_pixels: (n, 2) int array, each row represents (x, y) or (c, r). + confidences: (n,) float array, confidence of each corres, range [0, 1]. + texts: List of texts to draw on the top-left of the image. + point_color: RGB or RGBA color of the point, float, range 0-1. + - If point_color == None: + points will never be drawn. + - If point_color != None and confidences == None + point color will be determined by point_color. + - If point_color != None and confidences != None + point color will be determined by "viridis" colormap. + line_color: RGB or RGBA color of the line, float, range 0-1. + text_color: RGB color of the text, float, range 0-1. + point_size: Size of the point. + line_width: Width of the line. + sample_ratio: Float value from 0-1. If None, all points are drawn. Returns: - Float[np.ndarray, "h 2*w 3"]: Visualization image as float32 array with - shape (height, 2*width, 3), showing source and destination images side - by side with correspondence lines. + Correspondence image with shape (h, 2*w, 3). """ assert im_src.shape == im_dst.shape assert im_src.ndim == 3 and im_src.shape[2] == 3 @@ -759,9 +730,7 @@ def make_corres_image( if confidences is not None: assert len(confidences) == len(src_pixels) - assert ( - confidences.dtype == np.float32 or confidences.dtype == np.float64 - ) + assert confidences.dtype == np.float32 or confidences.dtype == np.float64 if confidences.size > 0: assert confidences.min() >= 0.0 and confidences.max() <= 1.0 assert confidences.ndim == 1 @@ -806,9 +775,7 @@ def make_corres_image( assert sample_ratio > 0.0 and sample_ratio <= 1.0 num_points = len(src_pixels) num_samples = int(round(num_points * sample_ratio)) - sample_indices = np.random.choice( - num_points, num_samples, replace=False - ) + sample_indices = np.random.choice(num_points, num_samples, replace=False) src_pixels = src_pixels[sample_indices] dst_pixels = dst_pixels[sample_indices] confidences = confidences[sample_indices] @@ -820,12 +787,8 @@ def make_corres_image( if confidences is None: # Draw white points as mask. 
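# Illustrative sketch (not part of this diff): a minimal call to
# make_corres_image with two hand-picked correspondences; the images and pixel
# coordinates are hypothetical, using the defaults for colors and sizes.
import numpy as np
import camtools as ct

h, w = 120, 160
im_src = np.zeros((h, w, 3), dtype=np.float32)
im_dst = np.zeros((h, w, 3), dtype=np.float32)
src_pixels = np.array([[10, 20], [50, 60]])      # (x, y) == (col, row), int
dst_pixels = np.array([[12, 22], [55, 66]])
im_corres = ct.image.make_corres_image(im_src, im_dst, src_pixels, dst_pixels)
print(im_corres.shape)                           # expected (120, 320, 3)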
- im_point_mask = np.zeros( - im_corres.shape[:2], dtype=im_corres.dtype - ) - for (src_c, src_r), (dst_c, dst_r) in zip( - src_pixels, dst_pixels - ): + im_point_mask = np.zeros(im_corres.shape[:2], dtype=im_corres.dtype) + for (src_c, src_r), (dst_c, dst_r) in zip(src_pixels, dst_pixels): cv2.circle( im_point_mask, (src_c, src_r), @@ -880,11 +843,7 @@ def make_corres_image( im_line_mask = np.zeros(im_corres.shape[:2], dtype=im_corres.dtype) for (src_c, src_r), (dst_c, dst_r) in zip(src_pixels, dst_pixels): cv2.line( - im_line_mask, - (src_c, src_r), - (dst_c + w, dst_r), - (1,), - line_width, + im_line_mask, (src_c, src_r), (dst_c + w, dst_r), (1,), line_width ) line_alpha = line_color[3] if len(line_color) == 4 else 1.0 @@ -996,9 +955,9 @@ def vstack_images( if alignment == "center" else max_width - im.shape[1] if alignment == "right" else 0 ) - im_stacked[ - curr_row : curr_row + im.shape[0], offset : offset + im.shape[1] - ] = im + im_stacked[curr_row : curr_row + im.shape[0], offset : offset + im.shape[1]] = ( + im + ) curr_row += im.shape[0] return im_stacked diff --git a/camtools/io.py b/camtools/io.py index 6f60c603..1c028d19 100644 --- a/camtools/io.py +++ b/camtools/io.py @@ -117,10 +117,12 @@ def imwrite_depth( Args: im_path (Union[str, Path]): Output file path. Must have .png extension. Parent directories will be created automatically if they don't exist. + im (Float[np.ndarray]): Depth map as a 2D numpy array. Must be: - Shape: (height, width) - Data type: float32 or float64 - Values: Depth values in meters (or other consistent units) + depth_scale (float, optional): Scaling factor to apply before converting to uint16. Defaults to 1000.0. This determines the precision of stored depth values. For example: @@ -135,16 +137,16 @@ def imwrite_depth( - When reading the depth map with imread_depth(), use the same depth_scale to recover the original depth values - The user is responsible for defining what is invalid depth. For example, - invalid depth can be represented as np.nan, np.inf, 0, -1, etc. This - function simply multiplies the depth by depth_scale and converts to - uint16. For instance, with depth_scale = 1000: - - - Input depths : [np.nan, np.inf, -np.inf, 0, -1, 3.14] - - Written to ".png": [ 0, 0, 0, 0, 64536, 3140] - - Read from ".png" : [ 0, 0, 0, 0, 64536, 3140] - - Convert to float : [ 0, 0, 0, 0, 64.536, 3.14] - + The user is responsible for defining what is invalid depth. E.g., + invalid depth can represented as np.nan, np.inf, 0, -1, etc. This + function simply multiplies the depth by depth_scale can convert to + uint16. For instance, with depth_scale = 1000, + - Input depths : [np.nan, np.inf, -np.inf, 0, -1, 3.14] + - Written to ".png": [ 0, 0, 0, 0, 64536, 3140] + - Read from ".png" : [ 0, 0, 0, 0, 64536, 3140] + - Convert to float : [ 0, 0, 0, 0, 64.536, 3.14] + ^ + Best practice. Note that -1 is converted to 64536 / 1000 = 64.536 meters, therefore, it is important to clip depth with min_depth and max_depth. The best practice is to use 0 as invalid depth. @@ -259,8 +261,7 @@ def imread( if im.shape[2] == 4: if alpha_mode is None: raise ValueError( - f"{im_path} has an alpha channel, alpha_mode " - f"must be specified." + f"{im_path} has an alpha channel, alpha_mode " f"must be specified." ) elif alpha_mode == "keep": pass @@ -279,8 +280,7 @@ def imread( im = im[..., :3] * im[..., 3:] else: raise ValueError( - f"Unexpected alpha_mode: {alpha_mode} for a " - "4-channel image." + f"Unexpected alpha_mode: {alpha_mode} for a " "4-channel image." 
) elif im.shape[2] == 3: pass @@ -317,39 +317,57 @@ def imread_depth( depth_scale: float = 1000.0, ) -> Float[np.ndarray, "h w"]: """ - Read a depth map from a 16-bit PNG file and convert to float. + Read and normalize a 16-bit depth map from a PNG file. This function handles depth map reading by: - - Loading 16-bit PNG data - - Converting to float32 format - - Applying depth scale to recover original values + - Loading 16-bit depth values from PNG + - Converting to float32 + - Applying depth scale normalization - Validating input data Args: - im_path (Union[str, Path]): Path to the depth map PNG file. - depth_scale (float, optional): Scaling factor to convert from uint16 to - float. Defaults to 1000.0. Must match the scale used when saving - the depth map. For example: - - depth_scale=1000: 1mm precision - - depth_scale=100: 1cm precision - - depth_scale=1: 1m precision + im_path (Union[str, Path]): Path to the depth map file. Must be a 16-bit PNG. + + depth_scale (float, optional): Scale factor to divide depth values by. + Defaults to 1000.0. This should match the scale used when writing + the depth map with imwrite_depth(). Returns: - Float[np.ndarray, "h w"]: Depth map as a float32 array with shape - (height, width). Values are in the original units (typically meters). + Float[np.ndarray]: Depth map as a 2D numpy array with: + - Data type: float32 + - Shape: (height, width) + - Values: Depth values in meters (or other consistent units) Notes: - - Zero values in the PNG file are preserved as zeros in the output - - Non-zero values are divided by depth_scale to recover original depths - - Use the same depth_scale value that was used with imwrite_depth() + - Invalid depth values (0, 65535) are preserved in the output + - The depth_scale should match the one used during writing - For best results, use 0 to represent invalid depth values + - Depth values are not automatically clipped - the user should + handle clipping based on their specific requirements + + The user is responsible for defining what is invalid depth. E.g., + invalid depth can represented as np.nan, np.inf, 0, -1, etc. This + function simply multiplies the depth by depth_scale can convert to + uint16. For instance, with depth_scale = 1000, + - Input depths : [np.nan, np.inf, -np.inf, 0, -1, 3.14] + - Written to ".png": [ 0, 0, 0, 0, 64536, 3140] + - Read from ".png" : [ 0, 0, 0, 0, 64536, 3140] + - Convert to float : [ 0, 0, 0, 0, 64.536, 3.14] + ^ + Best practice. + Note that -1 is converted to 64536 / 1000 = 64.536 meters, therefore, + it is important to clip depth with min_depth and max_depth. The best + practice is to use 0 as invalid depth. Examples: - >>> # Read depth map saved with 1mm precision + >>> # Read depth map with 1mm precision >>> depth = imread_depth('depth.png', depth_scale=1000) - >>> # Read depth map saved with 1cm precision + >>> # Read depth map with 1cm precision >>> depth = imread_depth('depth.png', depth_scale=100) + + >>> # Read depth map with 1m precision + >>> depth = imread_depth('depth.png', depth_scale=1) """ im_path = Path(im_path) assert is_png_path(im_path), f"{im_path} is not a PNG file." 
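# Illustrative sketch (not part of this diff): the depth write/read round trip
# described in the imwrite_depth / imread_depth docstrings above; "depth.png"
# is a hypothetical path and depth_scale must match between the two calls.
import numpy as np
import camtools as ct

im_depth = np.array([[0.0, 1.25],
                     [2.0, 3.5]], dtype=np.float32)   # meters; 0 marks invalid depth
ct.io.imwrite_depth("depth.png", im_depth, depth_scale=1000.0)
im_depth_back = ct.io.imread_depth("depth.png", depth_scale=1000.0)
np.testing.assert_allclose(im_depth, im_depth_back)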
diff --git a/camtools/metric.py b/camtools/metric.py index a1d7fd17..7b85349a 100644 --- a/camtools/metric.py +++ b/camtools/metric.py @@ -113,12 +113,7 @@ def image_lpips( loss_fn = lpips.LPIPS(net="alex") image_lpips.static_vars["loss_fn"] = loss_fn - ans = ( - loss_fn.forward(torch.tensor(pr), torch.tensor(gt)) - .cpu() - .detach() - .numpy() - ) + ans = loss_fn.forward(torch.tensor(pr), torch.tensor(gt)).cpu().detach().numpy() return float(ans) @@ -203,53 +198,34 @@ def load_im_pd_im_gt_im_mask_for_eval( im_mask_path: Optional[Union[str, Path]] = None, alpha_mode: str = "white", ) -> Tuple[ - Float[np.ndarray, "h w 3"], - Float[np.ndarray, "h w 3"], - Float[np.ndarray, "h w"], + Float[np.ndarray, "h w 3"], Float[np.ndarray, "h w 3"], Float[np.ndarray, "h w"] ]: """ - Load predicted image, ground truth image, and mask for evaluation. - - This function loads and preprocesses images for evaluation: - - 1. Load predicted and ground truth images - 2. Convert images to float32 and normalize to [0, 1] - 3. Load or create evaluation mask - 4. Apply mask to both images - 5. Validate image shapes and types + Load prediction, ground truth, and mask images for image metric evaluation. Args: - im_pd_path (str): Path to predicted image file. - - im_gt_path (str): Path to ground truth image file. - - im_mask_path (Optional[str]): Path to mask image file. If None, uses - entire image. Default: None. - - im_mask_value (Optional[float]): Value in mask image to use for - evaluation. Pixels with this value are included. Default: 255.0. + im_pd_path: Path to the rendered image. + im_gt_path: Path to the ground truth RGB or RGBA image. + im_mask_path: Path to the mask image. The mask will be resized to the + same (h, w) as im_gt. + alpha_mode: The mode on how to handle the alpha channel. Currently only + "white" is supported. + - "white": If im_gt contains alpha channel, im_gt will be converted + to RGB, the background will be rendered as white, the + alpha channel will be then ignored. + - "keep" : If im_gt contains alpha channel, the alpha channel will + be used as mask. This mask can be overwritten by + im_mask_path if im_mask_path is not None. + (This option is not implemented yet.) Returns: - Tuple[Float[np.ndarray, "h w c"], Float[np.ndarray, "h w c"], - Float[np.ndarray, "h w"]]: Tuple containing: - - - Predicted image array normalized to [0, 1] - - Ground truth image array normalized to [0, 1] - - Binary mask array where True indicates pixels to evaluate - - Example: - >>> # Load images with full evaluation mask - >>> im_pd, im_gt, mask = load_im_pd_im_gt_im_mask_for_eval( - ... 'pred.png', 'gt.png') - >>> - >>> # Load images with specific mask - >>> im_pd, im_gt, mask = load_im_pd_im_gt_im_mask_for_eval( - ... 'pred.png', 'gt.png', 'mask.png', 1.0) + im_pd: (h, w, 3), float32, value in [0, 1]. + im_gt: (h, w, 3), float32, value in [0, 1]. + im_mask: (h, w), float32, value only 0 or 1. Even if im_mask_path is + None, im_mask will be returned as all 1s. """ if alpha_mode != "white": - raise NotImplementedError( - 'Currently only alpha_mode="white" is supported.' - ) + raise NotImplementedError('Currently only alpha_mode="white" is supported.') # Prepare im_gt. # (h, w, 3) or (h, w, 4), float32. 
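# Illustrative sketch (not part of this diff): loading a prediction/ground-truth
# pair for metric evaluation with the docstring restored above; the file names
# are hypothetical, and with im_mask_path=None the mask comes back as all ones.
import camtools as ct

im_pd, im_gt, im_mask = ct.metric.load_im_pd_im_gt_im_mask_for_eval(
    im_pd_path="render.png",
    im_gt_path="gt.png",
    im_mask_path=None,
    alpha_mode="white",
)
print(im_pd.shape, im_gt.shape, im_mask.shape)   # (h, w, 3) (h, w, 3) (h, w)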
diff --git a/camtools/normalize.py b/camtools/normalize.py index d09e4bea..a4093cc7 100644 --- a/camtools/normalize.py +++ b/camtools/normalize.py @@ -2,9 +2,7 @@ from jaxtyping import Float -def compute_normalize_mat( - points: Float[np.ndarray, "n 3"] -) -> Float[np.ndarray, "4 4"]: +def compute_normalize_mat(points: Float[np.ndarray, "n 3"]) -> Float[np.ndarray, "4 4"]: """ Args: points: (N, 3) numpy array. diff --git a/camtools/raycast.py b/camtools/raycast.py index 3c5d1918..6967f22e 100644 --- a/camtools/raycast.py +++ b/camtools/raycast.py @@ -75,50 +75,36 @@ def mesh_to_im_distance( """ Generate a distance image by ray casting a mesh from a given camera view. - The distance image contains the Euclidean distance between each 3D point on - the mesh surface and the camera center. The ray casting follows the equation: - + The distance image contains the Euclidean distance from the camera center to + the mesh surface for each pixel. The ray casting follows the equation: distance = ||C - P|| - where: - C is the camera center in world coordinates - P is the intersection point on the mesh surface - ||·|| denotes the Euclidean norm - Note: - A distance image shows the actual 3D distance from the camera center to - each surface point, while a depth image shows the z-coordinate in camera - space. Use ct.convert.im_distance_to_im_depth to convert between them. + Example usage: + # Create distance image for a 640x480 view + distance_image = ct.raycast.mesh_to_im_distance(mesh, K, T, 480, 640) + # Visualize distances + plt.imshow(distance_image) + plt.colorbar() Args: - mesh (o3d.geometry.TriangleMesh): Open3D TriangleMesh to be ray casted. - - K (Float[np.ndarray, "3 3"]): Camera intrinsic matrix. - - T (Float[np.ndarray, "4 4"]): Camera extrinsic matrix (world-to-camera - transformation). - - height (int): Image height in pixels. - - width (int): Image width in pixels. + mesh: Open3D TriangleMesh to be ray casted. + K: (3, 3) camera intrinsic matrix. + T: (4, 4) camera extrinsic matrix (world-to-camera transformation). + height: Image height in pixels. + width: Image width in pixels. Returns: - Float[np.ndarray, "h w"]: Distance image as a float32 array with shape - (height, width). Each pixel contains the distance from the camera - center to the mesh surface. Invalid distances (no intersection) are - set to np.inf. - - Example: - >>> # Create distance image for a 640x480 view - >>> distance_image = ct.raycast.mesh_to_im_distance(mesh, K, T, 480, 640) - >>> # Visualize distances - >>> plt.imshow(distance_image) - >>> plt.colorbar() - - Note: - For casting the same mesh with multiple camera views, use - mesh_to_im_distances for better efficiency as it avoids repeated scene - setup. + (height, width) float32 array representing the distance image. Each + pixel contains the distance from the camera center to the mesh surface. + Invalid distances (no intersection) are set to np.inf. + + Note: For casting the same mesh with multiple camera views, use + mesh_to_im_distances for better efficiency as it avoids repeated scene + setup. """ im_distances = mesh_to_im_distances( mesh=mesh, @@ -143,50 +129,36 @@ def mesh_to_im_distances( Generate multiple distance images by ray casting a mesh from different views. For each camera view, generates a distance image containing the Euclidean - distance between each 3D point on the mesh surface and the camera center. - The distances are calculated as: - - distance = ||C_i - P_i|| - + distance from the camera center to the mesh surface. 
The distances are + calculated as: + distance = ||C_i - P_i|| where: - - C_i is the camera center for view i - - P_i is the intersection point on the mesh surface for view i - - ||·|| denotes the Euclidean norm + - C_i is the camera center for view i + - P_i is the intersection point on the mesh surface for view i + - ||·|| denotes the Euclidean norm - Note: - A distance image shows the actual 3D distance from the camera center to - each surface point, while a depth image shows the z-coordinate in camera - space. Use ct.convert.im_distance_to_im_depth to convert between them. + Example usage: + # Create distance images for 3 different views + distances = ct.raycast.mesh_to_im_distances(mesh, Ks, Ts, 480, 640) + # Visualize first view's distances + plt.imshow(distances[0]) + plt.colorbar() Args: - mesh (o3d.geometry.TriangleMesh): Open3D TriangleMesh to be ray casted. - - Ks (Float[np.ndarray, "n 3 3"]): Array of camera intrinsic matrices for - N views. - - Ts (Float[np.ndarray, "n 4 4"]): Array of camera extrinsic matrices - (world-to-camera transformations) for N views. - - height (int): Image height in pixels. - - width (int): Image width in pixels. + mesh: Open3D TriangleMesh to be ray casted. + Ks: (N, 3, 3) array of camera intrinsic matrices for N views. + Ts: (N, 4, 4) array of camera extrinsic matrices (world-to-camera + transformations) for N views. + height: Image height in pixels. + width: Image width in pixels. Returns: - Float[np.ndarray, "n h w"]: Distance images as a float32 array with shape - (N, height, width). Each image contains the distances from the - corresponding camera center to the mesh surface. Invalid distances - (no intersection) are set to np.inf. - - Example: - >>> # Create distance images for 3 different views - >>> distances = ct.raycast.mesh_to_im_distances(mesh, Ks, Ts, 480, 640) - >>> # Visualize first view's distances - >>> plt.imshow(distances[0]) - >>> plt.colorbar() - - Note: - This function is more efficient than calling mesh_to_im_distance multiple - times as it only sets up the ray casting scene once. + (N, height, width) float32 array representing the distance images. Each + image contains the distances from the corresponding camera center to the + mesh surface. Invalid distances (no intersection) are set to np.inf. + + Note: This function is more efficient than calling mesh_to_im_distance + multiple times as it only sets up the ray casting scene once. """ for K in Ks: sanity.assert_K(K) @@ -225,52 +197,37 @@ def mesh_to_im_depth( width: int, ) -> Float[np.ndarray, "h w"]: """ - Generate a depth image (z-depth) by ray casting a mesh from a camera view. - - The depth image contains the z-coordinate of each 3D point on the mesh - surface in camera coordinates. This represents the perpendicular distance - from the camera plane to the surface point. + Generate a depth image by ray casting a mesh from a given camera view. - Args: - mesh (o3d.geometry.TriangleMesh): Open3D TriangleMesh to be ray casted. - - K (Float[np.ndarray, "3 3"]): Camera intrinsic matrix. Format: - - [[fx, 0, cx], - [0, fy, cy], - [0, 0, 1]] - - where fx, fy are focal lengths and cx, cy are principal points. - - T (Float[np.ndarray, "4 4"]): Camera extrinsic matrix (world-to-camera - transformation). Format: - - [[R | t], - [0 | 1]] - - where R is a 3x3 rotation matrix and t is a 3D translation vector. + The depth image contains the z-coordinate of the mesh surface in the camera + coordinate system for each pixel. 
The depth is calculated as: + depth = (distance * f) / sqrt(u² + v² + f²) + where: + - distance is the Euclidean distance from camera center to surface point + - f is the focal length from the intrinsic matrix K + - (u, v) are the pixel coordinates in the camera plane - height (int): Image height in pixels. + Example usage: + # Create depth image for a 640x480 view + depth_image = ct.raycast.mesh_to_im_depth(mesh, K, T, 480, 640) + # Visualize depths + plt.imshow(depth_image) + plt.colorbar() - width (int): Image width in pixels. + Args: + mesh: Open3D TriangleMesh to be ray casted. + K: (3, 3) camera intrinsic matrix. + T: (4, 4) camera extrinsic matrix (world-to-camera transformation). + height: Image height in pixels. + width: Image width in pixels. Returns: - Float[np.ndarray, "h w"]: Depth image as a float32 array with shape - (height, width). Each pixel contains the z-coordinate of the mesh - surface in camera space. Invalid depths (no intersection) are - set to np.inf. - - Note: - A depth image shows the z-coordinate in camera space, while a distance - image shows the actual 3D distance from the camera center to each surface - point. Use ct.convert.im_depth_to_im_distance to convert between them. - - Example: - >>> # Create depth image from camera view - >>> depth = ct.raycast.mesh_to_im_depth(mesh, K, T, 480, 640) - >>> # Visualize depths - >>> plt.imshow(depth) - >>> plt.colorbar() + (height, width) float32 array representing the depth image. Each + pixel contains the z-coordinate of the mesh surface in camera space. + Invalid depths (no intersection) are set to np.inf. + + Note: This function internally uses mesh_to_im_distance and converts the + distances to depths using the camera intrinsic parameters. """ im_distance = mesh_to_im_distance(mesh, K, T, height, width) im_depth = convert.im_distance_to_im_depth(im_distance, K) @@ -285,47 +242,38 @@ def mesh_to_im_depths( width: int, ) -> Float[np.ndarray, "n h w"]: """ - Generate multiple depth images (z-depth) by ray casting a mesh from different - views. + Generate multiple depth images by ray casting a mesh from different views. - Each depth image contains the z-coordinate of each 3D point on the mesh - surface in the corresponding camera coordinates. This represents the - perpendicular distance from the camera plane to the surface point. - - Args: - mesh (o3d.geometry.TriangleMesh): Open3D TriangleMesh to be ray casted. - - Ks (Float[np.ndarray, "n 3 3"]): Array of camera intrinsic matrices for - N views. - - Ts (Float[np.ndarray, "n 4 4"]): Array of camera extrinsic matrices - (world-to-camera transformations) for N views. + For each camera view, generates a depth image containing the z-coordinate of + the mesh surface in the camera coordinate system. The depths are calculated as: + depth = (distance * f) / sqrt(u² + v² + f²) + where: + - distance is the Euclidean distance from camera center to surface point + - f is the focal length from the intrinsic matrix K + - (u, v) are the pixel coordinates in the camera plane - height (int): Image height in pixels. + Example usage: + # Create depth images for 3 different views + depths = ct.raycast.mesh_to_im_depths(mesh, Ks, Ts, 480, 640) + # Visualize first view's depths + plt.imshow(depths[0]) + plt.colorbar() - width (int): Image width in pixels. + Args: + mesh: Open3D TriangleMesh to be ray casted. + Ks: (N, 3, 3) array of camera intrinsic matrices for N views. + Ts: (N, 4, 4) array of camera extrinsic matrices (world-to-camera + transformations) for N views. 
+ height: Image height in pixels. + width: Image width in pixels. Returns: - Float[np.ndarray, "n h w"]: Depth images as a float32 array with shape - (N, height, width). Each image contains the z-coordinates of the mesh - surface in the corresponding camera space. Invalid depths (no - intersection) are set to np.inf. - - Note: - A depth image shows the z-coordinate in camera space, while a distance - image shows the actual 3D distance from the camera center to each surface - point. Use ct.convert.im_depth_to_im_distance to convert between them. - - Example: - >>> # Create depth images for 3 different views - >>> depths = ct.raycast.mesh_to_im_depths(mesh, Ks, Ts, 480, 640) - >>> # Visualize first view's depths - >>> plt.imshow(depths[0]) - >>> plt.colorbar() - - Note: - This function internally uses mesh_to_im_distances and converts the - distances to depths using the camera intrinsic parameters. + (N, height, width) float32 array representing the depth images. Each + image contains the z-coordinates of the mesh surface in camera space. + Invalid depths (no intersection) are set to np.inf. + + Note: This function internally uses mesh_to_im_distances and converts the + distances to depths using the camera intrinsic parameters. """ im_distances = mesh_to_im_distances(mesh, Ks, Ts, height, width) im_depths = np.stack( diff --git a/camtools/render.py b/camtools/render.py index 27804454..de01f3b0 100644 --- a/camtools/render.py +++ b/camtools/render.py @@ -1,4 +1,4 @@ -from typing import List, Tuple, Optional, Union +from typing import List, Tuple, Optional import numpy as np import open3d as o3d @@ -19,86 +19,75 @@ def render_geometries( line_radius: Optional[float] = None, to_depth: bool = False, visible: bool = False, -) -> Union[Float[np.ndarray, "h w 3"], Float[np.ndarray, "h w"]]: +) -> Float[np.ndarray, "h w 3"]: """ Render Open3D geometries to an image using the specified camera parameters. + This function may require a display. The rendering follows the standard pinhole camera model: + λ[x, y, 1]^T = K @ [R | t] @ [X, Y, Z, 1]^T + where: + - [X, Y, Z, 1]^T is a homogeneous 3D point in world coordinates + - [R | t] is the 3x4 extrinsic matrix (world-to-camera transformation) + - K is the 3x3 intrinsic matrix + - [x, y, 1]^T is the projected homogeneous 2D point in pixel coordinates + - λ is the depth value + + Example usage: + # Create some geometries + mesh = o3d.geometry.TriangleMesh.create_box() + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(np.random.rand(100, 3)) - λ[x, y, 1]^T = K @ [R | t] @ [X, Y, Z, 1]^T + # Render with default camera + image = render_geometries([mesh, pcd]) - where: - - [X, Y, Z, 1]^T is a homogeneous 3D point in world coordinates - - [R | t] is the 3x4 extrinsic matrix (world-to-camera transformation) - - K is the 3x3 intrinsic matrix - - [x, y, 1]^T is the projected homogeneous 2D point in pixel coordinates - - λ is the depth value + # Render with specific camera parameters + K = np.array([[1000, 0, 640], [0, 1000, 360], [0, 0, 1]]) + T = np.eye(4) + depth_image = render_geometries([mesh], K=K, T=T, to_depth=True) Args: - geometries (List[o3d.geometry.Geometry3D]): List of Open3D geometries to - render. Supported types include: + geometries: List of Open3D geometries to render. Supported types include: - TriangleMesh - PointCloud - LineSet - - K (Optional[Float[np.ndarray, "3 3"]]): Camera intrinsic matrix. If None, - uses Open3D's default camera inferred from the geometries. Must be - provided if T is provided. 
Format: - - [[fx, 0, cx], - [0, fy, cy], - [0, 0, 1]] - - where fx, fy are focal lengths and cx, cy are principal points. - - T (Optional[Float[np.ndarray, "4 4"]]): Camera extrinsic matrix - (world-to-camera transformation). If None, uses Open3D's default - camera. Must be provided if K is provided. Format: - - [[R | t], - [0 | 1]] - - where R is a 3x3 rotation matrix and t is a 3D translation vector. - - view_status_str (Optional[str]): JSON string containing viewing camera - parameters from o3d.visualization.Visualizer.get_view_status(). - Does not include window size or point size. - - height (int): Height of the output image in pixels. Default: 720. - - width (int): Width of the output image in pixels. Default: 1280. - - point_size (float): Size of points for PointCloud objects, in pixels. - Default: 1.0. - - line_radius (Optional[float]): Radius of lines for LineSet objects, in - world units. When set, LineSets are converted to cylinder meshes. - Unlike point_size, this is in world metric space. Default: None. - - to_depth (bool): If True, renders a depth image instead of RGB. Invalid - depths are set to 0. Default: False. - - visible (bool): If True, shows the rendering window. Default: False. + K: (3, 3) camera intrinsic matrix. If None, uses Open3D's default camera + inferred from the geometries. Must be provided if T is provided. + The intrinsic matrix follows the format: + [[fx, 0, cx], + [0, fy, cy], + [0, 0, 1]] + where: + - fx, fy: focal lengths in pixels + - cx, cy: principal point coordinates + T: (4, 4) camera extrinsic matrix (world-to-camera transformation). + If None, uses Open3D's default camera inferred from the geometries. + Must be provided if K is provided. The extrinsic matrix follows the + format: + [[R | t], + [0 | 1]] + where: + - R: (3, 3) rotation matrix + - t: (3,) translation vector + view_status_str: JSON string containing viewing camera parameters from + o3d.visualization.Visualizer.get_view_status(). This does not + include window size or point size. + height: Height of the output image in pixels. + width: Width of the output image in pixels. + point_size: Size of points for PointCloud objects, in pixels. + line_radius: Radius of lines for LineSet objects, in world units. When + set, LineSets are converted to cylinder meshes with this radius. + Unlike point_size, this is in world metric space, not pixel space. + to_depth: If True, renders a depth image instead of RGB. Invalid depths + are set to 0. + visible: If True, shows the rendering window. Returns: - Float[np.ndarray, "h w 3"]: If to_depth is False, returns an RGB image - array with shape (height, width, 3) and values in [0, 1]. If - to_depth is True, returns a depth image array with shape - (height, width) and depth values in world units. 
- - Example: - >>> # Create some geometries - >>> mesh = o3d.geometry.TriangleMesh.create_box() - >>> pcd = o3d.geometry.PointCloud() - >>> pcd.points = o3d.utility.Vector3dVector(np.random.rand(100, 3)) - >>> - >>> # Render with default camera - >>> image = render_geometries([mesh, pcd]) - >>> - >>> # Render with specific camera parameters - >>> K = np.array([[1000, 0, 640], [0, 1000, 360], [0, 0, 1]]) - >>> T = np.eye(4) - >>> depth_image = render_geometries([mesh], K=K, T=T, to_depth=True) + If to_depth is False: + (H, W, 3) float32 RGB image array with values in [0, 1] + If to_depth is True: + (H, W) float32 depth image array with depth values in world units """ if not isinstance(geometries, list): @@ -176,62 +165,62 @@ def get_render_view_status_str( ) -> str: """ Get a view status string containing camera parameters from Open3D visualizer. + This is useful for rendering multiple geometries with consistent camera views. + This function may require a display. The view status string contains camera parameters in JSON format, including: - - Camera position and orientation - Field of view - Zoom level - Other view control settings + Example usage: + # Get view status for default camera + view_str = get_render_view_status_str([mesh, pcd]) + + # Get view status for specific camera + K = np.array([[1000, 0, 640], [0, 1000, 360], [0, 0, 1]]) + T = np.eye(4) + view_str = get_render_view_status_str([mesh], K=K, T=T) + + # Use view status for consistent rendering + image1 = render_geometries([mesh], view_status_str=view_str) + image2 = render_geometries([pcd], view_status_str=view_str) + Args: - geometries (List[o3d.geometry.Geometry3D]): List of Open3D geometries to - set up the view. Supported types: + geometries: List of Open3D geometries to set up the view. Supported types: - TriangleMesh - PointCloud - LineSet - - K (Optional[Float[np.ndarray, "3 3"]]): Camera intrinsic matrix. If None, - uses Open3D's default camera inferred from the geometries. Must be - provided if T is provided. Format: - + K: (3, 3) camera intrinsic matrix. If None, uses Open3D's default camera + inferred from the geometries. Must be provided if T is provided. + The intrinsic matrix follows the format: [[fx, 0, cx], [0, fy, cy], [0, 0, 1]] - - where fx, fy are focal lengths and cx, cy are principal points. - - T (Optional[Float[np.ndarray, "4 4"]]): Camera extrinsic matrix - (world-to-camera transformation). If None, uses Open3D's default - camera. Must be provided if K is provided. Format: - + where: + - fx, fy: focal lengths in pixels + - cx, cy: principal point coordinates + T: (4, 4) camera extrinsic matrix (world-to-camera transformation). + If None, uses Open3D's default camera inferred from the geometries. + Must be provided if K is provided. The extrinsic matrix follows the + format: [[R | t], [0 | 1]] - - where R is a 3x3 rotation matrix and t is a 3D translation vector. - - height (int): Height of the view window in pixels. Default: 720. - - width (int): Width of the view window in pixels. Default: 1280. + where: + - R: (3, 3) rotation matrix + - t: (3,) translation vector + height: Height of the view window in pixels. + width: Width of the view window in pixels. Returns: - str: JSON string containing camera view parameters from - o3d.visualization.Visualizer.get_view_status(). This includes camera - position, orientation, field of view, zoom level, and other view - control settings. Does not include window size or point size. 
- - Example: - >>> # Get view status for default camera - >>> view_str = get_render_view_status_str([mesh, pcd]) - >>> - >>> # Get view status for specific camera - >>> K = np.array([[1000, 0, 640], [0, 1000, 360], [0, 0, 1]]) - >>> T = np.eye(4) - >>> view_str = get_render_view_status_str([mesh], K=K, T=T) - >>> - >>> # Use view status for consistent rendering - >>> image1 = render_geometries([mesh], view_status_str=view_str) - >>> image2 = render_geometries([pcd], view_status_str=view_str) + JSON string containing camera view parameters from + o3d.visualization.Visualizer.get_view_status(). This includes: + - Camera position and orientation + - Field of view + - Zoom level + - Other view control settings + Note: Does not include window size or point size. """ if not isinstance(geometries, list): raise TypeError("geometries must be a list of Open3D geometries.") @@ -417,9 +406,7 @@ def align_vector_to_another( axis = np.cross(a, b) axis /= np.linalg.norm(axis) angle = np.arccos( - np.clip( - np.dot(a / np.linalg.norm(a), b / np.linalg.norm(b)), -1.0, 1.0 - ) + np.clip(np.dot(a / np.linalg.norm(a), b / np.linalg.norm(b)), -1.0, 1.0) ) return axis, angle @@ -443,13 +430,9 @@ def normalized(a: np.ndarray) -> Tuple[np.ndarray, float]: start_point, end_point = points[line[0]], points[line[1]] line_segment = end_point - start_point line_segment_unit, line_length = normalized(line_segment) - axis, angle = align_vector_to_another( - np.array([0, 0, 1]), line_segment_unit - ) + axis, angle = align_vector_to_another(np.array([0, 0, 1]), line_segment_unit) translation = start_point + line_segment * 0.5 - cylinder = o3d.geometry.TriangleMesh.create_cylinder( - radius, line_length - ) + cylinder = o3d.geometry.TriangleMesh.create_cylinder(radius, line_length) cylinder.translate(translation, relative=False) if not np.isclose(angle, 0): axis_angle = axis * angle @@ -705,9 +688,7 @@ def render_texts( (0, 0), ( (max_width - im.shape[1]) // 2, - max_width - - im.shape[1] - - (max_width - im.shape[1]) // 2, + max_width - im.shape[1] - (max_width - im.shape[1]) // 2, ), (0, 0), ), diff --git a/camtools/sanity.py b/camtools/sanity.py index f78800f9..5d2d4ccd 100644 --- a/camtools/sanity.py +++ b/camtools/sanity.py @@ -16,9 +16,7 @@ def assert_numpy(x, name=None): """ if not isinstance(x, np.ndarray): maybe_name = f" {name}" if name is not None else "" - raise ValueError( - f"Expected{maybe_name} to be numpy array, but got {type(x)}." - ) + raise ValueError(f"Expected{maybe_name} to be numpy array, but got {type(x)}.") def assert_K(K: Float[np.ndarray, "3 3"]): @@ -41,9 +39,7 @@ def assert_K(K: Float[np.ndarray, "3 3"]): ValueError: If K is not a 3x3 matrix """ if K.shape != (3, 3): - raise ValueError( - f"K must has shape (3, 3), but got {K} of shape {K.shape}." - ) + raise ValueError(f"K must has shape (3, 3), but got {K} of shape {K.shape}.") def assert_T(T: Float[np.ndarray, "4 4"]): @@ -67,14 +63,10 @@ def assert_T(T: Float[np.ndarray, "4 4"]): ValueError: If T is not a 4x4 matrix or bottom row is not [0, 0, 0, 1] """ if T.shape != (4, 4): - raise ValueError( - f"T must has shape (4, 4), but got {T} of shape {T.shape}." - ) + raise ValueError(f"T must has shape (4, 4), but got {T} of shape {T.shape}.") is_valid = np.allclose(T[3, :], np.array([0, 0, 0, 1])) if not is_valid: - raise ValueError( - f"T must has [0, 0, 0, 1] the bottom row, but got {T}." 
- ) + raise ValueError(f"T must has [0, 0, 0, 1] the bottom row, but got {T}.") def assert_pose(pose: Float[np.ndarray, "4 4"]): @@ -90,14 +82,13 @@ def assert_pose(pose: Float[np.ndarray, "4 4"]): - R is a 3x3 rotation matrix - t is a 3x1 translation vector - Bottom row must be [0, 0, 0, 1] - The pose matrix is the inverse of the extrinsic matrix T. Args: - pose (Float[np.ndarray, "4 4"]): Camera pose matrix to validate. + pose: Camera pose matrix to validate Raises: - ValueError: If pose is not a 4x4 matrix or bottom row is not [0, 0, 0, 1]. + ValueError: If pose is not a 4x4 matrix or bottom row is not [0, 0, 0, 1] """ if pose.shape != (4, 4): raise ValueError( @@ -105,40 +96,25 @@ def assert_pose(pose: Float[np.ndarray, "4 4"]): ) is_valid = np.allclose(pose[3, :], np.array([0, 0, 0, 1])) if not is_valid: - raise ValueError( - f"pose must has [0, 0, 0, 1] the bottom row, but got {pose}." - ) + raise ValueError(f"pose must has [0, 0, 0, 1] the bottom row, but got {pose}.") def assert_shape(x: np.ndarray, shape: tuple, name: Optional[str] = None): """ Assert that an array has the expected shape. - The shape can be specified in several ways: - - 1. Exact shape: (3, 4) matches only arrays with shape (3, 4) - 2. Wildcard: (-1, 4) matches arrays with any first dimension and 4 columns - 3. Multiple wildcards: (-1, -1, 3) matches any HxWx3 array + The shape pattern can contain None values to indicate that dimension can be + any size. For example: + - (None, 3) matches any 2D array where the second dimension is 3 + - (3, None, 3) matches any 3D array where first and last dimensions are 3 Args: - array (np.ndarray): Input array to check. - - expected_shape (Tuple[int, ...]): Expected shape tuple. Use -1 as a - wildcard to match any size in that dimension. - - name (Optional[str]): Name of the array for error messages. If None, - uses 'array'. Default: None. + x: Array to validate + shape: Tuple of expected dimensions (can contain None for flexible dimensions) + name: Optional name of the variable for error message Raises: - AssertionError: If the array shape does not match the expected shape. - - Example: - >>> # Check exact shape - >>> assert_shape(array, (100, 3), 'points') - >>> - >>> # Check shape with wildcards - >>> assert_shape(image, (-1, -1, 3), 'image') # Any HxWx3 array - >>> assert_shape(points, (-1, 3), 'points') # Any Nx3 array + ValueError: If array dimensions don't match the expected shape pattern """ shape_valid = True @@ -154,9 +130,7 @@ def assert_shape(x: np.ndarray, shape: tuple, name: Optional[str] = None): if not shape_valid: name_must = f"{name} must" if name is not None else "Must" - raise ValueError( - f"{name_must} has shape {shape}, but got shape {x.shape}." - ) + raise ValueError(f"{name_must} has shape {shape}, but got shape {x.shape}.") def assert_shape_ndim(x: np.ndarray, ndim: int, name: Optional[str] = None): @@ -173,9 +147,7 @@ def assert_shape_ndim(x: np.ndarray, ndim: int, name: Optional[str] = None): """ if x.ndim != ndim: name_must = f"{name} must" if name is not None else "Must" - raise ValueError( - f"{name_must} have {ndim} dimensions, but got {x.ndim}." 
- ) + raise ValueError(f"{name_must} have {ndim} dimensions, but got {x.ndim}.") def assert_shape_nx3(x: np.ndarray, name: Optional[str] = None): diff --git a/camtools/solver.py b/camtools/solver.py index 363354f9..bd7996d5 100644 --- a/camtools/solver.py +++ b/camtools/solver.py @@ -23,13 +23,9 @@ def line_intersection_3d( https://math.stackexchange.com/a/1762491/209055 """ if src_points.ndim != 2 or src_points.shape[1] != 3: - raise ValueError( - f"src_points must be (N, 3), but got {src_points.shape}." - ) + raise ValueError(f"src_points must be (N, 3), but got {src_points.shape}.") if dst_points.ndim != 2 or dst_points.shape[1] != 3: - raise ValueError( - f"dst_points must be (N, 3), but got {dst_points.shape}." - ) + raise ValueError(f"dst_points must be (N, 3), but got {dst_points.shape}.") dirs = dst_points - src_points dirs = dirs / np.linalg.norm(dirs, axis=1).reshape((-1, 1)) @@ -212,9 +208,7 @@ def points_to_mesh_distances( np.ndarray: Array of distances with shape (N,). """ if not points.ndim == 2 or points.shape[1] != 3: - raise ValueError( - f"Expected points of shape (N, 3), but got {points.shape}." - ) + raise ValueError(f"Expected points of shape (N, 3), but got {points.shape}.") mesh_t = o3d.t.geometry.TriangleMesh.from_legacy(mesh) scene = o3d.t.geometry.RaycastingScene() _ = scene.add_triangles(mesh_t) diff --git a/camtools/tools/cli.py b/camtools/tools/cli.py index 87a76b6d..a2f2d5e1 100644 --- a/camtools/tools/cli.py +++ b/camtools/tools/cli.py @@ -4,9 +4,7 @@ def _print_greetings(): - greeting_str = ( - f"* CamTools: Camera Tools for Computer Vision (v{ct.__version__}) *" - ) + greeting_str = f"* CamTools: Camera Tools for Computer Vision (v{ct.__version__}) *" header = "*" * len(greeting_str) print(header) print(greeting_str) diff --git a/camtools/tools/compress_images.py b/camtools/tools/compress_images.py index 236053b6..350e3f85 100644 --- a/camtools/tools/compress_images.py +++ b/camtools/tools/compress_images.py @@ -82,9 +82,7 @@ def entry_point(parser, args): # Handle PNG file's alpha channel. src_paths_with_alpha = [] - png_paths = [ - src_path for src_path in src_paths if ct.io.is_png_path(src_path) - ] + png_paths = [src_path for src_path in src_paths if ct.io.is_png_path(src_path)] for src_path in png_paths: im = ct.io.imread(src_path, alpha_mode="keep") if im.shape[2] == 4: @@ -185,12 +183,8 @@ def entry_point(parser, args): print(f" - compression_ratio: {compression_ratio:.2f}") # Update text files. - src_paths = [ - stat["src_path"] for stat in stats if not stat["is_direct_copy"] - ] - dst_paths = [ - stat["dst_path"] for stat in stats if not stat["is_direct_copy"] - ] + src_paths = [stat["src_path"] for stat in stats if not stat["is_direct_copy"]] + dst_paths = [stat["dst_path"] for stat in stats if not stat["is_direct_copy"]] if num_compressed > 0 and update_texts_in_dir is not None: do_update_texts_in_dir( src_paths=src_paths, @@ -251,46 +245,36 @@ def compress_image_and_return_stat( min_jpg_compression_ratio: float, ): """ - Compress an image and return compression statistics. - - This function compresses an image using the specified quality level and - returns statistics about the compression: - - 1. Original file size - 2. Compressed file size - 3. Compression ratio - 4. Mean squared error (MSE) - 5. Peak signal-to-noise ratio (PSNR) - 6. Structural similarity index (SSIM) + Compress image and return stats. Args: - im_path (str): Path to the input image file. - - out_path (str): Path to save the compressed image. 
- - quality (int): JPEG compression quality level (0-100). Higher values - give better quality but larger file sizes. Default: 95. - - verbose (bool): If True, prints compression statistics. Default: False. + src_path: Path to image. + - Only ".jpg" or ".png" is supported. + - Directory will be created if it does not exist. + dst_path: Path to image. + - Only ".jpg" or ".png" is supported. + - Directory will be created if it does not exist. + quality: Quality of the output JPEG image, 1-100. Default is 95. + delete_src: If True, the src_path will be deleted. + min_jpg_compression_ratio: Minimum compression ratio for jpg->jpg + compression. If the compression ratio is above this value, the image + will not be compressed. This avoids compressing an image that is + already compressed. Returns: - Dict[str, Union[int, float]]: Dictionary containing compression - statistics: - - 'original_size': Original file size in bytes - - 'compressed_size': Compressed file size in bytes - - 'compression_ratio': Ratio of original to compressed size - - 'mse': Mean squared error between original and compressed - - 'psnr': Peak signal-to-noise ratio in dB - - 'ssim': Structural similarity index (0-1) - - Example: - >>> # Compress with default quality - >>> stats = compress_image_and_return_stat('input.png', 'output.jpg') - >>> print(f"Compression ratio: {stats['compression_ratio']:.2f}x") - >>> - >>> # Compress with specific quality and print stats - >>> stats = compress_image_and_return_stat('input.png', 'output.jpg', - ... quality=80, verbose=True) + stat: A dictionary of stats. + { + "src_path": Path to the source image. + "dst_path": Path to the destination image. + "src_size": Size of the source image in bytes. + "dst_size": Size of the destination image in bytes. + "compression_ratio": Compression ratio. + "is_direct_copy": True if the image is already compressed. + } + + Notes: + - You should not use this to save a depth image (typically uint16). + - Float image will get a range check to ensure it is in [0, 1]. """ stat = {} @@ -349,9 +333,7 @@ def is_text_file(path): root_dir = Path(root_dir) text_paths = list(root_dir.glob("**/*")) - text_paths = [ - text_path for text_path in text_paths if is_text_file(text_path) - ] + text_paths = [text_path for text_path in text_paths if is_text_file(text_path)] return text_paths diff --git a/camtools/tools/crop_boarders.py b/camtools/tools/crop_boarders.py index cf9bc6e4..67f31ead 100644 --- a/camtools/tools/crop_boarders.py +++ b/camtools/tools/crop_boarders.py @@ -74,13 +74,9 @@ def entry_point(parser, args): The parser argument is not used. """ if args.pad_pixel < 0: - raise ValueError( - f"pad_pixel must be non-negative, but got {args.pad_pixel}" - ) + raise ValueError(f"pad_pixel must be non-negative, but got {args.pad_pixel}") if args.pad_ratio < 0: - raise ValueError( - f"pad_ratio must be non-negative, but got {args.pad_ratio}" - ) + raise ValueError(f"pad_ratio must be non-negative, but got {args.pad_ratio}") # Determine src and dst paths. 
if isinstance(args.input, list): @@ -99,8 +95,7 @@ def entry_point(parser, args): else: if args.skip_cropped: dst_paths = [ - src_path.parent / f"cropped_{src_path.name}" - for src_path in src_paths + src_path.parent / f"cropped_{src_path.name}" for src_path in src_paths ] skipped_src_paths = [p for p in src_paths if p in dst_paths] src_paths = [p for p in src_paths if p not in dst_paths] @@ -109,8 +104,7 @@ def entry_point(parser, args): for src_path in skipped_src_paths: print(f" - {src_path}") dst_paths = [ - src_path.parent / f"cropped_{src_path.name}" - for src_path in src_paths + src_path.parent / f"cropped_{src_path.name}" for src_path in src_paths ] # Read. @@ -118,13 +112,9 @@ def entry_point(parser, args): for src_im in src_ims: if not src_im.dtype == np.float32: - raise ValueError( - f"Input image {src_path} must be of dtype float32." - ) + raise ValueError(f"Input image {src_path} must be of dtype float32.") if not src_im.ndim == 3 or not src_im.shape[2] == 3: - raise ValueError( - f"Input image {src_path} must be of shape (H, W, 3)." - ) + raise ValueError(f"Input image {src_path} must be of shape (H, W, 3).") num_ims = len(src_ims) # Compute. @@ -133,26 +123,19 @@ def entry_point(parser, args): shapes = [im.shape for im in src_ims] if not all([s == shapes[0] for s in shapes]): raise ValueError( - "All images must be of the same shape when --same_crop is " - "specified." + "All images must be of the same shape when --same_crop is " "specified." ) - individual_croppings = ct.util.mt_loop( - ct.image.compute_cropping, src_ims - ) + individual_croppings = ct.util.mt_loop(ct.image.compute_cropping, src_ims) # Compute the minimum cropping boarders. - min_crop_u, min_crop_d, min_crop_l, min_crop_r = individual_croppings[ - 0 - ] + min_crop_u, min_crop_d, min_crop_l, min_crop_r = individual_croppings[0] for crop_u, crop_d, crop_l, crop_r in individual_croppings[1:]: min_crop_u = min(min_crop_u, crop_u) min_crop_d = min(min_crop_d, crop_d) min_crop_l = min(min_crop_l, crop_l) min_crop_r = min(min_crop_r, crop_r) - croppings = [(min_crop_u, min_crop_d, min_crop_l, min_crop_r)] * len( - src_ims - ) + croppings = [(min_crop_u, min_crop_d, min_crop_l, min_crop_r)] * len(src_ims) # Compute padding (remains unchanged) if args.pad_pixel != 0: @@ -201,9 +184,7 @@ def entry_point(parser, args): ) ) for i in range(num_ims): - paddings[i] = tuple( - np.array(paddings[i]) + np.array(extra_paddings[i]) - ) + paddings[i] = tuple(np.array(paddings[i]) + np.array(extra_paddings[i])) # Apply. 
dst_ims = ct.image.apply_croppings_paddings( diff --git a/camtools/tools/draw_bboxes.py b/camtools/tools/draw_bboxes.py index ae72f305..9fd3e92b 100644 --- a/camtools/tools/draw_bboxes.py +++ b/camtools/tools/draw_bboxes.py @@ -100,9 +100,7 @@ def _bbox_str(bbox: matplotlib.transforms.Bbox) -> str: """ A better matplotlib.transforms.Bbox.__str__()` """ - return ( - f"Bbox({bbox.x0:.2f}, {bbox.y0:.2f}, {bbox.x1:.2f}, {bbox.y1:.2f})" - ) + return f"Bbox({bbox.x0:.2f}, {bbox.y0:.2f}, {bbox.x1:.2f}, {bbox.y1:.2f})" @staticmethod def _copy_rectangle( @@ -118,21 +116,9 @@ def _copy_rectangle( xy=(rectangle.xy[0], rectangle.xy[1]), width=rectangle.get_width(), height=rectangle.get_height(), - linestyle=( - linestyle - if linestyle is not None - else rectangle.get_linestyle() - ), - linewidth=( - linewidth - if linewidth is not None - else rectangle.get_linewidth() - ), - edgecolor=( - edgecolor - if edgecolor is not None - else rectangle.get_edgecolor() - ), + linestyle=linestyle if linestyle is not None else rectangle.get_linestyle(), + linewidth=linewidth if linewidth is not None else rectangle.get_linewidth(), + edgecolor=edgecolor if edgecolor is not None else rectangle.get_edgecolor(), facecolor=rectangle.get_facecolor(), ) return new_rectangle @@ -235,9 +221,7 @@ def fill_connected_component(mat, x, y): (br_bound[0], br_bound[1]), # Bottom-right ] for corner in corners: - im_mask = fill_connected_component( - im_mask, corner[0], corner[1] - ) + im_mask = fill_connected_component(im_mask, corner[0], corner[1]) # 4. Undo mask invalid pixels. im_mask[im_mask == -1.0] = 0.0 @@ -304,9 +288,7 @@ def _save(self) -> None: im_height = im_shape[0] axis = self.axes[0] - bbox = axis.get_window_extent().transformed( - self.fig.dpi_scale_trans.inverted() - ) + bbox = axis.get_window_extent().transformed(self.fig.dpi_scale_trans.inverted()) axis_height = bbox.height * self.fig.dpi # Get the linewidth in pixels. @@ -314,9 +296,7 @@ def _save(self) -> None: linewidth_px = linewidth_px / axis_height * im_height linewidth_px = int(round(linewidth_px)) - dst_paths = [ - p.parent / f"bbox_{p.stem}{p.suffix}" for p in self.src_paths - ] + dst_paths = [p.parent / f"bbox_{p.stem}{p.suffix}" for p in self.src_paths] for src_path, dst_path in zip(self.src_paths, dst_paths): im_dst = ct.io.imread(src_path) for rectangle in self.confirmed_rectangles: @@ -371,9 +351,7 @@ def print_msg(*args, **kwargs): self.confirmed_rectangles.append( BBoxer._copy_rectangle(self.current_rectangle) ) - bbox_str = BBoxer._bbox_str( - self.current_rectangle.get_bbox() - ) + bbox_str = BBoxer._bbox_str(self.current_rectangle.get_bbox()) print_msg(f"Bounding box saved: {bbox_str}.") # Clear current. 
self.current_rectangle = None diff --git a/camtools/util.py b/camtools/util.py index b8e51a47..a7fc13f7 100644 --- a/camtools/util.py +++ b/camtools/util.py @@ -1,9 +1,5 @@ -from concurrent.futures import ( - ProcessPoolExecutor, - ThreadPoolExecutor, - as_completed, -) -from typing import Any, Callable, Iterable, Optional +from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed +from typing import Any, Callable, Iterable from functools import lru_cache from tqdm import tqdm @@ -30,13 +26,10 @@ def mt_loop( desc = f"[mt] {func.__name__}" with ThreadPoolExecutor() as executor: future_to_index = { - executor.submit(func, item, **kwargs): i - for i, item in enumerate(inputs) + executor.submit(func, item, **kwargs): i for i, item in enumerate(inputs) } results = [None] * len(inputs) - for future in tqdm( - as_completed(future_to_index), total=len(inputs), desc=desc - ): + for future in tqdm(as_completed(future_to_index), total=len(inputs), desc=desc): results[future_to_index[future]] = future.result() return results @@ -62,42 +55,41 @@ def mp_loop( desc = f"[mp] {func.__name__}" with ProcessPoolExecutor() as executor: future_to_index = { - executor.submit(func, item, **kwargs): i - for i, item in enumerate(inputs) + executor.submit(func, item, **kwargs): i for i, item in enumerate(inputs) } results = [None] * len(inputs) - for future in tqdm( - as_completed(future_to_index), total=len(inputs), desc=desc - ): + for future in tqdm(as_completed(future_to_index), total=len(inputs), desc=desc): results[future_to_index[future]] = future.result() return results def query_yes_no(question, default=None): - """ - Ask a yes/no question via raw_input() and return their answer. + """Ask a yes/no question via raw_input() and return their answer. Args: - question (str): A string that is presented to the user. - default (Optional[bool]): The presumed answer if the user just hits - . Possible values: - - True: The answer is assumed to be yes - - False: The answer is assumed to be no - - None: The answer is required from the user + question: A string that is presented to the user. + default: The presumed answer if the user just hits . + - True: The answer is assumed to be yes. + - False: The answer is assumed to be no. + - None: The answer is required from the user. Returns: - bool: True for "yes" or False for "no". + Returns True for "yes" or False for "no". Examples: - >>> if query_yes_no("Continue?", default="yes"): - ... print("Proceeding.") - ... else: - ... print("Aborted.") - - >>> if not query_yes_no("Continue?", default="yes"): - ... print("Aborted.") - ... return # Or exit(0) - ... 
print("Proceeding.") + ```python + if query_yes_no("Continue?", default="yes"): + print("Proceeding.") + else: + print("Aborted.") + ``` + + ```python + if not query_yes_no("Continue?", default="yes"): + print("Aborted.") + return # Or exit(0) + print("Proceeding.") + ``` """ if default is None: prompt = "[y/n]" From 3c4b5af1e278eb2c55754938bdd6768035cfc8fe Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 18:01:13 +0800 Subject: [PATCH 15/59] chore(docs.yml): remove SPHINXOPTS from documentation build command to simplify the process --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 2426ec36..4720887d 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -27,7 +27,7 @@ jobs: - name: Build documentation run: | cd docs - make clean && make html SPHINXOPTS="-W --keep-going" + make clean && make html # SPHINXOPTS="-W --keep-going" - name: Notice run: | From cf9e01e6e4db28c731db0c5be3cf655fafddc32e Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 18:01:57 +0800 Subject: [PATCH 16/59] revert to original python code test --- test/conftest.py | 3 +-- test/test_convert.py | 30 +++++++----------------------- test/test_raycast.py | 4 +--- test/test_render.py | 21 +++++---------------- 4 files changed, 14 insertions(+), 44 deletions(-) diff --git a/test/conftest.py b/test/conftest.py index 144d8024..b3a16186 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -4,8 +4,7 @@ def pytest_configure(config): config.addinivalue_line( - "markers", - "skip_no_o3d_display: skip test when no display is available", + "markers", "skip_no_o3d_display: skip test when no display is available" ) diff --git a/test/test_convert.py b/test/test_convert.py index 4583d775..b5c96de9 100644 --- a/test/test_convert.py +++ b/test/test_convert.py @@ -121,11 +121,7 @@ def gen_random_pose(): [-axis[1], axis[0], 0], ] ) - RT = ( - np.eye(3) - + np.sin(angle) * ss - + (1 - np.cos(angle)) * np.dot(ss, ss) - ) + RT = np.eye(3) + np.sin(angle) * ss + (1 - np.cos(angle)) * np.dot(ss, ss) c = np.random.uniform(-10, 10, size=(3,)) pose = np.eye(4) pose[:3, :3] = RT @@ -142,12 +138,8 @@ def gen_random_pose(): pose_gl = ct.convert.pose_opencv_to_opengl(pose_cv) pose_cv_recovered = ct.convert.pose_opengl_to_opencv(pose_gl) pose_gl_recovered = ct.convert.pose_opencv_to_opengl(pose_cv_recovered) - np.testing.assert_allclose( - pose_cv, pose_cv_recovered, rtol=1e-5, atol=1e-5 - ) - np.testing.assert_allclose( - pose_gl, pose_gl_recovered, rtol=1e-5, atol=1e-5 - ) + np.testing.assert_allclose(pose_cv, pose_cv_recovered, rtol=1e-5, atol=1e-5) + np.testing.assert_allclose(pose_gl, pose_gl_recovered, rtol=1e-5, atol=1e-5) # Test convert T bidirectionally T_cv = np.copy(T) @@ -216,12 +208,8 @@ def gen_random_T(): pose_gl = ct.convert.pose_opencv_to_opengl(pose_cv) pose_cv_recovered = ct.convert.pose_opengl_to_opencv(pose_gl) pose_gl_recovered = ct.convert.pose_opencv_to_opengl(pose_cv_recovered) - np.testing.assert_allclose( - pose_cv, pose_cv_recovered, rtol=1e-5, atol=1e-5 - ) - np.testing.assert_allclose( - pose_gl, pose_gl_recovered, rtol=1e-5, atol=1e-5 - ) + np.testing.assert_allclose(pose_cv, pose_cv_recovered, rtol=1e-5, atol=1e-5) + np.testing.assert_allclose(pose_gl, pose_gl_recovered, rtol=1e-5, atol=1e-5) # Test T and pose are consistent across conversions np.testing.assert_allclose( @@ -304,9 +292,7 @@ def test_im_depth_im_distance_convert(): # Geometries sphere = 
o3d.geometry.TriangleMesh.create_sphere(radius=1.0) sphere = sphere.translate([0, 0, 4]) - box = o3d.geometry.TriangleMesh.create_box( - width=1.5, height=1.5, depth=1.5 - ) + box = o3d.geometry.TriangleMesh.create_box(width=1.5, height=1.5, depth=1.5) box = box.translate([0, 0, 4]) mesh = sphere + box @@ -327,6 +313,4 @@ def test_im_depth_im_distance_convert(): im_depth_reconstructed = ct.convert.im_distance_to_im_depth(im_distance, K) # Assert that the reconstructed depth is close to the original - np.testing.assert_allclose( - im_depth, im_depth_reconstructed, rtol=1e-5, atol=1e-5 - ) + np.testing.assert_allclose(im_depth, im_depth_reconstructed, rtol=1e-5, atol=1e-5) diff --git a/test/test_raycast.py b/test/test_raycast.py index d3333487..3ec71966 100644 --- a/test/test_raycast.py +++ b/test/test_raycast.py @@ -17,9 +17,7 @@ def test_mesh_to_depth(visualize: bool): # Geometries sphere = o3d.geometry.TriangleMesh.create_sphere(radius=1.0) sphere = sphere.translate([0, 0, 4]) - box = o3d.geometry.TriangleMesh.create_box( - width=1.5, height=1.5, depth=1.5 - ) + box = o3d.geometry.TriangleMesh.create_box(width=1.5, height=1.5, depth=1.5) box = box.translate([0, 0, 4]) mesh = sphere + box lineset = ct.convert.mesh_to_lineset(mesh) diff --git a/test/test_render.py b/test/test_render.py index 7c322d50..de667b08 100644 --- a/test/test_render.py +++ b/test/test_render.py @@ -18,15 +18,11 @@ def test_render_geometries(visualize: bool): See conftest.py for more information on the visualize fixture. """ # Setup geometries: sphere (red), box (blue) - sphere = o3d.geometry.TriangleMesh.create_sphere( - radius=1.0, resolution=100 - ) + sphere = o3d.geometry.TriangleMesh.create_sphere(radius=1.0, resolution=100) sphere = sphere.translate([0, 0, 4]) sphere = sphere.paint_uniform_color([0.2, 0.4, 0.8]) sphere.compute_vertex_normals() - box = o3d.geometry.TriangleMesh.create_box( - width=1.5, height=1.5, depth=1.5 - ) + box = o3d.geometry.TriangleMesh.create_box(width=1.5, height=1.5, depth=1.5) box = box.translate([0, 0, 4]) box = box.paint_uniform_color([0.8, 0.2, 0.2]) box.compute_vertex_normals() @@ -69,11 +65,7 @@ def test_render_geometries(visualize: bool): im_raycast_depth[im_raycast_depth == np.inf] = 0 # Heuristic checks of RGB rendering - assert im_render_rgb.shape == ( - height, - width, - 3, - ), "Image has incorrect dimensions" + assert im_render_rgb.shape == (height, width, 3), "Image has incorrect dimensions" num_white_pixels = np.sum( (im_render_rgb[:, :, 0] > 0.9) & (im_render_rgb[:, :, 1] > 0.9) @@ -89,9 +81,7 @@ def test_render_geometries(visualize: bool): & (im_render_rgb[:, :, 1] < 0.3) & (im_render_rgb[:, :, 2] < 0.5) ) - assert num_white_pixels > ( - height * width * 0.5 - ), "Expected mostly white background" + assert num_white_pixels > (height * width * 0.5), "Expected mostly white background" assert num_blue_pixels > 100, "Expected blue pixels (sphere) not found" assert num_red_pixels > 100, "Expected red pixels (box) not found" @@ -108,8 +98,7 @@ def test_render_geometries(visualize: bool): im_render_rgb_mask.astype(float) - im_render_depth_mask.astype(float) ) im_mask_diff_raycast_vs_render = np.abs( - im_raycast_depth_mask.astype(float) - - im_render_depth_mask.astype(float) + im_raycast_depth_mask.astype(float) - im_render_depth_mask.astype(float) ) assert ( np.mean(im_mask_diff_rgb_vs_raycast) < 0.01 From a846643aeb700bc3859bbdb7437a7b7fb9d79302 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 18:05:17 +0800 Subject: [PATCH 17/59] chore(pyproject.toml): 
remove black configuration as it is no longer needed --- pyproject.toml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b457f746..d6dab6d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,8 +51,3 @@ torch = [ [tool.setuptools] packages = ["camtools", "camtools.tools"] - -[tool.black] -include = '\.pyi?$' -line-length = 79 -target-version = ['py38'] From 38a5b5d62354c22d94939c422a804b33efee422b Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 18:09:30 +0800 Subject: [PATCH 18/59] chore(.gitignore): add docs/_static/ and docs/_templates/ to ignore list to exclude Sphinx documentation artifacts --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 81628d19..fb9b7686 100644 --- a/.gitignore +++ b/.gitignore @@ -77,6 +77,8 @@ instance/ # Sphinx documentation docs/_build/ +docs/_static/ +docs/_templates/ docs/api/ .doctrees .buildinfo From 11c90c52cd395a98b8ab5bb74a81abb24ca50176 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 18:13:36 +0800 Subject: [PATCH 19/59] docs(index.rst): reorder sections in the table of contents to improve logical flow --- docs/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index 39b53d99..e22cb1bc 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -29,8 +29,8 @@ clear and easy-to-use APIs. :caption: Contents: features - installation coordinates + installation contributing api/modules From 598ec3ccb78e86c625abbee668ce55b8c19d16e5 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 18:16:02 +0800 Subject: [PATCH 20/59] docs(index.rst): update API documentation reference path from 'api/modules' to 'api' for clarity and consistency --- docs/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index e22cb1bc..56e9c230 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -32,7 +32,7 @@ clear and easy-to-use APIs. coordinates installation contributing - api/modules + api Indices and tables ================== From 2924265c56832651c586fc41ff9d174286b993c2 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 18:18:42 +0800 Subject: [PATCH 21/59] docs: add API reference documentation and update Sphinx configuration --- docs/api.rst | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++ docs/conf.py | 3 ++- 2 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 docs/api.rst diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 00000000..1d320add --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,66 @@ +API Reference +============ + +Camera Operations +--------------- + +.. automodule:: camtools.camera + :members: + :undoc-members: + :show-inheritance: + +Coordinate Conversion +------------------- + +.. automodule:: camtools.convert + :members: + :undoc-members: + :show-inheritance: + +Image and Depth I/O +----------------- + +.. automodule:: camtools.io + :members: + :undoc-members: + :show-inheritance: + +Projection +--------- + +.. automodule:: camtools.project + :members: + :undoc-members: + :show-inheritance: + +Ray Casting +---------- + +.. automodule:: camtools.raycast + :members: + :undoc-members: + :show-inheritance: + +COLMAP Tools +----------- + +.. automodule:: camtools.colmap + :members: + :undoc-members: + :show-inheritance: + +Image Metrics +------------ + +.. automodule:: camtools.metric + :members: + :undoc-members: + :show-inheritance: + +Rendering +-------- + +.. 
automodule:: camtools.render + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/conf.py b/docs/conf.py index 7931801d..4d58882b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -74,4 +74,5 @@ autodoc_member_order = "bysource" autodoc_typehints = "description" autodoc_typehints_description_target = "documented" -add_module_names = False +add_module_names = True +python_use_unqualified_type_names = False From ec5516fc552513f41954a199bc28c9ba045cf2db Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 18:22:31 +0800 Subject: [PATCH 22/59] docs(api.rst, conf.py): update module references from 'camtools' to 'ct' alias for consistency and brevity in documentation --- docs/api.rst | 18 ++++++++++-------- docs/conf.py | 6 ++++++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 1d320add..a804bb5c 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -4,7 +4,9 @@ API Reference Camera Operations --------------- -.. automodule:: camtools.camera +.. currentmodule:: ct + +.. automodule:: ct.camera :members: :undoc-members: :show-inheritance: @@ -12,7 +14,7 @@ Camera Operations Coordinate Conversion ------------------- -.. automodule:: camtools.convert +.. automodule:: ct.convert :members: :undoc-members: :show-inheritance: @@ -20,7 +22,7 @@ Coordinate Conversion Image and Depth I/O ----------------- -.. automodule:: camtools.io +.. automodule:: ct.io :members: :undoc-members: :show-inheritance: @@ -28,7 +30,7 @@ Image and Depth I/O Projection --------- -.. automodule:: camtools.project +.. automodule:: ct.project :members: :undoc-members: :show-inheritance: @@ -36,7 +38,7 @@ Projection Ray Casting ---------- -.. automodule:: camtools.raycast +.. automodule:: ct.raycast :members: :undoc-members: :show-inheritance: @@ -44,7 +46,7 @@ Ray Casting COLMAP Tools ----------- -.. automodule:: camtools.colmap +.. automodule:: ct.colmap :members: :undoc-members: :show-inheritance: @@ -52,7 +54,7 @@ COLMAP Tools Image Metrics ------------ -.. automodule:: camtools.metric +.. automodule:: ct.metric :members: :undoc-members: :show-inheritance: @@ -60,7 +62,7 @@ Image Metrics Rendering -------- -.. automodule:: camtools.render +.. automodule:: ct.render :members: :undoc-members: :show-inheritance: diff --git a/docs/conf.py b/docs/conf.py index 4d58882b..8aa3ece2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -76,3 +76,9 @@ autodoc_typehints_description_target = "documented" add_module_names = True python_use_unqualified_type_names = False + +# Custom module name display +modindex_common_prefix = ["camtools."] # Strip 'camtools.' from module index +import camtools as ct + +sys.modules["ct"] = ct # Allow using 'ct' as an alias in documentation From 9be654b524438151cdfed592b37d7ff8ce79f2e7 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 18:30:05 +0800 Subject: [PATCH 23/59] docs(api.rst): add table of contents to improve navigation and readability --- docs/api.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/api.rst b/docs/api.rst index a804bb5c..17a98c43 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,6 +1,11 @@ API Reference ============ +.. 
contents:: + :local: + :depth: 2 + :class: this-will-duplicate-information-and-it-is-still-useful-here + Camera Operations --------------- From fd95627ecdf79458864e51696ee7473d2cfd2058 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 18:46:05 +0800 Subject: [PATCH 24/59] chore: remove docs/api from .gitignore as it is no longer needed --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index fb9b7686..4c5e845a 100644 --- a/.gitignore +++ b/.gitignore @@ -79,7 +79,6 @@ instance/ docs/_build/ docs/_static/ docs/_templates/ -docs/api/ .doctrees .buildinfo From 5b49be2d4efa4f042383893c79d224d2c5adc701 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 18:51:21 +0800 Subject: [PATCH 25/59] docs(index.rst): update toctree structure, rename 'Contents' to 'Docs' and add 'API' section for better documentation organization --- docs/index.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index 56e9c230..99f5ac2b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -26,12 +26,17 @@ clear and easy-to-use APIs. .. toctree:: :maxdepth: 2 - :caption: Contents: + :caption: Docs features coordinates installation contributing + +.. toctree:: + :maxdepth: 2 + :caption: API + api Indices and tables From 448c879a955947a9cd6b3912aa2d852c5cf2fd63 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 19:35:14 +0800 Subject: [PATCH 26/59] docs(api.rst): restructure and align section headers for improved readability and consistency --- docs/api.rst | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 17a98c43..9446570f 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,15 +1,10 @@ API Reference -============ +============== -.. contents:: - :local: - :depth: 2 - :class: this-will-duplicate-information-and-it-is-still-useful-here +.. currentmodule:: ct Camera Operations ---------------- - -.. currentmodule:: ct +---------------------------------------- .. automodule:: ct.camera :members: @@ -17,15 +12,16 @@ Camera Operations :show-inheritance: Coordinate Conversion -------------------- +---------------------------------------- +.. _label1: .. automodule:: ct.convert :members: :undoc-members: :show-inheritance: Image and Depth I/O ------------------ +---------------------------------------- .. automodule:: ct.io :members: @@ -33,7 +29,7 @@ Image and Depth I/O :show-inheritance: Projection ---------- +---------------------------------------- .. automodule:: ct.project :members: @@ -41,7 +37,7 @@ Projection :show-inheritance: Ray Casting ----------- +---------------------------------------- .. automodule:: ct.raycast :members: @@ -49,7 +45,7 @@ Ray Casting :show-inheritance: COLMAP Tools ------------ +---------------------------------------- .. automodule:: ct.colmap :members: @@ -57,7 +53,7 @@ COLMAP Tools :show-inheritance: Image Metrics ------------- +---------------------------------------- .. automodule:: ct.metric :members: @@ -65,7 +61,7 @@ Image Metrics :show-inheritance: Rendering --------- +---------------------------------------- .. 
automodule:: ct.render :members: From 91c8d1262797012b6a98c24f2151c630af5cefe2 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 20:00:45 +0800 Subject: [PATCH 27/59] refactor(docs): restructure API documentation into modular files for better organization and maintainability chore(.gitignore): remove unused static and template directories from gitignore --- .gitignore | 2 -- docs/api.rst | 77 +++++++------------------------------------- docs/api/camera.rst | 9 ++++++ docs/api/colmap.rst | 9 ++++++ docs/api/convert.rst | 9 ++++++ docs/api/io.rst | 9 ++++++ docs/api/metric.rst | 9 ++++++ docs/api/project.rst | 9 ++++++ docs/api/raycast.rst | 9 ++++++ docs/api/render.rst | 9 ++++++ docs/index.rst | 5 --- 11 files changed, 83 insertions(+), 73 deletions(-) create mode 100644 docs/api/camera.rst create mode 100644 docs/api/colmap.rst create mode 100644 docs/api/convert.rst create mode 100644 docs/api/io.rst create mode 100644 docs/api/metric.rst create mode 100644 docs/api/project.rst create mode 100644 docs/api/raycast.rst create mode 100644 docs/api/render.rst diff --git a/.gitignore b/.gitignore index 4c5e845a..46f577c1 100644 --- a/.gitignore +++ b/.gitignore @@ -77,8 +77,6 @@ instance/ # Sphinx documentation docs/_build/ -docs/_static/ -docs/_templates/ .doctrees .buildinfo diff --git a/docs/api.rst b/docs/api.rst index 9446570f..dd25181a 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,69 +1,14 @@ API Reference ============== -.. currentmodule:: ct - -Camera Operations ----------------------------------------- - -.. automodule:: ct.camera - :members: - :undoc-members: - :show-inheritance: - -Coordinate Conversion ----------------------------------------- - -.. _label1: -.. automodule:: ct.convert - :members: - :undoc-members: - :show-inheritance: - -Image and Depth I/O ----------------------------------------- - -.. automodule:: ct.io - :members: - :undoc-members: - :show-inheritance: - -Projection ----------------------------------------- - -.. automodule:: ct.project - :members: - :undoc-members: - :show-inheritance: - -Ray Casting ----------------------------------------- - -.. automodule:: ct.raycast - :members: - :undoc-members: - :show-inheritance: - -COLMAP Tools ----------------------------------------- - -.. automodule:: ct.colmap - :members: - :undoc-members: - :show-inheritance: - -Image Metrics ----------------------------------------- - -.. automodule:: ct.metric - :members: - :undoc-members: - :show-inheritance: - -Rendering ----------------------------------------- - -.. automodule:: ct.render - :members: - :undoc-members: - :show-inheritance: +.. toctree:: + :maxdepth: 2 + + api/camera + api/convert + api/io + api/project + api/raycast + api/colmap + api/metric + api/render diff --git a/docs/api/camera.rst b/docs/api/camera.rst new file mode 100644 index 00000000..e0d109ce --- /dev/null +++ b/docs/api/camera.rst @@ -0,0 +1,9 @@ +ct.camera +========= + +.. currentmodule:: ct + +.. automodule:: ct.camera + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/colmap.rst b/docs/api/colmap.rst new file mode 100644 index 00000000..1a4f4185 --- /dev/null +++ b/docs/api/colmap.rst @@ -0,0 +1,9 @@ +ct.colmap +========= + +.. currentmodule:: ct + +.. automodule:: ct.colmap + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/convert.rst b/docs/api/convert.rst new file mode 100644 index 00000000..cf58a528 --- /dev/null +++ b/docs/api/convert.rst @@ -0,0 +1,9 @@ +ct.convert +========= + +.. currentmodule:: ct + +.. 
automodule:: ct.convert + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/io.rst b/docs/api/io.rst new file mode 100644 index 00000000..1deb1b2b --- /dev/null +++ b/docs/api/io.rst @@ -0,0 +1,9 @@ +ct.io +===== + +.. currentmodule:: ct + +.. automodule:: ct.io + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/metric.rst b/docs/api/metric.rst new file mode 100644 index 00000000..69b32400 --- /dev/null +++ b/docs/api/metric.rst @@ -0,0 +1,9 @@ +ct.metric +========= + +.. currentmodule:: ct + +.. automodule:: ct.metric + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/project.rst b/docs/api/project.rst new file mode 100644 index 00000000..7724b783 --- /dev/null +++ b/docs/api/project.rst @@ -0,0 +1,9 @@ +ct.project +========== + +.. currentmodule:: ct + +.. automodule:: ct.project + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/raycast.rst b/docs/api/raycast.rst new file mode 100644 index 00000000..90ae6365 --- /dev/null +++ b/docs/api/raycast.rst @@ -0,0 +1,9 @@ +ct.raycast +========== + +.. currentmodule:: ct + +.. automodule:: ct.raycast + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/render.rst b/docs/api/render.rst new file mode 100644 index 00000000..d715ad5a --- /dev/null +++ b/docs/api/render.rst @@ -0,0 +1,9 @@ +ct.render +========= + +.. currentmodule:: ct + +.. automodule:: ct.render + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/index.rst b/docs/index.rst index 99f5ac2b..0a1b94fc 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -32,11 +32,6 @@ clear and easy-to-use APIs. coordinates installation contributing - -.. toctree:: - :maxdepth: 2 - :caption: API - api Indices and tables From ce987bffb393f3ff420dddacdc3cc4423c5db238 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 20:02:39 +0800 Subject: [PATCH 28/59] docs(index.rst): add 'Home ' to the table of contents for better navigation --- docs/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/index.rst b/docs/index.rst index 0a1b94fc..91d870ed 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -28,6 +28,7 @@ clear and easy-to-use APIs. 
:maxdepth: 2 :caption: Docs + Home features coordinates installation From 68f50c7c2694c0f487dd40265fce602b88a5d8f5 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 20:02:44 +0800 Subject: [PATCH 29/59] docs(api.rst): reorder API reference sections for better organization and readability --- docs/api.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index dd25181a..719a996c 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -5,10 +5,10 @@ API Reference :maxdepth: 2 api/camera + api/colmap api/convert api/io + api/metric api/project api/raycast - api/colmap - api/metric api/render From d4790c7f49ac9a1d0104b3e7db20915e2ec09fbb Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 20:16:28 +0800 Subject: [PATCH 30/59] docs: consolidate API reference into a single file and remove individual module files docs: update index.rst to reduce toctree maxdepth for better navigation --- docs/api.rst | 96 ++++++++++++++++++++++++++++++++++++++------ docs/api/camera.rst | 9 ----- docs/api/colmap.rst | 9 ----- docs/api/convert.rst | 9 ----- docs/api/io.rst | 9 ----- docs/api/metric.rst | 9 ----- docs/api/project.rst | 9 ----- docs/api/raycast.rst | 9 ----- docs/api/render.rst | 9 ----- docs/index.rst | 2 +- 10 files changed, 84 insertions(+), 86 deletions(-) delete mode 100644 docs/api/camera.rst delete mode 100644 docs/api/colmap.rst delete mode 100644 docs/api/convert.rst delete mode 100644 docs/api/io.rst delete mode 100644 docs/api/metric.rst delete mode 100644 docs/api/project.rst delete mode 100644 docs/api/raycast.rst delete mode 100644 docs/api/render.rst diff --git a/docs/api.rst b/docs/api.rst index 719a996c..07438a07 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,14 +1,84 @@ API Reference -============== - -.. toctree:: - :maxdepth: 2 - - api/camera - api/colmap - api/convert - api/io - api/metric - api/project - api/raycast - api/render +============= + +This section provides detailed API documentation for all CamTools modules. + +> ct.camera +=========== + +.. currentmodule:: ct + +.. automodule:: ct.camera + :members: + :undoc-members: + :show-inheritance: + +> ct.colmap +=========== + +.. currentmodule:: ct + +.. automodule:: ct.colmap + :members: + :undoc-members: + :show-inheritance: + +> ct.convert +=========== + +.. currentmodule:: ct + +.. automodule:: ct.convert + :members: + :undoc-members: + :show-inheritance: + +> ct.io +======= + +.. currentmodule:: ct + +.. automodule:: ct.io + :members: + :undoc-members: + :show-inheritance: + +> ct.metric +=========== + +.. currentmodule:: ct + +.. automodule:: ct.metric + :members: + :undoc-members: + :show-inheritance: + +> ct.project +=========== + +.. currentmodule:: ct + +.. automodule:: ct.project + :members: + :undoc-members: + :show-inheritance: + +> ct.raycast +=========== + +.. currentmodule:: ct + +.. automodule:: ct.raycast + :members: + :undoc-members: + :show-inheritance: + +> ct.render +=========== + +.. currentmodule:: ct + +.. automodule:: ct.render + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/camera.rst b/docs/api/camera.rst deleted file mode 100644 index e0d109ce..00000000 --- a/docs/api/camera.rst +++ /dev/null @@ -1,9 +0,0 @@ -ct.camera -========= - -.. currentmodule:: ct - -.. 
automodule:: ct.camera - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/api/colmap.rst b/docs/api/colmap.rst deleted file mode 100644 index 1a4f4185..00000000 --- a/docs/api/colmap.rst +++ /dev/null @@ -1,9 +0,0 @@ -ct.colmap -========= - -.. currentmodule:: ct - -.. automodule:: ct.colmap - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/api/convert.rst b/docs/api/convert.rst deleted file mode 100644 index cf58a528..00000000 --- a/docs/api/convert.rst +++ /dev/null @@ -1,9 +0,0 @@ -ct.convert -========= - -.. currentmodule:: ct - -.. automodule:: ct.convert - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/api/io.rst b/docs/api/io.rst deleted file mode 100644 index 1deb1b2b..00000000 --- a/docs/api/io.rst +++ /dev/null @@ -1,9 +0,0 @@ -ct.io -===== - -.. currentmodule:: ct - -.. automodule:: ct.io - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/api/metric.rst b/docs/api/metric.rst deleted file mode 100644 index 69b32400..00000000 --- a/docs/api/metric.rst +++ /dev/null @@ -1,9 +0,0 @@ -ct.metric -========= - -.. currentmodule:: ct - -.. automodule:: ct.metric - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/api/project.rst b/docs/api/project.rst deleted file mode 100644 index 7724b783..00000000 --- a/docs/api/project.rst +++ /dev/null @@ -1,9 +0,0 @@ -ct.project -========== - -.. currentmodule:: ct - -.. automodule:: ct.project - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/api/raycast.rst b/docs/api/raycast.rst deleted file mode 100644 index 90ae6365..00000000 --- a/docs/api/raycast.rst +++ /dev/null @@ -1,9 +0,0 @@ -ct.raycast -========== - -.. currentmodule:: ct - -.. automodule:: ct.raycast - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/api/render.rst b/docs/api/render.rst deleted file mode 100644 index d715ad5a..00000000 --- a/docs/api/render.rst +++ /dev/null @@ -1,9 +0,0 @@ -ct.render -========= - -.. currentmodule:: ct - -.. automodule:: ct.render - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/index.rst b/docs/index.rst index 91d870ed..94d14b41 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -25,7 +25,7 @@ clear and easy-to-use APIs.

.. toctree:: - :maxdepth: 2 + :maxdepth: 1 :caption: Docs Home From d31a597f5aa0eaa5e4cfe0832c5ecfcbd5bbbe9a Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 20:22:24 +0800 Subject: [PATCH 31/59] docs(api.rst): add table of contents and internal references for better navigation and structure --- docs/api.rst | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/docs/api.rst b/docs/api.rst index 07438a07..d4447aac 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,7 +1,16 @@ API Reference ============= -This section provides detailed API documentation for all CamTools modules. +- :ref:`ct.camera ` +- :ref:`ct.colmap ` +- :ref:`ct.convert ` +- :ref:`ct.io ` +- :ref:`ct.metric ` +- :ref:`ct.project ` +- :ref:`ct.raycast ` +- :ref:`ct.render ` + +.. _ct-camera: > ct.camera =========== @@ -13,6 +22,8 @@ This section provides detailed API documentation for all CamTools modules. :undoc-members: :show-inheritance: +.. _ct-colmap: + > ct.colmap =========== @@ -23,6 +34,8 @@ This section provides detailed API documentation for all CamTools modules. :undoc-members: :show-inheritance: +.. _ct-convert: + > ct.convert =========== @@ -33,6 +46,8 @@ This section provides detailed API documentation for all CamTools modules. :undoc-members: :show-inheritance: +.. _ct-io: + > ct.io ======= @@ -43,6 +58,8 @@ This section provides detailed API documentation for all CamTools modules. :undoc-members: :show-inheritance: +.. _ct-metric: + > ct.metric =========== @@ -53,6 +70,8 @@ This section provides detailed API documentation for all CamTools modules. :undoc-members: :show-inheritance: +.. _ct-project: + > ct.project =========== @@ -63,6 +82,8 @@ This section provides detailed API documentation for all CamTools modules. :undoc-members: :show-inheritance: +.. _ct-raycast: + > ct.raycast =========== @@ -73,6 +94,8 @@ This section provides detailed API documentation for all CamTools modules. :undoc-members: :show-inheritance: +.. _ct-render: + > ct.render =========== From 53e00cebf4fd57f0b841ff86e5f4f4a06482fb78 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 20:46:00 +0800 Subject: [PATCH 32/59] docs(api.rst): format section headers with backticks for consistency and clarity --- docs/api.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index d4447aac..6864cca8 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -12,7 +12,7 @@ API Reference .. _ct-camera: -> ct.camera +``> ct.camera`` =========== .. currentmodule:: ct @@ -24,7 +24,7 @@ API Reference .. _ct-colmap: -> ct.colmap +``> ct.colmap`` =========== .. currentmodule:: ct @@ -36,7 +36,7 @@ API Reference .. _ct-convert: -> ct.convert +``> ct.convert`` =========== .. currentmodule:: ct @@ -48,7 +48,7 @@ API Reference .. _ct-io: -> ct.io +``> ct.io`` ======= .. currentmodule:: ct @@ -60,7 +60,7 @@ API Reference .. _ct-metric: -> ct.metric +``> ct.metric`` =========== .. currentmodule:: ct @@ -72,7 +72,7 @@ API Reference .. _ct-project: -> ct.project +``> ct.project`` =========== .. currentmodule:: ct @@ -84,7 +84,7 @@ API Reference .. _ct-raycast: -> ct.raycast +``> ct.raycast`` =========== .. currentmodule:: ct @@ -96,7 +96,7 @@ API Reference .. _ct-render: -> ct.render +``> ct.render`` =========== .. 
currentmodule:: ct From a9a55bceb25f879557562ca90a6c0707b8ca59fe Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sat, 28 Dec 2024 20:47:59 +0800 Subject: [PATCH 33/59] docs(index.rst): simplify indices section by removing modindex and search references --- docs/index.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 94d14b41..2e8890c4 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -35,9 +35,7 @@ clear and easy-to-use APIs. contributing api -Indices and tables -================== +Indices +======= * :ref:`genindex` -* :ref:`modindex` -* :ref:`search` From 8a830301b913172a2d32cbfe9fe4e9550a2ba803 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 00:40:46 +0800 Subject: [PATCH 34/59] docs(README.md): simplify documentation build commands by using `-C` flag with `make` and update paths for clarity --- README.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 81915756..5888b71f 100644 --- a/README.md +++ b/README.md @@ -260,14 +260,13 @@ To build and view the documentation locally: pip install -e .[docs] # Build the documentation -cd docs -make clean && make html +make -C docs clean && make -C docs html -# To treat warnings as errors -make clean && make html SPHINXOPTS="-W --keep-going" +# (Optional) Build the documentation with warnings as errors +make -C docs clean && make -C docs html SPHINXOPTS="-W --keep-going" -# Start a local server to view the documentation (run inside `docs/`) -python -m http.server 8000 --directory _build/html +# Start a local server to view the documentation +python -m http.server 8000 --directory docs/_build/html ``` Then open your browser and navigate to `http://localhost:8000` to view the documentation. From c529c761cf1286c2a17e542405b58f07d9f8f85c Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 01:18:17 +0800 Subject: [PATCH 35/59] refactor(docs): restructure API reference into separate files for better organization and maintainability --- docs/api.rst | 115 +++++------------------------------------------ docs/camera.rst | 9 ++++ docs/colmap.rst | 9 ++++ docs/convert.rst | 9 ++++ docs/io.rst | 9 ++++ docs/metric.rst | 9 ++++ docs/project.rst | 9 ++++ docs/raycast.rst | 9 ++++ docs/render.rst | 9 ++++ 9 files changed, 83 insertions(+), 104 deletions(-) create mode 100644 docs/camera.rst create mode 100644 docs/colmap.rst create mode 100644 docs/convert.rst create mode 100644 docs/io.rst create mode 100644 docs/metric.rst create mode 100644 docs/project.rst create mode 100644 docs/raycast.rst create mode 100644 docs/render.rst diff --git a/docs/api.rst b/docs/api.rst index 6864cca8..157f35cc 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,107 +1,14 @@ API Reference ============= -- :ref:`ct.camera ` -- :ref:`ct.colmap ` -- :ref:`ct.convert ` -- :ref:`ct.io ` -- :ref:`ct.metric ` -- :ref:`ct.project ` -- :ref:`ct.raycast ` -- :ref:`ct.render ` - -.. _ct-camera: - -``> ct.camera`` -=========== - -.. currentmodule:: ct - -.. automodule:: ct.camera - :members: - :undoc-members: - :show-inheritance: - -.. _ct-colmap: - -``> ct.colmap`` -=========== - -.. currentmodule:: ct - -.. automodule:: ct.colmap - :members: - :undoc-members: - :show-inheritance: - -.. _ct-convert: - -``> ct.convert`` -=========== - -.. currentmodule:: ct - -.. automodule:: ct.convert - :members: - :undoc-members: - :show-inheritance: - -.. _ct-io: - -``> ct.io`` -======= - -.. currentmodule:: ct - -.. 
automodule:: ct.io - :members: - :undoc-members: - :show-inheritance: - -.. _ct-metric: - -``> ct.metric`` -=========== - -.. currentmodule:: ct - -.. automodule:: ct.metric - :members: - :undoc-members: - :show-inheritance: - -.. _ct-project: - -``> ct.project`` -=========== - -.. currentmodule:: ct - -.. automodule:: ct.project - :members: - :undoc-members: - :show-inheritance: - -.. _ct-raycast: - -``> ct.raycast`` -=========== - -.. currentmodule:: ct - -.. automodule:: ct.raycast - :members: - :undoc-members: - :show-inheritance: - -.. _ct-render: - -``> ct.render`` -=========== - -.. currentmodule:: ct - -.. automodule:: ct.render - :members: - :undoc-members: - :show-inheritance: +.. toctree:: + :maxdepth: 2 + + camera + colmap + convert + io + metric + project + raycast + render diff --git a/docs/camera.rst b/docs/camera.rst new file mode 100644 index 00000000..e0d109ce --- /dev/null +++ b/docs/camera.rst @@ -0,0 +1,9 @@ +ct.camera +========= + +.. currentmodule:: ct + +.. automodule:: ct.camera + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/colmap.rst b/docs/colmap.rst new file mode 100644 index 00000000..1a4f4185 --- /dev/null +++ b/docs/colmap.rst @@ -0,0 +1,9 @@ +ct.colmap +========= + +.. currentmodule:: ct + +.. automodule:: ct.colmap + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/convert.rst b/docs/convert.rst new file mode 100644 index 00000000..cf58a528 --- /dev/null +++ b/docs/convert.rst @@ -0,0 +1,9 @@ +ct.convert +========= + +.. currentmodule:: ct + +.. automodule:: ct.convert + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/io.rst b/docs/io.rst new file mode 100644 index 00000000..c0e03d96 --- /dev/null +++ b/docs/io.rst @@ -0,0 +1,9 @@ +ct.io +======= + +.. currentmodule:: ct + +.. automodule:: ct.io + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/metric.rst b/docs/metric.rst new file mode 100644 index 00000000..69b32400 --- /dev/null +++ b/docs/metric.rst @@ -0,0 +1,9 @@ +ct.metric +========= + +.. currentmodule:: ct + +.. automodule:: ct.metric + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/project.rst b/docs/project.rst new file mode 100644 index 00000000..62b4e2cc --- /dev/null +++ b/docs/project.rst @@ -0,0 +1,9 @@ +ct.project +========= + +.. currentmodule:: ct + +.. automodule:: ct.project + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/raycast.rst b/docs/raycast.rst new file mode 100644 index 00000000..fcf79b37 --- /dev/null +++ b/docs/raycast.rst @@ -0,0 +1,9 @@ +ct.raycast +========= + +.. currentmodule:: ct + +.. automodule:: ct.raycast + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/render.rst b/docs/render.rst new file mode 100644 index 00000000..d715ad5a --- /dev/null +++ b/docs/render.rst @@ -0,0 +1,9 @@ +ct.render +========= + +.. currentmodule:: ct + +.. 
automodule:: ct.render + :members: + :undoc-members: + :show-inheritance: From 512b8ab4b91ec5fb21ad92f918ee238c01e1d367 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 01:38:06 +0800 Subject: [PATCH 36/59] docs: restructure API documentation by moving modules into an api subdirectory for better organization and clarity --- docs/api.rst | 16 ++++++++-------- docs/{ => api}/camera.rst | 0 docs/{ => api}/colmap.rst | 0 docs/{ => api}/convert.rst | 0 docs/{ => api}/io.rst | 0 docs/{ => api}/metric.rst | 0 docs/{ => api}/project.rst | 0 docs/{ => api}/raycast.rst | 0 docs/{ => api}/render.rst | 0 docs/index.rst | 5 ----- 10 files changed, 8 insertions(+), 13 deletions(-) rename docs/{ => api}/camera.rst (100%) rename docs/{ => api}/colmap.rst (100%) rename docs/{ => api}/convert.rst (100%) rename docs/{ => api}/io.rst (100%) rename docs/{ => api}/metric.rst (100%) rename docs/{ => api}/project.rst (100%) rename docs/{ => api}/raycast.rst (100%) rename docs/{ => api}/render.rst (100%) diff --git a/docs/api.rst b/docs/api.rst index 157f35cc..755da7de 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -4,11 +4,11 @@ API Reference .. toctree:: :maxdepth: 2 - camera - colmap - convert - io - metric - project - raycast - render + api/camera + api/colmap + api/convert + api/io + api/metric + api/project + api/raycast + api/render diff --git a/docs/camera.rst b/docs/api/camera.rst similarity index 100% rename from docs/camera.rst rename to docs/api/camera.rst diff --git a/docs/colmap.rst b/docs/api/colmap.rst similarity index 100% rename from docs/colmap.rst rename to docs/api/colmap.rst diff --git a/docs/convert.rst b/docs/api/convert.rst similarity index 100% rename from docs/convert.rst rename to docs/api/convert.rst diff --git a/docs/io.rst b/docs/api/io.rst similarity index 100% rename from docs/io.rst rename to docs/api/io.rst diff --git a/docs/metric.rst b/docs/api/metric.rst similarity index 100% rename from docs/metric.rst rename to docs/api/metric.rst diff --git a/docs/project.rst b/docs/api/project.rst similarity index 100% rename from docs/project.rst rename to docs/api/project.rst diff --git a/docs/raycast.rst b/docs/api/raycast.rst similarity index 100% rename from docs/raycast.rst rename to docs/api/raycast.rst diff --git a/docs/render.rst b/docs/api/render.rst similarity index 100% rename from docs/render.rst rename to docs/api/render.rst diff --git a/docs/index.rst b/docs/index.rst index 2e8890c4..1df2c5c6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -34,8 +34,3 @@ clear and easy-to-use APIs. 
installation contributing api - -Indices -======= - -* :ref:`genindex` From c9357cca28b0ecf376f0abc28baba96428076b52 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 01:38:56 +0800 Subject: [PATCH 37/59] chore(docs): add empty .gitkeep file to maintain docs/_static directory structure --- docs/_static/.gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 docs/_static/.gitkeep diff --git a/docs/_static/.gitkeep b/docs/_static/.gitkeep new file mode 100644 index 00000000..e69de29b From 02ad74933f2c752dba97b43024eddd2de42b36e9 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 01:44:04 +0800 Subject: [PATCH 38/59] docs(api): fix section header formatting in API documentation files to ensure consistency --- docs/api/convert.rst | 2 +- docs/api/project.rst | 2 +- docs/api/raycast.rst | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/api/convert.rst b/docs/api/convert.rst index cf58a528..0959fc98 100644 --- a/docs/api/convert.rst +++ b/docs/api/convert.rst @@ -1,5 +1,5 @@ ct.convert -========= +========== .. currentmodule:: ct diff --git a/docs/api/project.rst b/docs/api/project.rst index 62b4e2cc..7724b783 100644 --- a/docs/api/project.rst +++ b/docs/api/project.rst @@ -1,5 +1,5 @@ ct.project -========= +========== .. currentmodule:: ct diff --git a/docs/api/raycast.rst b/docs/api/raycast.rst index fcf79b37..90ae6365 100644 --- a/docs/api/raycast.rst +++ b/docs/api/raycast.rst @@ -1,5 +1,5 @@ ct.raycast -========= +========== .. currentmodule:: ct From ca3b7ac0227c95256760ed0aa147eb4f27e3b36f Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 01:46:01 +0800 Subject: [PATCH 39/59] refactor(docs): consolidate features.rst content into index.rst for better organization and readability --- docs/features.rst | 100 ---------------------------------------------- docs/index.rst | 100 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 99 insertions(+), 101 deletions(-) delete mode 100644 docs/features.rst diff --git a/docs/features.rst b/docs/features.rst deleted file mode 100644 index 9de882a2..00000000 --- a/docs/features.rst +++ /dev/null @@ -1,100 +0,0 @@ -Features -======== - -What can you do with CamTools? ------------------------------- - -1. Plot cameras -^^^^^^^^^^^^^^^ - -Useful for debugging 3D reconstruction and NeRFs! - -.. code-block:: python - - import camtools as ct - import open3d as o3d - cameras = ct.camera.create_camera_frustums(Ks, Ts) - o3d.visualization.draw_geometries([cameras]) - -.. raw:: html - -

- -

- -2. Convert camera parameters -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: python - - pose = ct.convert.T_to_pose(T) # Convert T to pose - T = ct.convert.pose_to_T(pose) # Convert pose to T - R, t = ct.convert.T_to_R_t(T) # Convert T to R and t - C = ct.convert.pose_to_C(pose) # Convert pose to camera center - K, T = ct.convert.P_to_K_T(P) # Decompose projection matrix P to K and T - # And more... - -3. Projection and ray casting -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: python - - # Project 3D points to pixels. - pixels = ct.project.points_to_pixel(points, K, T) - - # Back-project depth image to 3D points. - points = ct.project.im_depth_to_points(im_depth, K, T) - - # Ray cast a triangle mesh to depth image given the camera parameters. - im_depth = ct.raycast.mesh_to_im_depth(mesh, K, T, height, width) - - # And more... - -4. Image and depth I/O -^^^^^^^^^^^^^^^^^^^^^^ - -Strict type checks and range checks are enforced. The image and depth I/O -APIs are specifically designed to solve the following pain points: - -- Is my image of type ``float32`` or ``uint8``? -- Does it have range ``[0, 1]`` or ``[0, 255]``? -- Is it RGB or BGR? -- Does my image have an alpha channel? -- When saving depth image as integer-based ``.png``, is it correctly scaled? - -.. code-block:: python - - ct.io.imread() - ct.io.imwrite() - ct.io.imread_detph() - ct.io.imwrite_depth() - -5. Command-line tools -^^^^^^^^^^^^^^^^^^^^^ - -The ``ct`` command runs in terminal: - -.. code-block:: bash - - # Crop image boarders. - ct crop-boarders *.png --pad_pixel 10 --skip_cropped --same_crop - - # Draw synchronized bounding boxes interactively. - ct draw-bboxes path/to/a.png path/to/b.png - - # For more command-line tools. - ct --help - -.. raw:: html - -

- -

- -6. And more -^^^^^^^^^^^ - -- Solve line intersections -- COLMAP tools -- Points normalization -- And more... diff --git a/docs/index.rst b/docs/index.rst index 1df2c5c6..57eda9fe 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -29,8 +29,106 @@ clear and easy-to-use APIs. :caption: Docs Home - features coordinates installation contributing api + + +What can you do with CamTools? +------------------------------ + +1. Plot cameras +^^^^^^^^^^^^^^^ + +Useful for debugging 3D reconstruction and NeRFs! + +.. code-block:: python + + import camtools as ct + import open3d as o3d + cameras = ct.camera.create_camera_frustums(Ks, Ts) + o3d.visualization.draw_geometries([cameras]) + +.. raw:: html + +

+ +

+
+2. Convert camera parameters
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: python
+
+    pose = ct.convert.T_to_pose(T)  # Convert T to pose
+    T = ct.convert.pose_to_T(pose)  # Convert pose to T
+    R, t = ct.convert.T_to_R_t(T)   # Convert T to R and t
+    C = ct.convert.pose_to_C(pose)  # Convert pose to camera center
+    K, T = ct.convert.P_to_K_T(P)   # Decompose projection matrix P to K and T
+    # And more...
+
+3. Projection and ray casting
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: python
+
+    # Project 3D points to pixels.
+    pixels = ct.project.points_to_pixel(points, K, T)
+
+    # Back-project depth image to 3D points.
+    points = ct.project.im_depth_to_points(im_depth, K, T)
+
+    # Ray cast a triangle mesh to depth image given the camera parameters.
+    im_depth = ct.raycast.mesh_to_im_depth(mesh, K, T, height, width)
+
+    # And more...
+
+4. Image and depth I/O
+^^^^^^^^^^^^^^^^^^^^^^
+
+Strict type checks and range checks are enforced. The image and depth I/O
+APIs are specifically designed to solve the following pain points:
+
+- Is my image of type ``float32`` or ``uint8``?
+- Does it have range ``[0, 1]`` or ``[0, 255]``?
+- Is it RGB or BGR?
+- Does my image have an alpha channel?
+- When saving depth image as integer-based ``.png``, is it correctly scaled?
+
+.. code-block:: python
+
+    ct.io.imread()
+    ct.io.imwrite()
+    ct.io.imread_depth()
+    ct.io.imwrite_depth()
+
+5. Command-line tools
+^^^^^^^^^^^^^^^^^^^^^
+
+The ``ct`` command runs in terminal:
+
+.. code-block:: bash
+
+    # Crop image boarders.
+    ct crop-boarders *.png --pad_pixel 10 --skip_cropped --same_crop
+
+    # Draw synchronized bounding boxes interactively.
+    ct draw-bboxes path/to/a.png path/to/b.png
+
+    # For more command-line tools.
+    ct --help
+
+.. raw:: html
+
+

+ +

+ +6. And more +^^^^^^^^^^^ + +- Solve line intersections +- COLMAP tools +- Points normalization +- And more... From 170ba878264a2d3e41542ee9e3dc60668d60cbfc Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 01:51:06 +0800 Subject: [PATCH 40/59] docs: update configuration and improve documentation structure feat(docs/conf.py): dynamically read version and git hash for release info refactor(docs/coordinates.rst): simplify section title for clarity chore(pyproject.toml): add tomli as a dependency for version parsing --- docs/conf.py | 26 +++++++++++++++++++++++--- docs/coordinates.rst | 4 ++-- pyproject.toml | 1 + 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 8aa3ece2..fcf307bc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -8,13 +8,33 @@ import os import sys - -sys.path.insert(0, os.path.abspath("..")) +import subprocess +import tomli + +# Get script directory and workspace root +script_dir = os.path.dirname(os.path.abspath(__file__)) +workspace_root = os.path.abspath(os.path.join(script_dir, "..")) +sys.path.insert(0, workspace_root) + +# Read version from pyproject.toml +with open(os.path.join(workspace_root, "pyproject.toml"), "r", encoding="utf-8") as f: + pyproject = tomli.loads(f.read()) +version = pyproject["project"]["version"] + +# Get git commit hash +try: + git_hash = ( + subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]) + .decode("ascii") + .strip() + ) + release = f"{version}+{git_hash}" +except subprocess.CalledProcessError: + release = version project = "CamTools" copyright = "2024, Yixing Lao" author = "Yixing Lao" -release = "0.1" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/docs/coordinates.rst b/docs/coordinates.rst index 15e12de3..512268b1 100644 --- a/docs/coordinates.rst +++ b/docs/coordinates.rst @@ -1,5 +1,5 @@ -Camera Coordinate System -======================== +Camera Coordinates +================== A homogeneous point ``[X, Y, Z, 1]`` in the world coordinate can be projected to a homogeneous point ``[x, y, 1]`` in the image (pixel) coordinate using the diff --git a/pyproject.toml b/pyproject.toml index d6dab6d2..67401617 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ docs = [ "sphinx-rtd-theme", "myst-parser", "furo", + "tomli", ] torch = [ "torch>=1.8.0", From f479b73ec7b957aa9267089c5e21cd132127b031 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 01:54:24 +0800 Subject: [PATCH 41/59] docs(README.md): add Read the Docs badge to indicate documentation status --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 5888b71f..bd64122d 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@

CamTools: Camera Tools for Computer Vision

+[![Docs](https://readthedocs.org/projects/camtools/badge/?version=latest)](https://camtools.readthedocs.io/en/latest/?badge=latest) [![Formatter](https://github.com/yxlao/camtools/actions/workflows/formatter.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/formatter.yml) [![Unit Test](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml) [![PyPI](https://github.com/yxlao/camtools/actions/workflows/pypi.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/pypi.yml) From 2a3272125f36e3d3e9ba7e1d0e1051d0573db5c7 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 01:56:48 +0800 Subject: [PATCH 42/59] docs(README.md): add new documentation badge with improved styling for better visibility --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index bd64122d..a3534fe8 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@

CamTools: Camera Tools for Computer Vision

+[![Docs](https://img.shields.io/badge/Docs-323940.svg?style=flat&logo=read-the-docs&logoColor=959DA5)](https://camtools.readthedocs.io/en/latest/?badge=latest) [![Docs](https://readthedocs.org/projects/camtools/badge/?version=latest)](https://camtools.readthedocs.io/en/latest/?badge=latest) [![Formatter](https://github.com/yxlao/camtools/actions/workflows/formatter.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/formatter.yml) [![Unit Test](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml) From c1fcd2b9a72e358d08f6e267bcff4ab1f869a1bc Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 01:57:39 +0800 Subject: [PATCH 43/59] docs(README.md): standardize badge labels to lowercase for consistency and readability --- README.md | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index a3534fe8..35fe228c 100644 --- a/README.md +++ b/README.md @@ -6,14 +6,13 @@

CamTools: Camera Tools for Computer Vision

-[![Docs](https://img.shields.io/badge/Docs-323940.svg?style=flat&logo=read-the-docs&logoColor=959DA5)](https://camtools.readthedocs.io/en/latest/?badge=latest) -[![Docs](https://readthedocs.org/projects/camtools/badge/?version=latest)](https://camtools.readthedocs.io/en/latest/?badge=latest) -[![Formatter](https://github.com/yxlao/camtools/actions/workflows/formatter.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/formatter.yml) -[![Unit Test](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml) -[![PyPI](https://github.com/yxlao/camtools/actions/workflows/pypi.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/pypi.yml) -[![GitHub](https://img.shields.io/badge/GitHub-323940.svg?style=flat&logo=github&logoColor=959DA5)](https://github.com/yxlao/camtools) -[![Gitee](https://img.shields.io/badge/Gitee-323940.svg?style=flat&logo=gitee&logoColor=959DA5)](https://gitee.com/yxlao/camtools) -[![PyPI](https://img.shields.io/pypi/v/camtools?style=flat&label=PyPI&logo=PyPI&logoColor=959DA5&labelColor=323940&color=808080)](https://pypi.org/project/camtools) +[![docs](https://readthedocs.org/projects/camtools/badge/?version=latest)](https://camtools.readthedocs.io/en/latest/?badge=latest) +[![formatter](https://github.com/yxlao/camtools/actions/workflows/formatter.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/formatter.yml) +[![unit Test](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml) +[![pypi](https://github.com/yxlao/camtools/actions/workflows/pypi.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/pypi.yml) +[![github](https://img.shields.io/badge/GitHub-323940.svg?style=flat&logo=github&logoColor=959DA5)](https://github.com/yxlao/camtools) +[![gitee](https://img.shields.io/badge/Gitee-323940.svg?style=flat&logo=gitee&logoColor=959DA5)](https://gitee.com/yxlao/camtools) +[![pypi](https://img.shields.io/pypi/v/camtools?style=flat&label=PyPI&logo=PyPI&logoColor=959DA5&labelColor=323940&color=808080)](https://pypi.org/project/camtools) CamTools is a collection of tools for handling cameras in computer vision. It can be used for plotting, converting, projecting, ray casting, and doing more From bfda6c4e39a4b2d894b9c0aba6ea523bfd94b31b Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 01:59:05 +0800 Subject: [PATCH 44/59] Revert "docs(README.md): standardize badge labels to lowercase for consistency and readability" This reverts commit c1fcd2b9a72e358d08f6e267bcff4ab1f869a1bc. --- README.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 35fe228c..a3534fe8 100644 --- a/README.md +++ b/README.md @@ -6,13 +6,14 @@

CamTools: Camera Tools for Computer Vision

-[![docs](https://readthedocs.org/projects/camtools/badge/?version=latest)](https://camtools.readthedocs.io/en/latest/?badge=latest) -[![formatter](https://github.com/yxlao/camtools/actions/workflows/formatter.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/formatter.yml) -[![unit Test](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml) -[![pypi](https://github.com/yxlao/camtools/actions/workflows/pypi.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/pypi.yml) -[![github](https://img.shields.io/badge/GitHub-323940.svg?style=flat&logo=github&logoColor=959DA5)](https://github.com/yxlao/camtools) -[![gitee](https://img.shields.io/badge/Gitee-323940.svg?style=flat&logo=gitee&logoColor=959DA5)](https://gitee.com/yxlao/camtools) -[![pypi](https://img.shields.io/pypi/v/camtools?style=flat&label=PyPI&logo=PyPI&logoColor=959DA5&labelColor=323940&color=808080)](https://pypi.org/project/camtools) +[![Docs](https://img.shields.io/badge/Docs-323940.svg?style=flat&logo=read-the-docs&logoColor=959DA5)](https://camtools.readthedocs.io/en/latest/?badge=latest) +[![Docs](https://readthedocs.org/projects/camtools/badge/?version=latest)](https://camtools.readthedocs.io/en/latest/?badge=latest) +[![Formatter](https://github.com/yxlao/camtools/actions/workflows/formatter.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/formatter.yml) +[![Unit Test](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml) +[![PyPI](https://github.com/yxlao/camtools/actions/workflows/pypi.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/pypi.yml) +[![GitHub](https://img.shields.io/badge/GitHub-323940.svg?style=flat&logo=github&logoColor=959DA5)](https://github.com/yxlao/camtools) +[![Gitee](https://img.shields.io/badge/Gitee-323940.svg?style=flat&logo=gitee&logoColor=959DA5)](https://gitee.com/yxlao/camtools) +[![PyPI](https://img.shields.io/pypi/v/camtools?style=flat&label=PyPI&logo=PyPI&logoColor=959DA5&labelColor=323940&color=808080)](https://pypi.org/project/camtools) CamTools is a collection of tools for handling cameras in computer vision. It can be used for plotting, converting, projecting, ray casting, and doing more From bc892660fd0bd49df19df3ca1a396c926913b516 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 01:59:18 +0800 Subject: [PATCH 45/59] chore(README.md): remove duplicate Docs badge to clean up documentation links --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index a3534fe8..bd64122d 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,6 @@

CamTools: Camera Tools for Computer Vision

-[![Docs](https://img.shields.io/badge/Docs-323940.svg?style=flat&logo=read-the-docs&logoColor=959DA5)](https://camtools.readthedocs.io/en/latest/?badge=latest) [![Docs](https://readthedocs.org/projects/camtools/badge/?version=latest)](https://camtools.readthedocs.io/en/latest/?badge=latest) [![Formatter](https://github.com/yxlao/camtools/actions/workflows/formatter.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/formatter.yml) [![Unit Test](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml) From 430a4294e723a6ecf361e0839a0275dc115084c6 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 01:59:24 +0800 Subject: [PATCH 46/59] docs(README.md): reorder badges to improve readability and consistency --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bd64122d..035acca9 100644 --- a/README.md +++ b/README.md @@ -6,13 +6,13 @@

CamTools: Camera Tools for Computer Vision

-[![Docs](https://readthedocs.org/projects/camtools/badge/?version=latest)](https://camtools.readthedocs.io/en/latest/?badge=latest) [![Formatter](https://github.com/yxlao/camtools/actions/workflows/formatter.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/formatter.yml) [![Unit Test](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml) [![PyPI](https://github.com/yxlao/camtools/actions/workflows/pypi.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/pypi.yml) [![GitHub](https://img.shields.io/badge/GitHub-323940.svg?style=flat&logo=github&logoColor=959DA5)](https://github.com/yxlao/camtools) [![Gitee](https://img.shields.io/badge/Gitee-323940.svg?style=flat&logo=gitee&logoColor=959DA5)](https://gitee.com/yxlao/camtools) [![PyPI](https://img.shields.io/pypi/v/camtools?style=flat&label=PyPI&logo=PyPI&logoColor=959DA5&labelColor=323940&color=808080)](https://pypi.org/project/camtools) +[![Docs](https://readthedocs.org/projects/camtools/badge/?version=latest)](https://camtools.readthedocs.io/en/latest/?badge=latest) CamTools is a collection of tools for handling cameras in computer vision. It can be used for plotting, converting, projecting, ray casting, and doing more From 3d02dc5e163a7af5f7f92c60193b75b37cc18b9e Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 02:02:46 +0800 Subject: [PATCH 47/59] docs: update README.md with navigation links and remove redundant badges refactor(docs/conf.py): simplify path handling and use binary mode for reading pyproject.toml --- README.md | 7 +++++-- docs/conf.py | 7 ++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 035acca9..dadef172 100644 --- a/README.md +++ b/README.md @@ -6,11 +6,14 @@

CamTools: Camera Tools for Computer Vision

+

+ Docs | Source Code | Installation +

+ + [![Formatter](https://github.com/yxlao/camtools/actions/workflows/formatter.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/formatter.yml) [![Unit Test](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml) [![PyPI](https://github.com/yxlao/camtools/actions/workflows/pypi.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/pypi.yml) -[![GitHub](https://img.shields.io/badge/GitHub-323940.svg?style=flat&logo=github&logoColor=959DA5)](https://github.com/yxlao/camtools) -[![Gitee](https://img.shields.io/badge/Gitee-323940.svg?style=flat&logo=gitee&logoColor=959DA5)](https://gitee.com/yxlao/camtools) [![PyPI](https://img.shields.io/pypi/v/camtools?style=flat&label=PyPI&logo=PyPI&logoColor=959DA5&labelColor=323940&color=808080)](https://pypi.org/project/camtools) [![Docs](https://readthedocs.org/projects/camtools/badge/?version=latest)](https://camtools.readthedocs.io/en/latest/?badge=latest) diff --git a/docs/conf.py b/docs/conf.py index fcf307bc..e177bf5c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -11,13 +11,10 @@ import subprocess import tomli -# Get script directory and workspace root -script_dir = os.path.dirname(os.path.abspath(__file__)) -workspace_root = os.path.abspath(os.path.join(script_dir, "..")) -sys.path.insert(0, workspace_root) +sys.path.insert(0, os.path.abspath("..")) # Read version from pyproject.toml -with open(os.path.join(workspace_root, "pyproject.toml"), "r", encoding="utf-8") as f: +with open("../pyproject.toml", "rb") as f: pyproject = tomli.loads(f.read()) version = pyproject["project"]["version"] From 5d5e8d7a0284f4405b2ab1db11351520f17f0ffb Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 02:04:36 +0800 Subject: [PATCH 48/59] docs(README.md): improve formatting and add link to camera coordinate system section --- README.md | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index dadef172..a0217906 100644 --- a/README.md +++ b/README.md @@ -4,10 +4,13 @@ CamTools Logo

+

CamTools: Camera Tools for Computer Vision

- Docs | Source Code | Installation + Docs | + Camera Coordinates | + Installation

@@ -24,8 +27,13 @@ clear and easy-to-use APIs.

- - CamTools Logo + + CamTools Logo

From 0d956c8b46568b576505a98f3c70b2f853d992fd Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 02:05:59 +0800 Subject: [PATCH 49/59] docs(README.md): remove "Camera Coordinates" link from navigation for clarity and simplification --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index a0217906..e8cb2102 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,6 @@

Docs | - Camera Coordinates | Installation

From e0b81b187c21429af7fe48b1d0feed8003a02845 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 02:06:19 +0800 Subject: [PATCH 50/59] chore: remove extra blank line in README.md for consistency and cleaner formatting --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index e8cb2102..e2c84e14 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,6 @@ Installation

- [![Formatter](https://github.com/yxlao/camtools/actions/workflows/formatter.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/formatter.yml) [![Unit Test](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/unit_test.yml) [![PyPI](https://github.com/yxlao/camtools/actions/workflows/pypi.yml/badge.svg)](https://github.com/yxlao/camtools/actions/workflows/pypi.yml) From c92e094fbeb8a4d406ccfff40f13d8cd8ea2e7b0 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 02:08:49 +0800 Subject: [PATCH 51/59] fix(docs/conf.py): decode file content as utf-8 when reading pyproject.toml to handle encoding properly --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index e177bf5c..ad912cc5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -15,7 +15,7 @@ # Read version from pyproject.toml with open("../pyproject.toml", "rb") as f: - pyproject = tomli.loads(f.read()) + pyproject = tomli.loads(f.read().decode("utf-8")) version = pyproject["project"]["version"] # Get git commit hash From 0e83bd9952c9c453ae77090c03f9aad986f404bc Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 02:14:59 +0800 Subject: [PATCH 52/59] refactor(docs): simplify version retrieval by using camtools package instead of parsing pyproject.toml style(docs): replace raw HTML image tags with reStructuredText image directives for better compatibility and maintainability --- docs/conf.py | 8 +++----- docs/index.rst | 47 ++++++++++++++++++++++++++++------------------- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index ad912cc5..5f874b26 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -9,14 +9,12 @@ import os import sys import subprocess -import tomli +import camtools as ct sys.path.insert(0, os.path.abspath("..")) -# Read version from pyproject.toml -with open("../pyproject.toml", "rb") as f: - pyproject = tomli.loads(f.read().decode("utf-8")) -version = pyproject["project"]["version"] +# Get version from camtools package +version = ct.__version__ # Get git commit hash try: diff --git a/docs/index.rst b/docs/index.rst index 57eda9fe..405a1c2a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,28 +1,38 @@ Welcome to CamTools =================== -.. raw:: html +.. only:: not latex -

- - - CamTools Logo - -

+   .. image:: https://raw.githubusercontent.com/yxlao/camtools/main/camtools/assets/camtools_logo_light.png
+      :width: 360
+      :align: center
+      :alt: CamTools Logo
+      :class: only-light
+
+   .. image:: https://raw.githubusercontent.com/yxlao/camtools/main/camtools/assets/camtools_logo_dark.png
+      :width: 360
+      :align: center
+      :alt: CamTools Logo
+      :class: only-dark
 
 CamTools is a collection of tools for handling cameras in computer vision. It
 can be used for plotting, converting, projecting, ray casting, and doing more
 with camera parameters. It follows the standard camera coordinate system with
 clear and easy-to-use APIs.
 
-.. raw:: html
+.. only:: not latex
 
-

- - - Camera Coordinates - -

+ .. image:: https://raw.githubusercontent.com/yxlao/camtools/main/camtools/assets/camera_coordinates_light.png + :width: 520 + :align: center + :alt: Camera Coordinates + :class: only-light + + .. image:: https://raw.githubusercontent.com/yxlao/camtools/main/camtools/assets/camera_coordinates_dark.png + :width: 520 + :align: center + :alt: Camera Coordinates + :class: only-dark .. toctree:: :maxdepth: 1 @@ -50,11 +60,10 @@ Useful for debugging 3D reconstruction and NeRFs! cameras = ct.camera.create_camera_frustums(Ks, Ts) o3d.visualization.draw_geometries([cameras]) -.. raw:: html - -

- -

+.. image:: https://raw.githubusercontent.com/yxlao/camtools/main/camtools/assets/camera_frames.png + :width: 360 + :align: center + :alt: Camera Frames 2. Convert camera parameters ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 5f491e32a87261c42320b8e477fa3ad88afb3044 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 02:15:42 +0800 Subject: [PATCH 53/59] chore(docs/conf.py): remove unused import of camtools module to clean up code --- docs/conf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 5f874b26..3fe53682 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -94,6 +94,5 @@ # Custom module name display modindex_common_prefix = ["camtools."] # Strip 'camtools.' from module index -import camtools as ct sys.modules["ct"] = ct # Allow using 'ct' as an alias in documentation From f208cf0e1757a2fee8b107890c1abb97ab11ad45 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 02:30:10 +0800 Subject: [PATCH 54/59] docs(api): restructure and expand API documentation with new modules and updated toctree depth --- docs/api.rst | 12 +++++++++--- docs/api/artifact.rst | 9 +++++++++ docs/api/colormap.rst | 9 +++++++++ docs/api/geometry.rst | 9 +++++++++ docs/api/image.rst | 9 +++++++++ docs/api/normalize.rst | 9 +++++++++ docs/api/sanity.rst | 9 +++++++++ docs/api/solver.rst | 9 +++++++++ docs/api/transform.rst | 9 +++++++++ docs/api/util.rst | 9 +++++++++ 10 files changed, 90 insertions(+), 3 deletions(-) create mode 100644 docs/api/artifact.rst create mode 100644 docs/api/colormap.rst create mode 100644 docs/api/geometry.rst create mode 100644 docs/api/image.rst create mode 100644 docs/api/normalize.rst create mode 100644 docs/api/sanity.rst create mode 100644 docs/api/solver.rst create mode 100644 docs/api/transform.rst create mode 100644 docs/api/util.rst diff --git a/docs/api.rst b/docs/api.rst index 755da7de..fcde9bd0 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,14 +1,20 @@ API Reference -============= +============ .. toctree:: - :maxdepth: 2 + :maxdepth: 1 api/camera - api/colmap + api/colormap api/convert + api/geometry + api/image api/io api/metric + api/normalize api/project api/raycast api/render + api/solver + api/transform + api/util diff --git a/docs/api/artifact.rst b/docs/api/artifact.rst new file mode 100644 index 00000000..7b818f7b --- /dev/null +++ b/docs/api/artifact.rst @@ -0,0 +1,9 @@ +ct.artifact +=========== + +.. currentmodule:: ct + +.. automodule:: ct.artifact + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/colormap.rst b/docs/api/colormap.rst new file mode 100644 index 00000000..ae2257e9 --- /dev/null +++ b/docs/api/colormap.rst @@ -0,0 +1,9 @@ +ct.colormap +=========== + +.. currentmodule:: ct + +.. automodule:: ct.colormap + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/geometry.rst b/docs/api/geometry.rst new file mode 100644 index 00000000..88b6d8aa --- /dev/null +++ b/docs/api/geometry.rst @@ -0,0 +1,9 @@ +ct.geometry +=========== + +.. currentmodule:: ct + +.. automodule:: ct.geometry + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/image.rst b/docs/api/image.rst new file mode 100644 index 00000000..3d4344b1 --- /dev/null +++ b/docs/api/image.rst @@ -0,0 +1,9 @@ +ct.image +======== + +.. currentmodule:: ct + +.. 
automodule:: ct.image + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/normalize.rst b/docs/api/normalize.rst new file mode 100644 index 00000000..94df2fb1 --- /dev/null +++ b/docs/api/normalize.rst @@ -0,0 +1,9 @@ +ct.normalize +=========== + +.. currentmodule:: ct + +.. automodule:: ct.normalize + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/sanity.rst b/docs/api/sanity.rst new file mode 100644 index 00000000..b36bcf7c --- /dev/null +++ b/docs/api/sanity.rst @@ -0,0 +1,9 @@ +ct.sanity +========= + +.. currentmodule:: ct + +.. automodule:: ct.sanity + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/solver.rst b/docs/api/solver.rst new file mode 100644 index 00000000..a2b87679 --- /dev/null +++ b/docs/api/solver.rst @@ -0,0 +1,9 @@ +ct.solver +========= + +.. currentmodule:: ct + +.. automodule:: ct.solver + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/transform.rst b/docs/api/transform.rst new file mode 100644 index 00000000..89fca24e --- /dev/null +++ b/docs/api/transform.rst @@ -0,0 +1,9 @@ +ct.transform +=========== + +.. currentmodule:: ct + +.. automodule:: ct.transform + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/util.rst b/docs/api/util.rst new file mode 100644 index 00000000..1d8b65c2 --- /dev/null +++ b/docs/api/util.rst @@ -0,0 +1,9 @@ +ct.util +======= + +.. currentmodule:: ct + +.. automodule:: ct.util + :members: + :undoc-members: + :show-inheritance: From acfd0f199d65d18173d268c1aa4c58fdc255af2b Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 02:32:54 +0800 Subject: [PATCH 55/59] docs: remove unused API documentation files and suppress toctree warnings --- docs/api.rst | 8 +++++++- docs/api/artifact.rst | 9 --------- docs/api/colmap.rst | 9 --------- docs/api/sanity.rst | 9 --------- docs/api/util.rst | 9 --------- docs/conf.py | 5 +++++ 6 files changed, 12 insertions(+), 37 deletions(-) delete mode 100644 docs/api/artifact.rst delete mode 100644 docs/api/colmap.rst delete mode 100644 docs/api/sanity.rst delete mode 100644 docs/api/util.rst diff --git a/docs/api.rst b/docs/api.rst index fcde9bd0..b1cf0bdc 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -17,4 +17,10 @@ API Reference api/render api/solver api/transform - api/util + + +.. Not included in the docs website: +.. api/artifact +.. api/colmap +.. api/sanity +.. api/util diff --git a/docs/api/artifact.rst b/docs/api/artifact.rst deleted file mode 100644 index 7b818f7b..00000000 --- a/docs/api/artifact.rst +++ /dev/null @@ -1,9 +0,0 @@ -ct.artifact -=========== - -.. currentmodule:: ct - -.. automodule:: ct.artifact - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/api/colmap.rst b/docs/api/colmap.rst deleted file mode 100644 index 1a4f4185..00000000 --- a/docs/api/colmap.rst +++ /dev/null @@ -1,9 +0,0 @@ -ct.colmap -========= - -.. currentmodule:: ct - -.. automodule:: ct.colmap - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/api/sanity.rst b/docs/api/sanity.rst deleted file mode 100644 index b36bcf7c..00000000 --- a/docs/api/sanity.rst +++ /dev/null @@ -1,9 +0,0 @@ -ct.sanity -========= - -.. currentmodule:: ct - -.. automodule:: ct.sanity - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/api/util.rst b/docs/api/util.rst deleted file mode 100644 index 1d8b65c2..00000000 --- a/docs/api/util.rst +++ /dev/null @@ -1,9 +0,0 @@ -ct.util -======= - -.. currentmodule:: ct - -.. 
automodule:: ct.util - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/conf.py b/docs/conf.py index 3fe53682..ee7be984 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -96,3 +96,8 @@ modindex_common_prefix = ["camtools."] # Strip 'camtools.' from module index sys.modules["ct"] = ct # Allow using 'ct' as an alias in documentation + +# Suppress specific warnings +suppress_warnings = [ + "toctree.excluded", # Suppress warnings about files not in any toctree +] From c554a564213096f960d135ee3b5dc5cec0e0b8a1 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 02:36:52 +0800 Subject: [PATCH 56/59] docs: add repo link to README and update documentation header in index.rst for better navigation and consistency --- README.md | 1 + docs/index.rst | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e2c84e14..92568783 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@

Docs | + Repo | Installation

diff --git a/docs/index.rst b/docs/index.rst index 405a1c2a..2cac033f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,5 +1,5 @@ -Welcome to CamTools -=================== +CamTools Documentation +====================== .. only:: not latex @@ -15,6 +15,14 @@ Welcome to CamTools :alt: CamTools Logo :class: only-dark +.. raw:: html + +

+ Docs | + Repo | + Installation +

+ CamTools is a collection of tools for handling cameras in computer vision. It can be used for plotting, converting, projecting, ray casting, and doing more with camera parameters. It follows the standard camera coordinate system with From 21910803db64a4c8f90d314b2a11d9bb9d0a6b8c Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 02:54:44 +0800 Subject: [PATCH 57/59] chore: rename GitHub workflow from 'Documentation' to 'Docs' for brevity --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 4720887d..557aeb97 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,4 +1,4 @@ -name: Documentation +name: Docs on: workflow_dispatch: From c45bd1a9ffe05a4e09bb7767353c70d5d697521e Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 02:59:06 +0800 Subject: [PATCH 58/59] docs: add camera.rst documentation and update index.rst to reference it --- docs/{coordinates.rst => camera.rst} | 14 ++++++++++++++ docs/index.rst | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) rename docs/{coordinates.rst => camera.rst} (91%) diff --git a/docs/coordinates.rst b/docs/camera.rst similarity index 91% rename from docs/coordinates.rst rename to docs/camera.rst index 512268b1..5676b3a0 100644 --- a/docs/coordinates.rst +++ b/docs/camera.rst @@ -1,6 +1,20 @@ Camera Coordinates ================== +.. only:: not latex + + .. image:: https://raw.githubusercontent.com/yxlao/camtools/main/camtools/assets/camera_coordinates_light.png + :width: 520 + :align: center + :alt: Camera Coordinates + :class: only-light + + .. image:: https://raw.githubusercontent.com/yxlao/camtools/main/camtools/assets/camera_coordinates_dark.png + :width: 520 + :align: center + :alt: Camera Coordinates + :class: only-dark + A homogeneous point ``[X, Y, Z, 1]`` in the world coordinate can be projected to a homogeneous point ``[x, y, 1]`` in the image (pixel) coordinate using the following equation: diff --git a/docs/index.rst b/docs/index.rst index 2cac033f..b548a038 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -47,7 +47,7 @@ clear and easy-to-use APIs. :caption: Docs Home - coordinates + camera installation contributing api From 5b774a806936909503d34b400476baa2d074e6ea Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 29 Dec 2024 03:00:22 +0800 Subject: [PATCH 59/59] docs(index.rst): remove installation link from documentation index to simplify navigation --- docs/index.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index b548a038..5d199762 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -19,8 +19,7 @@ CamTools Documentation

Docs | - Repo | - Installation + Repo

CamTools is a collection of tools for handling cameras in computer vision. It