fix: opencv<->opengl T and pose convert (#52)
yxlao authored Apr 7, 2024
1 parent 9aa2347 commit 8fe8dd1
Showing 3 changed files with 169 additions and 32 deletions.
13 changes: 7 additions & 6 deletions README.md
@@ -170,12 +170,13 @@ the beginning of the README.
 - **Camera coordinate:** right-handed, with $Z$ pointing away from the camera
   towards the view direction and $Y$ axis pointing down. Note that the OpenCV
   convention (camtools' default) is different from the OpenGL/Blender
-  convention, where $Z$ points towards the opposite view direction and the $Y$
-  axis points up. To convert between the OpenCV camera coordinates and the
-  OpenGL-style coordinates, use the conversion functions such as
-  `ct.convert.T_opencv_to_opengl()`, `ct.convert.T_opengl_to_opencv()`,
-  `ct.convert.pose_opencv_to_opengl()`, and
-  `ct.convert.pose_opengl_to_opencv()`, etc.
+  convention, where $Z$ points towards the opposite view direction, $Y$ points
+  up and $X$ points right. To convert between the OpenCV camera coordinates and
+  the OpenGL-style coordinates, use the conversion functions:
+  - `ct.convert.T_opencv_to_opengl()`
+  - `ct.convert.T_opengl_to_opencv()`
+  - `ct.convert.pose_opencv_to_opengl()`
+  - `ct.convert.pose_opengl_to_opencv()`
 - **Image coordinate:** starts from the top-left corner of the image, with $x$
   pointing right (corresponding to the image width) and $y$ pointing down
   (corresponding to the image height). This is consistent with OpenCV. Pay
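
To illustrate the conversion functions listed in the updated README text above, here is a minimal usage sketch. It assumes camtools' convention that `T` is the (4, 4) world-to-camera extrinsic and `pose` is its inverse (camera-to-world), and that the package is imported as `ct`; the snippet itself is not taken from the repository.

```python
import numpy as np
import camtools as ct

# A camera-to-world pose in the OpenCV convention (identity as a trivial example).
pose_cv = np.eye(4)

# OpenCV -> OpenGL/Blender and back; the round trip recovers the input.
pose_gl = ct.convert.pose_opencv_to_opengl(pose_cv)
assert np.allclose(ct.convert.pose_opengl_to_opencv(pose_gl), pose_cv)

# The same round trip for the extrinsic T = inv(pose).
T_cv = ct.convert.pose_to_T(pose_cv)
T_gl = ct.convert.T_opencv_to_opengl(T_cv)
assert np.allclose(ct.convert.T_opengl_to_opencv(T_gl), T_cv)
```
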
57 changes: 31 additions & 26 deletions camtools/convert.py
@@ -166,7 +166,7 @@ def pose_to_T(pose):
     return np.linalg.inv(pose)
 
 
-def T_opengl_to_opencv(T_opengl):
+def T_opengl_to_opencv(T):
     """
     Convert T from OpenGL convention to OpenCV convention.
@@ -181,16 +181,16 @@ def T_opengl_to_opencv(T_opengl):
         - +Z: The negative view direction, pointing back and away from the camera
         - -Z: The view direction
         - Used in: OpenGL, Blender, Nerfstudio
+          https://docs.nerf.studio/quickstart/data_conventions.html#coordinate-conventions
     """
-    sanity.assert_T(T_opengl)
-
-    R_b2p = np.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]])
-
-    R_opengl, t_opengl = T_to_R_t(T_opengl)
-    R = R_b2p @ R_opengl
-    t = t_opengl @ R_b2p
-    T = R_t_to_T(R, t)
-
+    sanity.assert_T(T)
+    # pose = T_to_pose(T)
+    # pose = pose_opengl_to_opencv(pose)
+    # T = pose_to_T(pose)
+    T = np.copy(T)
+    T[1:3, 0:4] *= -1
+    T = T[:, [1, 0, 2, 3]]
+    T[:, 2] *= -1
     return T
 
 
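The commented-out lines in the new body above record an equivalent route through the camera-to-world pose. A sketch of that equivalence, using only functions that already exist in `camtools/convert.py`; the helper name `T_opengl_to_opencv_via_pose` is ad hoc, not part of the library:

```python
import camtools as ct

def T_opengl_to_opencv_via_pose(T_gl):
    # Invert to a pose, convert the pose between conventions, invert back.
    pose_gl = ct.convert.T_to_pose(T_gl)
    pose_cv = ct.convert.pose_opengl_to_opencv(pose_gl)
    return ct.convert.pose_to_T(pose_cv)

# For a valid OpenGL-convention extrinsic, this should agree with
# ct.convert.T_opengl_to_opencv(T_gl) up to floating-point error.
```
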
@@ -209,21 +209,20 @@ def T_opencv_to_opengl(T):
         - +Z: The negative view direction, pointing back and away from the camera
         - -Z: The view direction
         - Used in: OpenGL, Blender, Nerfstudio
+          https://docs.nerf.studio/quickstart/data_conventions.html#coordinate-conventions
     """
     sanity.assert_T(T)
-
-    R_b2p = np.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]])
-    R_p2b = R_b2p.T
-
-    R, t = T_to_R_t(T)
-    R_opengl = R_p2b @ R
-    t_opengl = t @ R_p2b
-    T_opengl = R_t_to_T(R_opengl, t_opengl)
-
-    return T_opengl
+    # pose = T_to_pose(T)
+    # pose = pose_opencv_to_opengl(pose)
+    # T = pose_to_T(pose)
+    T = np.copy(T)
+    T[:, 2] *= -1
+    T = T[:, [1, 0, 2, 3]]
+    T[1:3, 0:4] *= -1
+    return T
 
 
-def pose_opengl_to_opencv(pose_opengl):
+def pose_opengl_to_opencv(pose):
     """
     Convert pose from OpenGL convention to OpenCV convention.
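
Comparing the two implementations above: the new in-place flips and column permutation can be read as multiplication by two fixed, self-inverse matrices, one acting on the camera axes and one on the world axes. The sketch below is illustrative only; `C_cam` and `M_world` are labels chosen here, not camtools identifiers.

```python
import numpy as np

# Flip the camera Y and Z axes (OpenCV <-> OpenGL camera convention).
C_cam = np.diag([1.0, -1.0, -1.0, 1.0])

# Swap the world X and Y axes and flip world Z.
M_world = np.array([
    [0.0, 1.0, 0.0, 0.0],
    [1.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, -1.0, 0.0],
    [0.0, 0.0, 0.0, 1.0],
])

def T_opencv_to_opengl_matrix_form(T_cv):
    # Same result as the in-place version above:
    #   T[:, 2] *= -1; T = T[:, [1, 0, 2, 3]]; T[1:3, 0:4] *= -1
    return C_cam @ T_cv @ M_world

def T_opengl_to_opencv_matrix_form(T_gl):
    # The reverse direction uses the same two matrices, since both are
    # their own inverses.
    return C_cam @ T_gl @ M_world
```
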
@@ -238,9 +237,12 @@ def pose_opengl_to_opencv(pose_opengl):
         - +Z: The negative view direction, pointing back and away from the camera
         - -Z: The view direction
         - Used in: OpenGL, Blender, Nerfstudio
+          https://docs.nerf.studio/quickstart/data_conventions.html#coordinate-conventions
     """
-    sanity.assert_pose(pose_opengl)
-    pose = np.copy(pose_opengl)
+    sanity.assert_pose(pose)
+    pose = np.copy(pose)
+    pose[2, :] *= -1
+    pose = pose[[1, 0, 2, 3], :]
     pose[0:3, 1:3] *= -1
     return pose
 
@@ -260,11 +262,14 @@ def pose_opencv_to_opengl(pose):
         - +Z: The negative view direction, pointing back and away from the camera
         - -Z: The view direction
         - Used in: OpenGL, Blender, Nerfstudio
+          https://docs.nerf.studio/quickstart/data_conventions.html#coordinate-conventions
     """
     sanity.assert_pose(pose)
-    pose_opengl = np.copy(pose)
-    pose_opengl[0:3, 1:3] *= -1
-    return pose_opengl
+    pose = np.copy(pose)
+    pose[0:3, 1:3] *= -1
+    pose = pose[[1, 0, 2, 3], :]
+    pose[2, :] *= -1
+    return pose
 
 
 def R_t_to_C(R, t):
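
The pose conversions above admit the same reading, with the world-axis change applied on the left and the camera-axis flip on the right, which is consistent with `T = inv(pose)`. Again an illustrative sketch with ad-hoc names, not library code:

```python
import numpy as np

C_cam = np.diag([1.0, -1.0, -1.0, 1.0])  # flip camera Y and Z
M_world = np.array([                     # swap world X and Y, flip world Z
    [0.0, 1.0, 0.0, 0.0],
    [1.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, -1.0, 0.0],
    [0.0, 0.0, 0.0, 1.0],
])

def pose_opencv_to_opengl_matrix_form(pose_cv):
    # Same result as the in-place version above:
    #   pose[0:3, 1:3] *= -1; pose = pose[[1, 0, 2, 3], :]; pose[2, :] *= -1
    return M_world @ pose_cv @ C_cam

# Inverting both sides gives C_cam @ T_cv @ M_world, i.e. the T_opencv_to_opengl
# conversion above, so the T and pose functions remain consistent.
```
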
131 changes: 131 additions & 0 deletions test/test_convert.py
@@ -103,3 +103,134 @@ def HouseHolderQR(A):
# print(f"> Rotation {R.shape}:\n{R}")
# print(f"> Translation {t.shape}:\n{t}")
# print(f"> Projection {P.shape}:\n{P}")


def test_convert_pose_opencv_opengl():

def gen_random_pose():
axis = np.random.normal(size=3)
axis = axis / np.linalg.norm(axis)
angle = np.random.uniform(0, 2 * np.pi)
# Skew-symmetric matrix
ss = np.array(
[
[0, -axis[2], axis[1]],
[axis[2], 0, -axis[0]],
[-axis[1], axis[0], 0],
]
)
RT = np.eye(3) + np.sin(angle) * ss + (1 - np.cos(angle)) * np.dot(ss, ss)
c = np.random.uniform(-10, 10, size=(3,))
pose = np.eye(4)
pose[:3, :3] = RT
pose[:3, 3] = c

return pose

for _ in range(10):
pose = gen_random_pose()
T = ct.convert.pose_to_T(pose)

# Test convert pose bidirectionally
pose_cv = np.copy(pose)
pose_gl = ct.convert.pose_opencv_to_opengl(pose_cv)
pose_cv_recovered = ct.convert.pose_opengl_to_opencv(pose_gl)
pose_gl_recovered = ct.convert.pose_opencv_to_opengl(pose_cv_recovered)
np.testing.assert_allclose(pose_cv, pose_cv_recovered, rtol=1e-5, atol=1e-5)
np.testing.assert_allclose(pose_gl, pose_gl_recovered, rtol=1e-5, atol=1e-5)

# Test convert T bidirectionally
T_cv = np.copy(T)
T_gl = ct.convert.T_opencv_to_opengl(T_cv)
T_cv_recovered = ct.convert.T_opengl_to_opencv(T_gl)
T_gl_recovered = ct.convert.T_opencv_to_opengl(T_cv_recovered)
np.testing.assert_allclose(T_cv, T_cv_recovered, rtol=1e-5, atol=1e-5)
np.testing.assert_allclose(T_gl, T_gl_recovered, rtol=1e-5, atol=1e-5)

# Test T and pose are consistent across conversions
np.testing.assert_allclose(
pose_cv,
ct.convert.T_to_pose(T_cv),
rtol=1e-5,
atol=1e-5,
)
np.testing.assert_allclose(
pose_gl,
ct.convert.T_to_pose(T_gl),
rtol=1e-5,
atol=1e-5,
)
np.testing.assert_allclose(
pose_cv_recovered,
ct.convert.T_to_pose(T_cv_recovered),
rtol=1e-5,
atol=1e-5,
)
np.testing.assert_allclose(
pose_gl_recovered,
ct.convert.T_to_pose(T_gl_recovered),
rtol=1e-5,
atol=1e-5,
)


def test_convert_T_opencv_to_opengl():

def gen_random_T():
R = ct.convert.roll_pitch_yaw_to_R(
np.random.uniform(-np.pi, np.pi),
np.random.uniform(-np.pi, np.pi),
np.random.uniform(-np.pi, np.pi),
)
t = np.random.uniform(-10, 10, size=(3,))
T = np.eye(4)
T[:3, :3] = R
T[:3, 3] = t

return T

for _ in range(10):
T = gen_random_T()
pose = ct.convert.T_to_pose(T)

# Test convert T bidirectionally
T_cv = np.copy(T)
T_gl = ct.convert.T_opencv_to_opengl(T_cv)
T_cv_recovered = ct.convert.T_opengl_to_opencv(T_gl)
T_gl_recovered = ct.convert.T_opencv_to_opengl(T_cv_recovered)
np.testing.assert_allclose(T_cv, T_cv_recovered, rtol=1e-5, atol=1e-5)
np.testing.assert_allclose(T_gl, T_gl_recovered, rtol=1e-5, atol=1e-5)

# Test convert pose bidirectionally
pose_cv = np.copy(pose)
pose_gl = ct.convert.pose_opencv_to_opengl(pose_cv)
pose_cv_recovered = ct.convert.pose_opengl_to_opencv(pose_gl)
pose_gl_recovered = ct.convert.pose_opencv_to_opengl(pose_cv_recovered)
np.testing.assert_allclose(pose_cv, pose_cv_recovered, rtol=1e-5, atol=1e-5)
np.testing.assert_allclose(pose_gl, pose_gl_recovered, rtol=1e-5, atol=1e-5)

# Test T and pose are consistent across conversions
np.testing.assert_allclose(
T_cv,
ct.convert.pose_to_T(pose_cv),
rtol=1e-5,
atol=1e-5,
)
np.testing.assert_allclose(
T_gl,
ct.convert.pose_to_T(pose_gl),
rtol=1e-5,
atol=1e-5,
)
np.testing.assert_allclose(
T_cv_recovered,
ct.convert.pose_to_T(pose_cv_recovered),
rtol=1e-5,
atol=1e-5,
)
np.testing.assert_allclose(
T_gl_recovered,
ct.convert.pose_to_T(pose_gl_recovered),
rtol=1e-5,
atol=1e-5,
)

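A note on the test helpers above: `gen_random_pose` in the first test draws a random unit axis and angle and assembles the rotation with Rodrigues' formula, where `ss` is the skew-symmetric cross-product matrix $K$ of the axis:

$$R = I + \sin(\theta)\,K + (1 - \cos(\theta))\,K^{2}$$

The translation block of the pose is then a uniformly sampled vector, while `gen_random_T` in the second test builds its rotations from random roll, pitch, and yaw via `ct.convert.roll_pitch_yaw_to_R` instead.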