Skip to content

Add depth map support for camera sensor #6125

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions com.unity.ml-agents/Editor/CameraSensorComponentEditor.cs
Original file line number Diff line number Diff line change
@@ -24,6 +24,7 @@ public override void OnInspectorGUI()
EditorGUILayout.PropertyField(so.FindProperty("m_Width"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_Height"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_Grayscale"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_RGBD"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_ObservationStacks"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_ObservationType"), true);
}
8 changes: 8 additions & 0 deletions com.unity.ml-agents/Runtime/Resources.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

64 changes: 64 additions & 0 deletions com.unity.ml-agents/Runtime/Resources/DepthShader.shader
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
Shader "Custom/DepthShader"
{
    Properties
    {
        _MainTex ("Texture", 2D) = "white" {}
    }
    SubShader
    {
        Pass
        {
            CGPROGRAM
            #pragma vertex vert
            #pragma fragment frag

            #include "UnityCG.cginc"

            struct appdata
            {
                float4 vertex : POSITION;
                float2 uv : TEXCOORD0;
            };

            struct v2f
            {
                float2 uv : TEXCOORD0;
                float4 vertex : SV_POSITION;
                // BUG FIX: was "TEXTCOORD1", which is not a valid HLSL semantic
                // and fails to compile on strict compilers.
                float4 screenPos : TEXCOORD1;
            };

            v2f vert (appdata v)
            {
                v2f o;
                o.vertex = UnityObjectToClipPos(v.vertex);
                // Homogeneous screen position; must be divided by w before sampling.
                o.screenPos = ComputeScreenPos(o.vertex);
                o.uv = v.uv;
                return o;
            }

            sampler2D _MainTex, _CameraDepthTexture;

            float4 frag (v2f i) : SV_Target
            {
                // Source color for this fragment.
                float4 color = tex2D(_MainTex, i.uv);

                // Sample the camera depth texture and convert to eye-space depth.
                // The divide by w completes ComputeScreenPos's perspective mapping
                // (for a full-screen blit, as used here, w == 1 so it is a no-op,
                // but it is correct in the general case). Depth lives in the red
                // channel; LinearEyeDepth expects that scalar, not a float4.
                float2 depthUV = i.screenPos.xy / i.screenPos.w;
                float depth = LinearEyeDepth(tex2D(_CameraDepthTexture, depthUV).r);

                // _ProjectionParams.z is the camera's far plane distance.
                // Anything at or beyond it is reported as 0 ("no depth reading").
                float farPlane = _ProjectionParams.z;
                if (depth > farPlane) depth = 0;

                // Convert color from linear to sRGB so the RGB channels match the
                // appearance of the non-depth (RGB24) rendering path.
                color.rgb = LinearToGammaSpace(saturate(color.rgb));

                // Pack eye-space depth into the alpha channel (the "D" in RGBD).
                color.a = depth;

                return color;
            }
            ENDCG
        }
    }
}
9 changes: 9 additions & 0 deletions com.unity.ml-agents/Runtime/Resources/DepthShader.shader.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 27 additions & 10 deletions com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs
Original file line number Diff line number Diff line change
@@ -13,11 +13,19 @@ public class CameraSensor : ISensor, IBuiltInSensor, IDisposable
int m_Width;
int m_Height;
bool m_Grayscale;
bool m_RGBD;
string m_Name;
private ObservationSpec m_ObservationSpec;
SensorCompressionType m_CompressionType;
Texture2D m_Texture;

/// <summary>
/// Indicates whether or not the Render method is being executed by CameraSensor.
/// This boolean is checked in CameraSensorComponent.OnRenderImage method to avoid
/// applying the depth shader outside of the camera sensor scope.
/// </summary>
public bool m_InCameraSensorRender { get; private set; }

/// <summary>
/// The Camera used for rendering the sensor observations.
/// </summary>
@@ -47,17 +55,19 @@ public SensorCompressionType CompressionType
/// <param name="compression">The compression to apply to the generated image.</param>
/// <param name="observationType">The type of observation.</param>
public CameraSensor(
Camera camera, int width, int height, bool grayscale, string name, SensorCompressionType compression, ObservationType observationType = ObservationType.Default)
Camera camera, int width, int height, bool grayscale, bool rgbd, string name, SensorCompressionType compression, ObservationType observationType = ObservationType.Default)
{
m_Camera = camera;
m_Width = width;
m_Height = height;
m_Grayscale = grayscale;
m_RGBD = rgbd;
m_Name = name;
var channels = grayscale ? 1 : 3;
var channels = rgbd ? 4 : grayscale ? 1 : 3; // RGBD has priority over Grayscale
m_ObservationSpec = ObservationSpec.Visual(channels, height, width, observationType);
m_CompressionType = compression;
m_Texture = new Texture2D(width, height, TextureFormat.RGB24, false);
m_Texture = new Texture2D(width, height, rgbd ? TextureFormat.RGBAFloat : TextureFormat.RGB24, false);
m_InCameraSensorRender = false;
}

/// <summary>
@@ -90,8 +100,11 @@ public byte[] GetCompressedObservation()
using (TimerStack.Instance.Scoped("CameraSensor.GetCompressedObservation"))
{
// TODO support more types here, e.g. JPG
var compressed = m_Texture.EncodeToPNG();
return compressed;
if (m_CompressionType == SensorCompressionType.OPENEXR)
{
return m_Texture.EncodeToEXR();
}
return m_Texture.EncodeToPNG();
}
}

@@ -104,7 +117,7 @@ public int Write(ObservationWriter writer)
{
using (TimerStack.Instance.Scoped("CameraSensor.WriteToTensor"))
{
var numWritten = writer.WriteTexture(m_Texture, m_Grayscale);
var numWritten = writer.WriteTexture(m_Texture, m_Grayscale, m_RGBD);
return numWritten;
}
}
@@ -131,7 +144,7 @@ public CompressionSpec GetCompressionSpec()
/// <param name="texture2D">Texture2D to render to.</param>
/// <param name="width">Width of resulting 2D texture.</param>
/// <param name="height">Height of resulting 2D texture.</param>
public static void ObservationToTexture(Camera obsCamera, Texture2D texture2D, int width, int height)
public void ObservationToTexture(Camera obsCamera, Texture2D texture2D, int width, int height)
{
if (SystemInfo.graphicsDeviceType == GraphicsDeviceType.Null)
{
@@ -140,9 +153,9 @@ public static void ObservationToTexture(Camera obsCamera, Texture2D texture2D, i

var oldRec = obsCamera.rect;
obsCamera.rect = new Rect(0f, 0f, 1f, 1f);
var depth = 24;
var format = RenderTextureFormat.Default;
var readWrite = RenderTextureReadWrite.Default;
var depth = m_RGBD ? 32 : 24;
var format = m_RGBD ? RenderTextureFormat.ARGBFloat : RenderTextureFormat.Default;
var readWrite = m_RGBD ? RenderTextureReadWrite.Linear : RenderTextureReadWrite.Default;

var tempRt =
RenderTexture.GetTemporary(width, height, depth, format, readWrite);
@@ -154,8 +167,12 @@ public static void ObservationToTexture(Camera obsCamera, Texture2D texture2D, i
RenderTexture.active = tempRt;
obsCamera.targetTexture = tempRt;

m_InCameraSensorRender = true;

obsCamera.Render();

m_InCameraSensorRender = false;

texture2D.ReadPixels(new Rect(0, 0, texture2D.width, texture2D.height), 0, 0);

obsCamera.targetTexture = prevCameraRt;
48 changes: 45 additions & 3 deletions com.unity.ml-agents/Runtime/Sensors/CameraSensorComponent.cs
Original file line number Diff line number Diff line change
@@ -67,13 +67,26 @@ public int Height
bool m_Grayscale;

/// <summary>
/// Whether to generate grayscale images or color.
/// Whether to generate grayscale images or color. RGBD must be disabled for grayscale to take effect.
/// Note that changing this after the sensor is created has no effect.
/// </summary>
public bool Grayscale
{
get { return m_Grayscale; }
set { m_Grayscale = value; }
set { m_Grayscale = value; UpdateSensor(); }
}

[HideInInspector, SerializeField, FormerlySerializedAs("rgbd")]
bool m_RGBD;

/// <summary>
/// Whether to generate color+depth images. RGBD has priority over Grayscale.
/// Note that changing this after the sensor is created has no effect.
/// </summary>
public bool RGBD
{
get { return m_RGBD; }
set { m_RGBD = value; UpdateSensor(); }
}

[HideInInspector, SerializeField]
@@ -130,9 +143,15 @@ public int ObservationStacks
set { m_ObservationStacks = value; }
}

/// <summary>
/// The material used to render the depth image.
/// </summary>
private Material m_DepthMaterial;

void Start()
{
UpdateSensor();
m_DepthMaterial = new Material(Shader.Find("Custom/DepthShader"));
}

/// <summary>
@@ -142,7 +161,7 @@ void Start()
public override ISensor[] CreateSensors()
{
Dispose();
m_Sensor = new CameraSensor(m_Camera, m_Width, m_Height, Grayscale, m_SensorName, m_Compression, m_ObservationType);
m_Sensor = new CameraSensor(m_Camera, m_Width, m_Height, Grayscale, RGBD, m_SensorName, m_Compression, m_ObservationType);

if (ObservationStacks != 1)
{
@@ -158,6 +177,14 @@ internal void UpdateSensor()
{
if (m_Sensor != null)
{
// Update depth settings before camera settings because m_Compression might change
if (m_RGBD)
{
m_Grayscale = false;
m_Compression = SensorCompressionType.OPENEXR;
}

// Update camera settings
m_Sensor.Camera = m_Camera;
m_Sensor.CompressionType = m_Compression;
m_Sensor.Camera.enabled = m_RuntimeCameraEnable;
@@ -175,5 +202,20 @@ public void Dispose()
m_Sensor = null;
}
}

/// <summary>
/// Apply the depth material to the camera image if the sensor is set to RGBD.
/// </summary>
void OnRenderImage(RenderTexture src, RenderTexture dest)
{
if (m_RGBD && m_Sensor != null && m_Sensor.m_InCameraSensorRender)
{
Graphics.Blit(src, dest, m_DepthMaterial);
}
else
{
Graphics.Blit(src, dest);
}
}
}
}
7 changes: 6 additions & 1 deletion com.unity.ml-agents/Runtime/Sensors/CompressionSpec.cs
Original file line number Diff line number Diff line change
@@ -14,7 +14,12 @@ public enum SensorCompressionType
/// <summary>
/// PNG format. Data will be stored in binary format.
/// </summary>
PNG
PNG,

/// <summary>
/// OpenEXR format.
/// </summary>
OPENEXR
}

/// <summary>
22 changes: 13 additions & 9 deletions com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs
Original file line number Diff line number Diff line change
@@ -296,7 +296,8 @@ public static class ObservationWriterExtension
public static int WriteTexture(
this ObservationWriter obsWriter,
Texture2D texture,
bool grayScale)
bool grayScale,
bool rgbd = false)
{
if (texture.format == TextureFormat.RGB24)
{
@@ -306,7 +307,7 @@ public static int WriteTexture(
var width = texture.width;
var height = texture.height;

var texturePixels = texture.GetPixels32();
var texturePixels = texture.GetPixels();

// During training, we convert from Texture to PNG before sending to the trainer, which has the
// effect of flipping the image. We need another flip here at inference time to match this.
@@ -316,22 +317,25 @@ public static int WriteTexture(
{
var currentPixel = texturePixels[(height - h - 1) * width + w];

if (grayScale)
if (grayScale && !rgbd)
{
obsWriter[0, h, w] =
(currentPixel.r + currentPixel.g + currentPixel.b) / 3f / 255.0f;
(currentPixel.r + currentPixel.g + currentPixel.b) / 3f;
}
else
{
// For Color32, the r, g and b values are between 0 and 255.
obsWriter[0, h, w] = currentPixel.r / 255.0f;
obsWriter[1, h, w] = currentPixel.g / 255.0f;
obsWriter[2, h, w] = currentPixel.b / 255.0f;
obsWriter[0, h, w] = currentPixel.r;
obsWriter[1, h, w] = currentPixel.g;
obsWriter[2, h, w] = currentPixel.b;
if (rgbd)
{
obsWriter[3, h, w] = currentPixel.a;
}
}
}
}

return height * width * (grayScale ? 1 : 3);
return height * width * (rgbd ? 4 : grayScale ? 1 : 3);
}

internal static int WriteTextureRGB24(
Loading