Upgraded from gym to Gymnasium #6060

Open · wants to merge 1 commit into base: develop
366 changes: 184 additions & 182 deletions colab/Colab_UnityEnvironment_1_Run.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions colab/Colab_UnityEnvironment_4_SB3VectorEnv.ipynb
@@ -161,8 +161,8 @@
"from pathlib import Path\n",
"from typing import Callable, Any\n",
"\n",
"import gym\n",
"from gym import Env\n",
"import gymnasium as gym\n",
"from gymnasium import Env\n",
"\n",
"from stable_baselines3 import PPO\n",
"from stable_baselines3.common.vec_env import VecMonitor, VecEnv, SubprocVecEnv\n",
2 changes: 1 addition & 1 deletion docs/Installation-Anaconda-Windows.md
@@ -144,7 +144,7 @@ reinforcement learning trainers to use with Unity environments.
The `ml-agents-envs` subdirectory contains a Python API to interface with Unity,
which the `ml-agents` package depends on.

-The `gym-unity` subdirectory contains a package to interface with OpenAI Gym.
+The `gym-unity` subdirectory contains a package to interface with Gymnasium.

Keep in mind where the files were downloaded, as you will need the trainer
config files in this directory when running `mlagents-learn`. Make sure you are
2 changes: 1 addition & 1 deletion docs/Python-Gym-API.md
@@ -93,7 +93,7 @@ observation, a single discrete action and a single Agent in the scene.
Add the following code to the `train_unity.py` file:

```python
-import gym
+import gymnasium as gym

from baselines import deepq
from baselines import logger
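# --- Sketch beyond this page's baselines example (assumptions flagged) ---
# The wrapped Unity env can also be driven with the plain Gymnasium loop.
# The build path "./envs/GridWorld" and the uint8_visual flag are assumptions,
# not taken from this page.
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.envs.unity_gym_env import UnityToGymWrapper

unity_env = UnityEnvironment("./envs/GridWorld")
env = UnityToGymWrapper(unity_env, uint8_visual=True)

obs, info = env.reset()
terminated = truncated = False
while not (terminated or truncated):
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
env.close()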
2 changes: 1 addition & 1 deletion localized_docs/KR/docs/Installation-Anaconda-Windows.md
@@ -112,7 +112,7 @@ git clone https://github.com/Unity-Technologies/ml-agents.git

The `ml-agents-envs` subdirectory contains a Python API to interface with Unity, which the `ml-agents` package depends on.

-The `gym-unity` subdirectory contains a package to interface with OpenAI Gym.
+The `gym-unity` subdirectory contains a package to interface with Gymnasium.

Keep in mind where the files were downloaded, as you will need the trainer config files in this directory when running `mlagents-learn`.
Make sure you are connected to the internet and type the following command into the Anaconda prompt:
2 changes: 1 addition & 1 deletion localized_docs/KR/docs/Installation.md
@@ -36,7 +36,7 @@ git clone https://github.com/Unity-Technologies/ml-agents.git

The `ml-agents-envs` subdirectory contains a Python API to interface with Unity, which the `ml-agents` package depends on.

-The `gym-unity` subdirectory contains a package to interface with OpenAI Gym.
+The `gym-unity` subdirectory contains a package to interface with Gymnasium.

### Install Python and the mlagents package

2 changes: 1 addition & 1 deletion localized_docs/RU/docs/Установка.md
@@ -12,7 +12,7 @@ The ML-Agents Toolkit consists of several components
  an API for interacting with a Unity scene. This package manages the data exchange between the Unity scene and the
  machine learning algorithms implemented in Python. The mlagents package depends on mlagents_envs.
- ([`gym_unity`](https://github.com/Unity-Technologies/ml-agents/tree/main/gym-unity)) - lets you wrap your Unity scene
-  in an OpenAI Gym environment.
+  in a Gymnasium environment.
- Unity [Project](https://github.com/Unity-Technologies/ml-agents/tree/main/Project),
  containing [example scenes](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Learning-Environment-Examples.md)
  where various ML-Agents capabilities are implemented for illustration.
2 changes: 1 addition & 1 deletion localized_docs/TR/docs/Installation.md
@@ -7,7 +7,7 @@ The ML-Agents Toolkit contains several components:
- [`mlagents`](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/ml-agents) contains the machine learning algorithms that let you train the behaviors in your Unity scene. You will therefore need to install the `mlagents` package.
- [`mlagents_envs`](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/ml-agents-envs) contains a Python API for interacting with a Unity scene. It is a foundational layer that facilitates data messaging between a Unity scene and the Python machine learning algorithms. Consequently, `mlagents` depends on the `mlagents_envs` API.
-- [`gym_unity`](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/gym-unity) provides a Python wrapper for your Unity scene that supports the OpenAI Gym interface.
+- [`gym_unity`](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/gym-unity) provides a Python wrapper for your Unity scene that supports the Gymnasium interface.
<!-- düzenle learning-envir... -->
- The Unity [Project](../Project/) folder contains scenes that highlight the toolkit's various features, with
  [example environments](Learning-Environment-Examples.md) to help you get started.
52 changes: 22 additions & 30 deletions ml-agents-envs/mlagents_envs/envs/unity_gym_env.py
@@ -1,10 +1,16 @@
"""
An adapter between Unity ml-agents BaseEnv and Gymnasium Env.

Remixed from https://github.com/Unity-Technologies/ml-agents/blob/develop/ml-agents-envs/mlagents_envs/envs/unity_gym_env.py
"""

import itertools

import numpy as np
from typing import Any, Dict, List, Optional, Tuple, Union

import gym
from gym import error, spaces
import gymnasium as gym
from gymnasium import error, spaces

from mlagents_envs.base_env import ActionTuple, BaseEnv
from mlagents_envs.base_env import DecisionSteps, TerminalSteps
@@ -20,7 +26,7 @@ class UnityGymException(error.Error):


logger = logging_util.get_logger(__name__)
-GymStepResult = Tuple[np.ndarray, float, bool, Dict]
+GymStepResult = Tuple[np.ndarray, float, bool, bool, Dict]


class UnityToGymWrapper(gym.Env):
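The second `bool` added to `GymStepResult` is Gymnasium's split of old gym's single `done` flag into `terminated` (the episode ended inside the MDP, e.g. success or failure) and `truncated` (it was cut off from outside, e.g. by a time limit). A hypothetical adapter, for illustration only, that restores the old four-tuple for legacy callers:

```python
def old_style_step(env, action):
    # Collapse Gymnasium's (terminated, truncated) pair back into one flag.
    obs, reward, terminated, truncated, info = env.step(action)
    return obs, reward, terminated or truncated, info
```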
@@ -107,13 +113,13 @@ def __init__(
            self.action_size = self.group_spec.action_spec.discrete_size
            branches = self.group_spec.action_spec.discrete_branches
            if self.group_spec.action_spec.discrete_size == 1:
-                self._action_space = spaces.Discrete(branches[0])
+                self.action_space = spaces.Discrete(branches[0])
            else:
                if flatten_branched:
                    self._flattener = ActionFlattener(branches)
-                    self._action_space = self._flattener.action_space
+                    self.action_space = self._flattener.action_space
                else:
-                    self._action_space = spaces.MultiDiscrete(branches)
+                    self.action_space = spaces.MultiDiscrete(branches)

        elif self.group_spec.action_spec.is_continuous():
            if flatten_branched:
@@ -124,15 +130,15 @@ def __init__(

            self.action_size = self.group_spec.action_spec.continuous_size
            high = np.array([1] * self.group_spec.action_spec.continuous_size)
-            self._action_space = spaces.Box(-high, high, dtype=np.float32)
+            self.action_space = spaces.Box(-high, high, dtype=np.float32)
        else:
            raise UnityGymException(
                "The gym wrapper does not provide explicit support for both discrete "
                "and continuous actions."
            )

        if action_space_seed is not None:
-            self._action_space.seed(action_space_seed)
+            self.action_space.seed(action_space_seed)

        # Set observations space
        list_spaces: List[gym.Space] = []
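Exposing `action_space` and `observation_space` as plain attributes (here and in the surrounding hunks) matches how Gymnasium tooling uses them: wrappers and environment checkers read the attributes directly and may reassign them, which a getter-only property would turn into an `AttributeError`. A small usage sketch, assuming an already-constructed `env`:

```python
env.action_space.seed(42)                # reproducible sampling
action = env.action_space.sample()
assert env.action_space.contains(action)
```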
@@ -147,11 +153,11 @@ def __init__(
            high = np.array([np.inf] * self._get_vec_obs_size())
            list_spaces.append(spaces.Box(-high, high, dtype=np.float32))
        if self._allow_multiple_obs:
-            self._observation_space = spaces.Tuple(list_spaces)
+            self.observation_space = spaces.Tuple(list_spaces)
        else:
-            self._observation_space = list_spaces[0]  # only return the first one
+            self.observation_space = list_spaces[0]  # only return the first one

-    def reset(self) -> Union[List[np.ndarray], np.ndarray]:
+    def reset(self) -> Tuple[Union[List[np.ndarray], np.ndarray], Dict]:
        """Resets the state of the environment and returns an initial observation.
        Returns: observation (object/list): the initial observation of the
        space.
@@ -163,7 +169,7 @@ def reset(self) -> Union[List[np.ndarray], np.ndarray]:
        self.game_over = False

        res: GymStepResult = self._single_step(decision_step)
-        return res[0]
+        return res[0], {}

    def step(self, action: List[Any]) -> GymStepResult:
        """Run one timestep of the environment's dynamics. When end of
@@ -229,7 +235,7 @@ def _single_step(self, info: Union[DecisionSteps, TerminalSteps]) -> GymStepResu

        done = isinstance(info, TerminalSteps)

-        return (default_observation, info.reward[0], done, {"step": info})
+        return (default_observation, info.reward[0], done, False, {"step": info})

    def _preprocess_single(self, single_visual_obs: np.ndarray) -> np.ndarray:
        if self.uint8_visual:
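This hunk hard-codes `truncated=False`. A possible refinement that is not part of this diff: `TerminalSteps` exposes an `interrupted` array marking episodes cut off by the max-step limit, which maps naturally onto Gymnasium's `truncated` flag. A sketch:

```python
# Hypothetical alternative return for _single_step (not in this PR):
done = isinstance(info, TerminalSteps)
truncated = done and bool(info.interrupted[0])
terminated = done and not truncated
return (default_observation, info.reward[0], terminated, truncated, {"step": info})
```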
@@ -303,23 +309,9 @@ def _check_agents(n_agents: int) -> None:
            raise UnityGymException(
                f"There can only be one Agent in the environment but {n_agents} were detected."
            )

-    @property
-    def metadata(self):
-        return {"render.modes": ["rgb_array"]}
-
-    @property
-    def reward_range(self) -> Tuple[float, float]:
-        return -float("inf"), float("inf")
-
-    @property
-    def action_space(self) -> gym.Space:
-        return self._action_space
-
-    @property
-    def observation_space(self):
-        return self._observation_space
+    metadata = {"render.modes": ["rgb_array"]}
+    reward_range = (-float("inf"), float("inf"))


class ActionFlattener:
    """