diff --git a/agents/gpt.py b/agents/gpt.py index 49733ff..48abec2 100644 --- a/agents/gpt.py +++ b/agents/gpt.py @@ -1,10 +1,11 @@ from dataclasses import dataclass, field +from typing import Any, Dict, List from api.classes import Agent, AvailableActions, Action, Observation, Rules -import random import openai import api.util as util import ast import json +from PIL import Image action_format_instructions_no_openended = """\ @@ -30,29 +31,38 @@ @dataclass class OpenAITextAgent(Agent): - openai_model: str - agent_type_id: str - system_message: str = "You are an agent playing a game. Select the action that maximizes your probability of winning." - max_retries: int = 3 - transparent_reasoning: bool = False + openai_model : str + agent_type_id : str + system_message : str = "You are an agent playing a game. Select the action that maximizes your probability of winning." + max_retries : int = 3 + transparent_reasoning : bool = False mode: int = 0 # 0 = normal, 1 = chain of thought, 2 = babble and prune - + def print(self, *args, **kwargs): if self.transparent_reasoning: print(self.agent_type_id, *args, **kwargs) + + def get_user_message_content(self, text_prompt: str, image: Image) -> List[Dict[str, Any]]: + return [ + { + "type": "text", + "text": text_prompt + } + ] - def take_action( - self, - rules: Rules, - observation: Observation, - available_actions: AvailableActions, - show_state: bool, - ): + def get_request_params(self, messages: List[Dict[str, Any]]) -> Dict[str, Any]: + return { + "model": self.openai_model, + "messages": messages, + "response_format": { "type": "json_object" } + } + + def take_action(self, rules: Rules, observation: Observation, available_actions: AvailableActions, show_state : bool) -> Action: valid_actions = [] prompt = f"You are playing a game called {rules.title}. 
The rules are as follows:\n{rules.summary}\n" if rules.additional_details != None: prompt += "The following are headings with additional information about the rules that you can expand by taking the action Explain().\n" - details_dict = {f"H{i+1}": topic for i, topic in enumerate(rules.additional_details)} + details_dict = {f"H{i+1}": topic + " - " + description for i, (topic, description) in enumerate(rules.additional_details.items())} prompt += json.dumps(details_dict, indent=4) valid_actions.extend(f"Explain({h})" for h in list(details_dict.keys())) @@ -83,7 +93,10 @@ def take_action( ): prompt += "Return the action Explain() to receive additional info about what any of the above actions do.\n" - messages = [{"role": "system", "content": self.system_message}] + messages = [ + {"role": "system", "content": self.system_message}, + {"role": "user", "content": self.get_user_message_content(prompt, observation.image)}, + ] # Chain of Thought if self.mode == 1: @@ -117,7 +130,7 @@ def take_action( ) messages.append({"role": "assistant", "content": response}) prompt = "" - + self.print( f"GPT listed the following actions as possibilities: {response}" ) @@ -126,24 +139,20 @@ def take_action( prompt += str(list(available_actions.predefined)) prompt += "\nOr if you choose an openended action, you must return json with an 'action' key which contains one of the following valid actions and an 'openeded_response' key which contains your reponse to the prompt:\n" prompt += str(list(available_actions.openended)) + #prompt += "\nMake sure to return ONLY a JSON. It should contain nothing outside the curly braces of the JSON." 
messages.append({"role": "user", "content": prompt}) - + + + #print(prompt) result = None + for _ in range(self.max_retries): - response = ( - openai_client.chat.completions.create( - model=self.openai_model, - response_format={"type": "json_object"}, - messages=messages, - ) - .choices[0] - .message.content - ) + response = openai_client.chat.completions.create(**self.get_request_params(messages)).choices[0].message.content messages.append({"role": "assistant", "content": response}) self.print("GPT responded with", response) try: - action = ast.literal_eval(response) + action = ast.literal_eval(util.extract_json(response)) except: self.print("GPT returned invalid JSON") continue @@ -154,27 +163,23 @@ def take_action( messages.append({"role": "user", "content": error_message}) continue + if action["action"] in valid_actions: self.print("GPT chose valid action", action) result = action break - + self.print("GPT returned invalid action", action) error_message = f"{action['action']} is not one of the valid actions. " error_message += "As a reminder, the valid actions are as follows:\n" error_message += f"{str(list(valid_actions))}\n" error_message += "Please return a json with the key 'action' with the action you choose and (optionally) the key 'openended_response' if you select openended response action." 
messages.append({"role": "user", "content": error_message}) + if result == None: - self.print( - f"WARNING: GPT returned an a random action after {self.max_retries} tries" - ) + self.print(f"WARNING: GPT returned an a random action after {self.max_retries} tries") return Action(action_id=None) - return Action( - action_id=result["action"], - openended_response=result.get("openended_response"), - ) - + return Action(action_id=result["action"], openended_response=result.get("openended_response")) @dataclass class ChatGPTText(OpenAITextAgent): @@ -199,3 +204,30 @@ class BabbleAndPrune(OpenAITextAgent): openai_model: str = "gpt-4-1106-preview" agent_type_id: str = "b&p" mode: int = 2 + +@dataclass +class GPT4Vision(OpenAITextAgent): + openai_model : str = "gpt-4-vision-preview" + agent_type_id : str = "gpt-4-vision" + is_vision_agent : bool = True + + def get_user_message_content(self, text_prompt: str, image: Image) -> List[Dict[str, str]]: + content = super().get_user_message_content(text_prompt, image) + if image is not None: + content.append({ + "type": "image_url", + "image_url": { + "url": f"data:image/png;base64,{util.base64_encode_image(image)}", + "detail": "low" + } + }) + return content + + def get_request_params(self, messages: List[Dict[str, Any]]) -> Dict[str, Any]: + return { + "model": self.openai_model, + "messages": messages, + # As vision models have a low(but undocumented?) 
default value for below parameter + # https://community.openai.com/t/documented-max-token-default-is-incorrect-for-gpt-4-vision-preview/507329 + "max_tokens": 600, + } \ No newline at end of file diff --git a/agents/random_agent.py b/agents/random_agent.py index a958b45..e6174fe 100644 --- a/agents/random_agent.py +++ b/agents/random_agent.py @@ -6,6 +6,6 @@ class RandomAgent(Agent): agent_type_id : str = "random" - def take_action(self, rules : Rules, observation: Observation, available_actions: AvailableActions, show_state : bool): + def take_action(self, rules : Rules, observation: Observation, available_actions: AvailableActions, show_state : bool) -> Action: actions = list(available_actions.predefined.keys()) return Action(action_id=random.choice(actions)) \ No newline at end of file diff --git a/api/classes.py b/api/classes.py index 430de24..0c9569e 100644 --- a/api/classes.py +++ b/api/classes.py @@ -1,4 +1,4 @@ -from typing import List, Dict, Optional, Tuple +from typing import List, Dict, Optional, Tuple, Type from dataclasses import dataclass, field from abc import abstractmethod from PIL import Image @@ -6,8 +6,24 @@ @dataclass class Observation: - text : str + text : str = "" image : Image = None + + def __eq__(self, other): + if not isinstance(other, Observation): + return False + + # Check text equality + if self.text != other.text: + return False + + # Check image equality + if self.image is None and other.image is None: + return True + elif self.image is None or other.image is None: + return False + else: + return (self.image.tobytes() == other.image.tobytes()) @dataclass class AvailableActions: @@ -30,7 +46,7 @@ class Agent: agent_type_id : str @abstractmethod - def take_action(self, rules : dict, observation: Observation, available_actions : AvailableActions): + def take_action(self, rules : dict, observation: Observation, available_actions : AvailableActions, show_state : bool) -> Action: pass @dataclass @@ -51,7 +67,7 @@ class Game: 
agent_2_kwargs : dict = field(default_factory=dict) # kwargs to pass to the agent 2 class when initializing. @abstractmethod - def init_game(self, agent_1: Agent, agent_2: Agent): + def init_game(self, agent_1: Type[Agent], agent_2: Type[Agent]): pass @abstractmethod diff --git a/api/play_game.py b/api/play_game.py index 92e72d1..a61eea2 100644 --- a/api/play_game.py +++ b/api/play_game.py @@ -81,8 +81,10 @@ def play_game(agent_1_path, agent_2_path, game_path, num_matches = 1, save_resul util.save_json(matches, "matches.json") print("Saved match information") - agent_1_rating = agent_1_rating + K * (player_1_score - agent_1_expected_score) + agent_1_rating = agent_1_rating + K * (player_1_score - agent_1_expected_score) agent_2_rating = agent_2_rating + K * (player_2_score - agent_2_expected_score) + # Without below line, we get a KeyError: '' + all_ratings.setdefault(game_class.id, {}) all_ratings[game_class.id][agent_1_id] = agent_1_rating all_ratings[game_class.id][agent_2_id] = agent_2_rating print("Updated elos:") diff --git a/api/util.py b/api/util.py index 1ed3f40..df6d7b5 100644 --- a/api/util.py +++ b/api/util.py @@ -1,6 +1,10 @@ import importlib import os import json +from PIL import Image +from io import BytesIO +import base64 +import re def save_json(data, file_path): if not os.path.exists(file_path): @@ -8,7 +12,6 @@ def save_json(data, file_path): with open(file_path, "w") as f: json.dump(data, f, indent=4) - def load_json(file_path): if not os.path.exists(file_path): raise ValueError(f"File {file_path} does not exist") @@ -18,4 +21,20 @@ def load_json(file_path): def import_class(class_path): module_path, class_name = class_path.rsplit(".", 1) module = importlib.import_module(module_path) - return getattr(module, class_name) \ No newline at end of file + return getattr(module, class_name) + +def base64_encode_image(image: Image) -> str: + img_buffer = BytesIO() + image.save(img_buffer, format="PNG") + img_str = 
base64.b64encode(img_buffer.getvalue()).decode('utf-8')
+    return img_str
+
+def extract_json(input: str) -> str:
+    json_match = re.search(r'{.*}', input, re.DOTALL)
+    if json_match is None:
+        raise ValueError(f"Could not find JSON in input: {input}")
+    json_content = json_match.group(0)
+    return json_content
+    # Dead code below: callers parse the returned JSON string themselves,
+    # # response_data = json.loads(json_content)
+    # # return response_data
\ No newline at end of file
diff --git a/games/atari/README.md b/games/atari/README.md
new file mode 100644
index 0000000..156646a
--- /dev/null
+++ b/games/atari/README.md
@@ -0,0 +1,64 @@
+# Multiplayer Atari Games via PettingZoo
+This folder contains ported Atari Games made available in [PettingZoo](https://pettingzoo.farama.org/), a multi agent games environment with an API similar to [OpenAI gym](https://gymnasium.farama.org/).
+
+
+## Installation
+
+Running these Atari games makes use of the [AtariARI](https://github.com/mila-iqia/atari-representation-learning.git
+) and [PettingZoo](https://pettingzoo.farama.org/environments/atari/boxing/) libraries.
+
+### 0. Install pip3
+
+If not already installed (i.e. if the pip3 command is not found), install pip3:
+> sudo apt-get install python3-pip
+
+Then, upgrade pip
+
+>python3 -m pip install --upgrade pip
+
+### 1. Install AtariARI
+
+Successfully run the two commands below
+
+>pip3 install 'gym[atari]'
+
+>pip3 install git+https://github.com/mila-iqia/atari-representation-learning.git
+
+### 2. Install PettingZoo
+
+Run
+>pip3 install 'pettingzoo[atari]'
+
+### 3. Install misc libraries
+
+> pip3 install matplotlib
+
+> pip3 install autorom
+
+> AutoROM
+
+## PettingZoo implementation of realtime games
+
+Atari games were supposed to appear realtime for humans, but under the hood they are programmed as turn-based games with tens of turns per second.
+
+To a human, a game running at full speed still appears realtime.
+ +PettingZoo models these games as [Agent Environment Cycle](https://pettingzoo.farama.org/api/aec/) environments. + +![Alt text](image.png) + +At each step, a player(depending on turn) is queried for their next move. + +## GameBench implementation of PettingZoo games + +Agents are run in background threads. The agent loop is: + +1. Get current game state +2. Query agent on what action should be done +3. Store this action in a variable Act + +At every turn, we query the stored action Act for that player and execute it. + +## Current list of games + +1. [Boxing](https://pettingzoo.farama.org/environments/atari/boxing/) \ No newline at end of file diff --git a/games/atari/boxing.py b/games/atari/boxing.py new file mode 100644 index 0000000..055f8bd --- /dev/null +++ b/games/atari/boxing.py @@ -0,0 +1,274 @@ + +#%% +import threading +from time import sleep +from atariari.benchmark.wrapper import ram2label +from pettingzoo.atari import boxing_v2 +from dataclasses import dataclass +from typing import List, Dict, Tuple +from api.classes import Observation, Action, Agent, AvailableActions, Game, Rules +from PIL import Image +import logging +import random +import asyncio +import matplotlib.pyplot as plt +import threading + +# %% + +boxing_rules = Rules( + title= "Atari 2600: Boxing", + summary=""" + Boxing is an adversarial game where precise control and appropriate responses to your opponent are key. + The players have two minutes (around 1200 steps) to duke it out in the ring. Each step, they can move and punch. + """, + additional_details={"Scoring": """ + Scoring hinges on landing punches on the opponent's head, with precision in aligning your punches crucial for effectiveness. Long-range jabs score one point, while power punches at closer range score two. Defensive blocking is achieved by positioning your gloves against the opponent's punches. 
+ + The key to scoring and defense is maintaining the correct distance and ensuring your fist aligns with the opponent's head. Misalignment leads to hitting the opponent's gloves, which doesn't score points. A knockout results in 100 points and ends the game; otherwise, the highest scorer at the round's end is declared the winner, with ties possible. + + Strategic gameplay involves cornering the opponent against the ropes to restrict their movement and increase scoring opportunities. By forcing the opponent into a corner, they have fewer escape routes, allowing for a barrage of scoring punches. Players must also be wary of being cornered themselves, as this limits defensive and offensive maneuvers. + + In terms of scoring, each successful punch not only scores points but also pushes the opponent back slightly, enhancing control over the ring. Dominating by driving the opponent to the ropes can lead to a series of scoring punches. However, players must avoid being trapped against the ropes to maintain the upper hand. + """, +}) + +actions_explanation_text = """ +Your choice of actions are: + - 0 - No operation + - 1 - Fire + - 2 - Move up + - 3 - Move right + - 4 - Move left + - 5 - Move down + - 6 - Move upright + - 7 - Move upleft + - 8 - Move downright + - 9 - Move downleft + - 10 - Fire up + - 11 - Fire right + - 12 - Fire left + - 13 - Fire down + - 14 - Fire upright + - 15 - Fire upleft + - 16 - Fire downright + - 17 - Fire downleft + + Each action is preceded by its index, and each index-action pair is separated by a newline. + You can choose: + 1. Which direction to move (8 directions, plus stay put) + 2. Whether to punch or not + - The no-punch actions are prefixed with Move, and the punch actions are prefixed with "Fire". + - "No Operation" means stay where you are and don't punch. +""" + +observation_explanation_text = """Game state: {state}. 
The explanations of the keys are: + - player_x, player_y: The coordinates of the your player + - enemy_x, enemy_y: The coordinates of the enemy + - enemy_score, player_score: The current scores of the enemy and player + - clock: The number of seconds left in the game. + (0,0) is the top left corner. (30,4) is the topleftmost you can be. (109,87) is the bottom rightmost you can be. + An image of the game board is also provided. Your player's colour is {player_colour} +""" +# %% +actions = { + "no operation": "0", + "fire": "1", + "move up": "2", + "move right": "3", + "move left": "4", + "move down": "5", + "move upright": "6", + "move upleft": "7", + "move downright": "8", + "move downleft": "9", + "fire up": "10", + "fire right": "11", + "fire left": "12", + "fire down": "13", + "fire upright": "14", + "fire upleft": "15", + "fire downright": "16", + "fire downleft": "17" +} + + +# Takes an action string and returns an int corresponding to the action if valid. +# If invalid, returns 0(no-op) +def parse_action(act: str) -> int: + try: + # Try to convert the action to an integer + act_int = int(act) + # If the conversion is successful and the integer is within the desired range, return it + if 0 <= act_int <= 17: + return act_int + # If the integer is not a valid action, return no-op + else: + return 0 + except ValueError: + # If the action can't be converted to an integer, treat it as an action description + return int(actions.get(act, "0")) + +# %% +def switch_player_enemy_keys(dictionary: Dict[str, int])-> Dict[str, int]: + new_dict = {} + for key, value in dictionary.items(): + if 'player' in key: + new_key = key.replace('player', 'enemy') + elif 'enemy' in key: + new_key = key.replace('enemy', 'player') + else: + new_key = key + new_dict[new_key] = value + return new_dict + +# Test the function +# dictionary = {'player_x': 31, 'player_y': 5, 'enemy_x': 109, 'enemy_y': 87, 'enemy_score': 0, 'clock': 89, 'player_score': 0} +# 
print(switch_player_enemy_keys(dictionary)) +# %% +@dataclass +class AtariBoxing(Game): + rules : Rules = boxing_rules + id :str = "atari_boxing" + + #env = boxing_v2.env(render_mode='ansi',obs_type='ram') + env = boxing_v2.env(render_mode='rgb_array',obs_type='ram', max_cycles=1000) + # The higher this number, the more reaction speed matters. And the more disadvantage high-latency agents are at. + moves_per_second_per_agent = 120.0 + # The probability of the graphical state being shown to the agent + show_state = False + logger = logging.getLogger("atari.boxing") + logger.addHandler(logging.StreamHandler()) + logger.setLevel(logging.WARN) + if(show_state): + logger.setLevel(logging.INFO) + + def init_game(self, agent_1: Agent, agent_2: Agent): + # Each agent is a tuple (pettingzoo agent ID, GameBench agent, stored action) + # A player keeps executing its stored action until it receives a new one + self.agents = [["first_0",agent_1(team_id=0, agent_id=0, **self.agent_1_kwargs),0], ["second_0",agent_2(team_id=1, agent_id=0, **self.agent_2_kwargs),0]] + self.game_is_over = False + self.env.reset() + + def get_observation(self, agent : Agent) -> Tuple[Observation, AvailableActions]: + atari_observation, _, _, _, _ = self.env.last() + # Interpret the RAM state + state = ram2label('boxing',atari_observation) + obs = Observation() + player_colour = 'grey' + if agent == self.agents[0][1]: + player_colour = 'white' + elif agent == self.agents[1][1]: + player_colour = 'black' + state = switch_player_enemy_keys(state) + + obs.text = observation_explanation_text.format(state=str(state), player_colour=player_colour) + obs.image = Image.fromarray(self.env.render()) + + acts = AvailableActions( + instructions = actions_explanation_text, + predefined = actions, + openended={} + ) + #self.logger.info(f"Agent {agent} observation={obs.text}") + return (obs, acts) + + def update(self, action : Action, available_actions : AvailableActions, agent : Agent): + 
self.agents[agent.team_id][2] = parse_action(action.action_id) + # print(f'stored action {self.agents[agent.team_id][2]} for agent {agent.team_id}') + + + def agent_loop(self, agent: int): + self.logger.info(f"Agent {agent} task entered") + last_observation = None + while not self.game_is_over: + observation, available_actions = self.get_observation(self.agents[agent][1]) + if (last_observation == observation): + # If nothing changed since last observations, no need to act just now + self.logger.debug(f"Agent {agent} going to sleep") + #self.logger.info(f"observation={observation.text}") + last_observation = observation + sleep(1) + self.logger.debug(f"Agent {agent} has awoken") + continue + last_observation = observation + #self.logger.info(f"Agent {agent} got observation {observation.text}") + # Query the agent for what action should be taken + action = self.agents[agent][1].take_action(rules=self.rules, observation=observation, available_actions=available_actions, show_state = self.show_state) + self.logger.info(f"Agent {agent} selected action {action.action_id}") + # Update the stored action + self.update(action, available_actions, self.agents[agent][1]) + + self.logger.info(f"Agent {agent} task exiting; Game is over") + + def play(self) -> Tuple[float, float]: + self.env.render() + # # Create an event loop + # loop = asyncio.new_event_loop() + + # # Set the event loop for the new thread + # asyncio.set_event_loop(loop) + + # # Fire and forget the agent threads + # for agent in range(2): + # loop.create_task(self.agent_loop(agent)) + + # # Start the event loop in a new thread + # threading.Thread(target=loop.run_forever).start() + + agentthreads = [] + + for agent in range(2): + agentthreads.append(threading.Thread(target=self.agent_loop, args=(agent,))) + agentthreads[-1].start() + + self.logger.info("Agent processes fired and forgotten") + + + # Create the matplotlib image + fig, ax = plt.subplots() + img = ax.imshow(self.env.render()) + plt.ion() + 
plt.show() + + for agent in self.env.agent_iter(): + observation, _, termination, truncation, _ = self.env.last() + + # Display graphical board state + img.set_data(self.env.render()) + #plt.figure() + fig.canvas.draw() + fig.canvas.flush_events() + + if termination or truncation: + self.game_is_over = True + self.env.close() + # Parse the state to get the scores + state = ram2label('boxing',observation) + return float(state['player_score']), float(state['enemy_score']) + + # Fetch the stored action for this agent + if agent == "first_0": + action = self.agents[0][2] + elif agent == "second_0": + action = self.agents[1][2] + + # Execute the stored action + self.env.step(action) + self.logger.debug(f'Took action {action} for agent {agent}') + #self.env.step(parse_action("move downright")) + + # Show the board + # if self.show_state: + # state = ram2label('boxing',observation) + # print(state) + + # Sleep for a bit to make the game playable + sleep(1.0/self.moves_per_second_per_agent/2) + + plt.ioff() + self.env.close() + + for proc in agentthreads: + proc.join() +# %% diff --git a/games/atari/image.png b/games/atari/image.png new file mode 100644 index 0000000..f16fd19 Binary files /dev/null and b/games/atari/image.png differ diff --git a/games/atari/piprequirements.txt b/games/atari/piprequirements.txt new file mode 100644 index 0000000..3130a17 --- /dev/null +++ b/games/atari/piprequirements.txt @@ -0,0 +1,115 @@ +-e git+https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail.git@41332b78dfb50321c29bade65f9d244387f68a60#egg=a2c_ppo_acktr +ale-py==0.8.1 +annotated-types==0.6.0 +anyio==4.2.0 +archspec @ file:///croot/archspec_1697725767277/work +asttokens @ file:///opt/conda/conda-bld/asttokens_1646925590279/work +atariari @ git+https://github.com/mila-iqia/atari-representation-learning.git@a06f52ca3bfafbd8a805254e10ea8f159251a0b4 +AutoROM==0.6.1 +beautifulsoup4 @ file:///croot/beautifulsoup4-split_1681493039619/work +boltons @ 
file:///croot/boltons_1677628692245/work +Brotli @ file:///tmp/abs_ecyw11_7ze/croots/recipe/brotli-split_1659616059936/work +certifi==2023.11.17 +cffi @ file:///croot/cffi_1700254295673/work +charset-normalizer @ file:///tmp/build/80754af9/charset-normalizer_1630003229654/work +click==8.1.7 +cloudpickle==3.0.0 +comm @ file:///croot/comm_1671231121260/work +conda @ file:///croot/conda_1708369113911/work +conda-libmamba-solver @ file:///croot/conda-libmamba-solver_1706733287605/work/src +conda-package-handling @ file:///croot/conda-package-handling_1690999929514/work +conda_package_streaming @ file:///croot/conda-package-streaming_1690987966409/work +contourpy==1.2.0 +cycler==0.12.1 +debugpy @ file:///croot/debugpy_1690905042057/work +decorator @ file:///opt/conda/conda-bld/decorator_1643638310831/work +distro @ file:///croot/distro_1701455004953/work +exceptiongroup @ file:///croot/exceptiongroup_1706031385326/work +executing @ file:///opt/conda/conda-bld/executing_1646925071911/work +Farama-Notifications==0.0.4 +filelock @ file:///croot/filelock_1700591183607/work +fire==0.5.0 +fonttools==4.47.2 +fsspec @ file:///croot/fsspec_1701286474621/work +-e git+https://github.com/Joshuaclymer/GameBench.git@cd142338eebded2ecbd94e5ebd2a1ce0585bccbd#egg=gamebench +gmpy2 @ file:///tmp/build/80754af9/gmpy2_1645455533097/work +gym==0.26.2 +gym-notices==0.0.8 +gymnasium==0.29.1 +h11==0.14.0 +httpcore==1.0.2 +httpx==0.26.0 +idna @ file:///croot/idna_1666125576474/work +importlib-resources==6.1.1 +ipykernel @ file:///croot/ipykernel_1705933831282/work +ipython @ file:///croot/ipython_1704833016303/work +jedi @ file:///tmp/build/80754af9/jedi_1644315229345/work +Jinja2 @ file:///croot/jinja2_1706733616596/work +joblib @ file:///croot/joblib_1685113087166/work +jsonpatch @ file:///tmp/build/80754af9/jsonpatch_1615747632069/work +jsonpointer==2.1 +jupyter_client @ file:///croot/jupyter_client_1699455897726/work +jupyter_core @ file:///croot/jupyter_core_1698937308754/work 
+kiwisolver==1.4.5 +libmambapy @ file:///croot/mamba-split_1704219408234/work/libmambapy +MarkupSafe @ file:///croot/markupsafe_1704205993651/work +matplotlib==3.8.2 +matplotlib-inline @ file:///opt/conda/conda-bld/matplotlib-inline_1662014470464/work +menuinst @ file:///croot/menuinst_1706732933928/work +mkl-fft @ file:///croot/mkl_fft_1695058164594/work +mkl-random @ file:///croot/mkl_random_1695059800811/work +mkl-service==2.4.0 +mpmath @ file:///croot/mpmath_1690848262763/work +multi-agent-ale-py==0.1.11 +nest-asyncio @ file:///croot/nest-asyncio_1708532673751/work +networkx @ file:///croot/networkx_1690561992265/work +numpy @ file:///croot/numpy_and_numpy_base_1708638617955/work/dist/numpy-1.26.4-cp310-cp310-linux_x86_64.whl#sha256=d8cd837ed43e87f77e6efaa08e8de927ca030a1c9c5d04624432d6fb9a74a5ee +openai==1.7.2 +opencv-python==4.9.0.80 +packaging @ file:///croot/packaging_1693575174725/work +pandas==2.1.4 +parso @ file:///opt/conda/conda-bld/parso_1641458642106/work +pettingzoo==1.24.2 +pexpect @ file:///tmp/build/80754af9/pexpect_1605563209008/work +pillow==10.2.0 +platformdirs @ file:///croot/platformdirs_1692205439124/work +pluggy @ file:///tmp/build/80754af9/pluggy_1648024709248/work +prompt-toolkit @ file:///croot/prompt-toolkit_1704404351921/work +psutil @ file:///opt/conda/conda-bld/psutil_1656431268089/work +ptyprocess @ file:///tmp/build/80754af9/ptyprocess_1609355006118/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl +pure-eval @ file:///opt/conda/conda-bld/pure_eval_1646925070566/work +pybullet==3.2.6 +pycosat @ file:///croot/pycosat_1696536503704/work +pycparser @ file:///tmp/build/80754af9/pycparser_1636541352034/work +pydantic==2.5.3 +pydantic_core==2.14.6 +pygame==2.3.0 +Pygments @ file:///croot/pygments_1684279966437/work +pyparsing==3.1.1 +PySocks @ file:///home/builder/ci_310/pysocks_1640793678128/work +python-dateutil @ file:///tmp/build/80754af9/python-dateutil_1626374649649/work +pytz==2023.3.post1 +pyzmq @ 
file:///croot/pyzmq_1705605076900/work +requests @ file:///croot/requests_1707355572290/work +ruamel.yaml @ file:///croot/ruamel.yaml_1666304550667/work +ruamel.yaml.clib @ file:///croot/ruamel.yaml.clib_1666302247304/work +scikit-learn @ file:///croot/scikit-learn_1694788527225/work +scipy @ file:///croot/scipy_1701295040508/work/dist/scipy-1.11.4-cp310-cp310-linux_x86_64.whl#sha256=a5e5a33a143f627fd4bde622162664c9ea97ff3af56f51c0f4c76ec9d1a340a3 +six @ file:///tmp/build/80754af9/six_1644875935023/work +sniffio==1.3.0 +soupsieve @ file:///croot/soupsieve_1696347547217/work +stable-baselines3==2.2.1 +stack-data @ file:///opt/conda/conda-bld/stack_data_1646927590127/work +sympy @ file:///croot/sympy_1701397643339/work +termcolor==2.4.0 +threadpoolctl @ file:///Users/ktietz/demo/mc3/conda-bld/threadpoolctl_1629802263681/work +torch @ file:///croot/pytorch-select_1707782759820/work +tornado @ file:///croot/tornado_1696936946304/work +tqdm @ file:///croot/tqdm_1679561862951/work +traitlets @ file:///croot/traitlets_1671143879854/work +truststore @ file:///croot/truststore_1695244293384/work +typing_extensions @ file:///croot/typing_extensions_1705599297034/work +tzdata==2023.4 +urllib3 @ file:///croot/urllib3_1707770551213/work +wcwidth @ file:///Users/ktietz/demo/mc3/conda-bld/wcwidth_1629357192024/work +zstandard @ file:///croot/zstandard_1677013143055/work diff --git a/games/atari/requirements.txt b/games/atari/requirements.txt new file mode 100644 index 0000000..feade4c --- /dev/null +++ b/games/atari/requirements.txt @@ -0,0 +1,136 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: linux-64 +_libgcc_mutex=0.1=main +_openmp_mutex=5.1=1_gnu +a2c-ppo-acktr=0.0.1=dev_0 +ale-py=0.8.1=pypi_0 +annotated-types=0.6.0=pypi_0 +anyio=4.2.0=pypi_0 +asttokens=2.0.5=pyhd3eb1b0_0 +atariari=0.0.1=pypi_0 +autorom=0.6.1=pypi_0 +beautifulsoup4=4.12.2=py310h06a4308_0 +blas=1.0=mkl +bzip2=1.0.8=h7b6447c_0 
+ca-certificates=2023.12.12=h06a4308_0 +certifi=2023.11.17=pypi_0 +charset-normalizer=3.3.2=pypi_0 +click=8.1.7=pypi_0 +cloudpickle=3.0.0=pypi_0 +comm=0.1.2=py310h06a4308_0 +contourpy=1.2.0=pypi_0 +cycler=0.12.1=pypi_0 +debugpy=1.6.7=py310h6a678d5_0 +decorator=5.1.1=pyhd3eb1b0_0 +distro=1.9.0=pypi_0 +exceptiongroup=1.2.0=pypi_0 +executing=0.8.3=pyhd3eb1b0_0 +farama-notifications=0.0.4=pypi_0 +filelock=3.13.1=py310h06a4308_0 +fire=0.5.0=pypi_0 +fonttools=4.47.2=pypi_0 +fsspec=2023.12.2=pypi_0 +gamebench=0.0.1=dev_0 +gmp=6.2.1=h295c915_3 +gmpy2=2.1.2=py310heeb90bb_0 +gym=0.26.2=pypi_0 +gym-notices=0.0.8=pypi_0 +gymnasium=0.29.1=pypi_0 +h11=0.14.0=pypi_0 +httpcore=1.0.2=pypi_0 +httpx=0.26.0=pypi_0 +idna=3.6=pypi_0 +importlib-resources=6.1.1=pypi_0 +intel-openmp=2023.1.0=hdb19cb5_46306 +ipykernel=6.25.0=py310h2f386ee_0 +ipython=8.20.0=py310h06a4308_0 +jedi=0.18.1=py310h06a4308_1 +jinja2=3.1.2=py310h06a4308_0 +joblib=1.2.0=py310h06a4308_0 +jupyter_client=8.6.0=py310h06a4308_0 +jupyter_core=5.5.0=py310h06a4308_0 +kiwisolver=1.4.5=pypi_0 +ld_impl_linux-64=2.38=h1181459_1 +libffi=3.4.4=h6a678d5_0 +libgcc-ng=11.2.0=h1234567_1 +libgfortran-ng=11.2.0=h00389a5_1 +libgfortran5=11.2.0=h1234567_1 +libgomp=11.2.0=h1234567_1 +libsodium=1.0.18=h7b6447c_0 +libstdcxx-ng=11.2.0=h1234567_1 +libuuid=1.41.5=h5eee18b_0 +llvm-openmp=14.0.6=h9e868ea_0 +markupsafe=2.1.3=py310h5eee18b_0 +matplotlib=3.8.2=pypi_0 +matplotlib-inline=0.1.6=py310h06a4308_0 +mkl=2023.1.0=h213fc3f_46344 +mkl-service=2.4.0=py310h5eee18b_1 +mkl_fft=1.3.8=py310h5eee18b_0 +mkl_random=1.2.4=py310hdb19cb5_0 +mpc=1.1.0=h10f8cd9_1 +mpfr=4.0.2=hb69a4c5_1 +mpmath=1.3.0=py310h06a4308_0 +multi-agent-ale-py=0.1.11=pypi_0 +ncurses=6.4=h6a678d5_0 +nest-asyncio=1.5.6=py310h06a4308_0 +networkx=3.1=py310h06a4308_0 +numpy=1.26.3=py310h5f9d8c6_0 +numpy-base=1.26.3=py310hb5e798b_0 +openai=1.7.2=pypi_0 +opencv-python=4.9.0.80=pypi_0 +openssl=3.0.12=h7f8727e_0 +packaging=23.1=py310h06a4308_0 +pandas=2.1.4=pypi_0 +parso=0.8.3=pyhd3eb1b0_0 
+pettingzoo=1.24.2=pypi_0 +pexpect=4.8.0=pyhd3eb1b0_3 +pillow=10.2.0=pypi_0 +pip=23.3.1=py310h06a4308_0 +platformdirs=3.10.0=py310h06a4308_0 +prompt-toolkit=3.0.43=py310h06a4308_0 +prompt_toolkit=3.0.43=hd3eb1b0_0 +psutil=5.9.0=py310h5eee18b_0 +ptyprocess=0.7.0=pyhd3eb1b0_2 +pure_eval=0.2.2=pyhd3eb1b0_0 +pybullet=3.2.6=pypi_0 +pydantic=2.5.3=pypi_0 +pydantic-core=2.14.6=pypi_0 +pygame=2.3.0=pypi_0 +pygments=2.15.1=py310h06a4308_1 +pyparsing=3.1.1=pypi_0 +python=3.10.13=h955ad1f_0 +python-dateutil=2.8.2=pyhd3eb1b0_0 +pytorch=2.1.2=py3.10_cpu_0 +pytorch-mutex=1.0=cpu +pytz=2023.3.post1=pypi_0 +pyyaml=6.0.1=py310h5eee18b_0 +pyzmq=25.1.0=py310h6a678d5_0 +readline=8.2=h5eee18b_0 +requests=2.31.0=pypi_0 +scikit-learn=1.3.0=py310h1128e8f_1 +scipy=1.11.4=py310h5f9d8c6_0 +setuptools=68.2.2=py310h06a4308_0 +six=1.16.0=pyhd3eb1b0_1 +sniffio=1.3.0=pypi_0 +soupsieve=2.5=py310h06a4308_0 +sqlite=3.41.2=h5eee18b_0 +stable-baselines3=2.2.1=pypi_0 +stack_data=0.2.0=pyhd3eb1b0_0 +sympy=1.12=py310h06a4308_0 +tbb=2021.8.0=hdb19cb5_0 +termcolor=2.4.0=pypi_0 +threadpoolctl=2.2.0=pyh0d69192_0 +tk=8.6.12=h1ccaba5_0 +tornado=6.3.3=py310h5eee18b_0 +tqdm=4.66.1=pypi_0 +traitlets=5.7.1=py310h06a4308_0 +typing_extensions=4.9.0=py310h06a4308_0 +tzdata=2023.4=pypi_0 +urllib3=2.1.0=pypi_0 +wcwidth=0.2.5=pyhd3eb1b0_0 +wheel=0.41.2=py310h06a4308_0 +xz=5.4.5=h5eee18b_0 +yaml=0.2.5=h7b6447c_0 +zeromq=4.3.4=h2531618_0 +zlib=1.2.13=h5eee18b_0 diff --git a/scripts/test_atari_boxing.sh b/scripts/test_atari_boxing.sh new file mode 100755 index 0000000..725be76 --- /dev/null +++ b/scripts/test_atari_boxing.sh @@ -0,0 +1,7 @@ +python api/play_game.py \ + --agent_1_path agents.random_agent.RandomAgent \ + --agent_2_path agents.gpt.GPT4Vision \ + --game_path games.atari.boxing.AtariBoxing \ + --show_state \ + --num_matches 1 \ + --agent_2_kwargs '{"transparent_reasoning": True}' \ No newline at end of file