From 595c935410e7684d69e1964c414e1acc6e158f19 Mon Sep 17 00:00:00 2001 From: hannw Date: Wed, 1 Oct 2025 23:27:00 +0000 Subject: [PATCH] Add experiment scripts --- .../envs/werewolf/scripts/__init__.py | 0 .../envs/werewolf/scripts/add_audio.py | 425 ++++++++++++++++++ .../scripts/configs/audio/standard.yaml | 24 + .../scripts/configs/run/block_basic.yaml | 102 +++++ .../scripts/configs/run/comprehensive.yaml | 100 +++++ ...ve_DisableDoctorConsecutiveSave_large.yaml | 104 +++++ .../run/roundrobin_discussion_large.yaml | 103 +++++ .../run/roundrobin_discussion_small.yaml | 103 +++++ .../configs/run/rule_experiment/standard.yaml | 103 +++++ ...SelfSave_DisableDoctorConsecutiveSave.yaml | 104 +++++ ..._DisableDoctorSelfSave_SeerRevealTeam.yaml | 105 +++++ ...tEliminationNoReveal_DayExileNoReveal.yaml | 105 +++++ ...minationRevealTeam_DayExileRevealTeam.yaml | 105 +++++ .../standard_disable_doctor_self_save.yaml | 103 +++++ .../standard_parallel_voting.yaml | 103 +++++ ...standard_parallel_voting_no_tie_exile.yaml | 103 +++++ ...rd_parallel_voting_roundbiddiscussion.yaml | 105 +++++ .../scripts/configs/run/run_config.yaml | 58 +++ .../run/vertex_api_example_config.yaml | 115 +++++ .../envs/werewolf/scripts/measure_cost.py | 251 +++++++++++ .../scripts/plot_existing_trajectories.py | 135 ++++++ .../envs/werewolf/scripts/rerender_html.py | 87 ++++ .../envs/werewolf/scripts/run.py | 93 ++++ .../envs/werewolf/scripts/run_block.py | 237 ++++++++++ .../werewolf/scripts/run_pairwise_matrix.py | 222 +++++++++ .../envs/werewolf/scripts/self_play.py | 196 ++++++++ .../envs/werewolf/scripts/utils.py | 47 ++ 27 files changed, 3338 insertions(+) create mode 100644 kaggle_environments/envs/werewolf/scripts/__init__.py create mode 100644 kaggle_environments/envs/werewolf/scripts/add_audio.py create mode 100644 kaggle_environments/envs/werewolf/scripts/configs/audio/standard.yaml create mode 100644 kaggle_environments/envs/werewolf/scripts/configs/run/block_basic.yaml create mode 
100644 kaggle_environments/envs/werewolf/scripts/configs/run/comprehensive.yaml create mode 100644 kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_DisableDoctorSelfSave_DisableDoctorConsecutiveSave_large.yaml create mode 100644 kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_large.yaml create mode 100644 kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_small.yaml create mode 100644 kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard.yaml create mode 100644 kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_DisableDoctorConsecutiveSave.yaml create mode 100644 kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam.yaml create mode 100644 kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam_NightEliminationNoReveal_DayExileNoReveal.yaml create mode 100644 kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam_NightEliminationRevealTeam_DayExileRevealTeam.yaml create mode 100644 kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_disable_doctor_self_save.yaml create mode 100644 kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting.yaml create mode 100644 kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting_no_tie_exile.yaml create mode 100644 kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting_roundbiddiscussion.yaml create mode 100644 kaggle_environments/envs/werewolf/scripts/configs/run/run_config.yaml create mode 100644 kaggle_environments/envs/werewolf/scripts/configs/run/vertex_api_example_config.yaml create mode 100644 
def load_config(config_path):
    """Load a YAML configuration file.

    Args:
        config_path: Path to the YAML file to read.

    Returns:
        The object produced by ``yaml.safe_load`` for the file content.
    """
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # default encoding (the HTML files in this module are handled the same way).
    with open(config_path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)


def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Save raw PCM audio data to a WAV file.

    Args:
        filename: Destination path of the WAV file.
        pcm: Raw PCM frames to write.
        channels: Number of audio channels written to the header.
        rate: Frame rate in Hz written to the header.
        sample_width: Sample width in bytes written to the header.
    """
    with wave.open(filename, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(pcm)
voice_name: str) -> bytes | None: + """Fetches TTS audio from Gemini API.""" + if not text or not client: + return None + try: + response = client.models.generate_content( + model="gemini-2.5-flash-preview-tts", + contents=text, + config=types.GenerateContentConfig( + response_modalities=["AUDIO"], + speech_config=types.SpeechConfig( + voice_config=types.VoiceConfig( + prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=voice_name) + ) + ), + ), + ) + return response.candidates[0].content.parts[0].inline_data.data + except (GoogleAPICallError, ValueError) as e: + logger.error(f" - Error generating audio for '{text[:30]}...': {e}") + return None + + +def get_tts_audio_vertex( + client, text: str, voice_name: str, model_name: str = "gemini-2.5-flash-preview-tts" +) -> bytes | None: + """Fetches TTS audio from Vertex AI API.""" + if not text or not client: + return None + try: + synthesis_input = texttospeech.SynthesisInput(text=text) + + voice = texttospeech.VoiceSelectionParams(language_code="en-US", name=voice_name, model_name=model_name) + + audio_config = texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3, sample_rate_hertz=24000) + + response = client.synthesize_speech( + input=synthesis_input, + voice=voice, + audio_config=audio_config, + ) + return response.audio_content + except (GoogleAPICallError, ValueError) as e: + logger.error(f" - Error generating audio using Vertex AI for '{text[:30]}...': {e}") + return None + + +def extract_game_data_from_json(replay_json): + """Extracts dialogue and events from a replay JSON object.""" + logger.info("Extracting game data from replay...") + unique_speaker_messages = set() + dynamic_moderator_messages = set() + moderator_log_steps = replay_json.get("info", {}).get("MODERATOR_OBSERVATION", []) + + for step_log in moderator_log_steps: + for data_entry in step_log: + # We must read from 'json_str' to match the werewolf.js renderer + json_str = data_entry.get("json_str") + data_type = 
def extract_game_data_from_json(replay_json):
    """Extract dialogue and moderator announcements from a replay JSON object.

    Walks ``replay_json["info"]["MODERATOR_OBSERVATION"]`` (a list of steps,
    each a list of log entries). Every entry's ``json_str`` is parsed, to match
    the werewolf.js renderer, and turned into either a player chat message or a
    moderator sentence depending on the entry's ``data_type`` / ``event_name``.

    Entries whose ``json_str`` is missing, not valid JSON, or not a JSON object
    are skipped with a warning instead of aborting the extraction.

    Args:
        replay_json: The parsed replay dictionary.

    Returns:
        A tuple ``(unique_speaker_messages, dynamic_moderator_messages)`` where
        the first is a set of ``(actor_id, message)`` tuples and the second is a
        set of moderator sentences.
    """
    logger.info("Extracting game data from replay...")
    unique_speaker_messages = set()
    dynamic_moderator_messages = set()
    moderator_log_steps = replay_json.get("info", {}).get("MODERATOR_OBSERVATION", [])

    # Entry types that all render as "<actor> <verb> <target>."
    actor_target_templates = {
        "DayExileVoteDataEntry": "{actor} votes to exile {target}.",
        "WerewolfNightVoteDataEntry": "{actor} votes to eliminate {target}.",
        "SeerInspectActionDataEntry": "{actor} inspects {target}.",
        "DoctorHealActionDataEntry": "{actor} heals {target}.",
    }

    for step_log in moderator_log_steps:
        for data_entry in step_log:
            # We must read from 'json_str' to match the werewolf.js renderer.
            json_str = data_entry.get("json_str")
            data_type = data_entry.get("data_type")  # still needed for filtering

            try:
                event = json.loads(json_str)
            except (json.JSONDecodeError, TypeError) as e:
                # TypeError covers a missing (None) or non-string json_str.
                logger.warning(f" - Skipping log entry, failed to parse json_str: {e}")
                continue
            if not isinstance(event, dict):
                logger.warning(" - Skipping log entry, failed to parse json_str: not a JSON object")
                continue

            # A null payload or description is treated like an absent one.
            data = event.get("data") or {}
            event_name = event.get("event_name")
            description = event.get("description") or ""
            day_count = event.get("day")

            if data_type == "ChatDataEntry":
                if data.get("actor_id") and data.get("message"):
                    unique_speaker_messages.add((data["actor_id"], data["message"]))
            elif data_type in actor_target_templates:
                if data.get("actor_id") and data.get("target_id"):
                    dynamic_moderator_messages.add(
                        actor_target_templates[data_type].format(actor=data["actor_id"], target=data["target_id"])
                    )
            elif data_type == "DayExileElectedDataEntry":
                if all(k in data for k in ["elected_player_id", "elected_player_role_name"]):
                    dynamic_moderator_messages.add(
                        f"{data['elected_player_id']} was exiled by vote. "
                        f"Their role was a {data['elected_player_role_name']}."
                    )
            elif data_type == "WerewolfNightEliminationDataEntry":
                if all(k in data for k in ["eliminated_player_id", "eliminated_player_role_name"]):
                    dynamic_moderator_messages.add(
                        f"{data['eliminated_player_id']} was eliminated. "
                        f"Their role was a {data['eliminated_player_role_name']}."
                    )
            elif data_type == "DoctorSaveDataEntry":
                if "saved_player_id" in data:
                    dynamic_moderator_messages.add(f"{data['saved_player_id']} was attacked but saved by a Doctor!")
            elif data_type == "SeerInspectResultDataEntry":
                # Require both ids so an incomplete entry is skipped rather than
                # raising KeyError.
                if data.get("actor_id") and data.get("target_id"):
                    if data.get("role"):
                        dynamic_moderator_messages.add(
                            f"{data['actor_id']} saw {data['target_id']}'s role is {data['role']}."
                        )
                    elif data.get("team"):
                        dynamic_moderator_messages.add(
                            f"{data['actor_id']} saw {data['target_id']}'s team is {data['team']}."
                        )
            elif data_type == "GameEndResultsDataEntry":
                if "winner_team" in data:
                    dynamic_moderator_messages.add(f"The game is over. The {data['winner_team']} team has won!")
            elif data_type == "WerewolfNightEliminationElectedDataEntry":
                if "elected_target_player_id" in data:
                    dynamic_moderator_messages.add(
                        f"The werewolves have chosen to eliminate {data['elected_target_player_id']}."
                    )
            elif event_name == EventName.DAY_START:
                dynamic_moderator_messages.add(f"Day {day_count} begins!")
            elif event_name == EventName.NIGHT_START:
                dynamic_moderator_messages.add(f"Night {day_count} begins!")
            elif event_name == EventName.MODERATOR_ANNOUNCEMENT:
                if "discussion rule is" in description:
                    dynamic_moderator_messages.add("Discussion begins!")
                elif "Voting phase begins" in description:
                    dynamic_moderator_messages.add("Exile voting begins!")

    logger.info(f"Found {len(unique_speaker_messages)} unique player messages.")
    logger.info(f"Found {len(dynamic_moderator_messages)} dynamic moderator messages.")
    return unique_speaker_messages, dynamic_moderator_messages
def generate_audio_files(
    client,
    tts_provider,
    unique_speaker_messages,
    dynamic_moderator_messages,
    player_voice_map,
    audio_config,
    output_dir,
):
    """Generate and save all required audio files, returning a map for the HTML.

    One WAV file is produced per message, named after the MD5 of its
    ``"<speaker>:<key>"`` map key, so files already on disk are reused instead
    of being synthesized again.

    Args:
        client: TTS client passed through to the provider-specific helper.
        tts_provider: ``"vertex_ai"`` selects the Vertex AI helper; any other
            value selects the google.genai helper.
        unique_speaker_messages: Iterable of ``(speaker_id, message)`` tuples.
        dynamic_moderator_messages: Iterable of moderator sentences.
        player_voice_map: Mapping of speaker id to voice name; speakers without
            a voice are skipped with a warning.
        audio_config: Parsed audio config; ``paths``, ``voices`` and ``audio``
            sections are read.
        output_dir: Directory that contains (or will contain) the audio folder.

    Returns:
        A dict mapping ``"<speaker>:<key>"`` to the audio path relative to
        ``output_dir``. Messages whose synthesis failed are omitted.
    """
    logger.info("Extracting dialogue and generating audio files...")
    audio_map = {}
    paths = audio_config["paths"]
    audio_dir = os.path.join(output_dir, paths["audio_dir_name"])
    moderator_voice = audio_config["voices"]["moderator"]
    static_moderator_messages = audio_config["audio"]["static_moderator_messages"]

    # Do not rely on the caller having created the directory; the debug variant
    # of this function creates its own directory as well.
    os.makedirs(audio_dir, exist_ok=True)

    # Each item is (speaker, map key, text to speak, voice).
    messages_to_generate = []
    for key, message in static_moderator_messages.items():
        messages_to_generate.append(("moderator", key, message, moderator_voice))
    for message in dynamic_moderator_messages:
        messages_to_generate.append(("moderator", message, message, moderator_voice))
    for speaker_id, message in unique_speaker_messages:
        voice = player_voice_map.get(speaker_id)
        if voice:
            messages_to_generate.append((speaker_id, message, message, voice))
        else:
            logger.warning(f" - Warning: No voice found for speaker: {speaker_id}")

    for speaker, key, message, voice in messages_to_generate:
        map_key = f"{speaker}:{key}"
        # MD5 is only used to derive a stable file name, not for security.
        filename = hashlib.md5(map_key.encode(), usedforsecurity=False).hexdigest() + ".wav"
        audio_path_on_disk = os.path.join(audio_dir, filename)
        audio_path_for_html = os.path.join(paths["audio_dir_name"], filename)

        if not os.path.exists(audio_path_on_disk):
            logger.info(f' - Generating audio for {speaker} ({voice}): "{message[:40]}..." ')
            if tts_provider == "vertex_ai":
                model_name = audio_config.get("vertex_ai_model", "gemini-2.5-flash-preview-tts")
                audio_content = get_tts_audio_vertex(client, message, voice_name=voice, model_name=model_name)
            else:  # google_genai
                audio_content = get_tts_audio_genai(client, message, voice_name=voice)

            if not audio_content:
                # Synthesis failed; leave the message out of the map.
                continue
            wave_file(audio_path_on_disk, audio_content)

        audio_map[map_key] = audio_path_for_html

    return audio_map
def generate_debug_audio_files(
    output_dir, client, tts_provider, unique_speaker_messages, dynamic_moderator_messages, audio_config
):
    """Generate one debug audio clip and point every audio event at it.

    The clip is synthesized only when ``debug_audio.wav`` is not already
    present in the debug audio directory. If synthesis fails, an empty map is
    returned.

    Returns:
        A dict mapping every ``"<speaker>:<key>"`` event key to the relative
        path of the single debug clip.
    """
    logger.info("Generating single debug audio for UI testing...")
    dir_name = audio_config["paths"]["debug_audio_dir_name"]
    target_dir = os.path.join(output_dir, dir_name)
    os.makedirs(target_dir, exist_ok=True)

    debug_message = "Testing start, testing end."
    wav_name = "debug_audio.wav"
    disk_path = os.path.join(target_dir, wav_name)
    html_path = os.path.join(dir_name, wav_name)

    if os.path.exists(disk_path):
        logger.info(f" - Using existing debug audio file: {disk_path}")
    else:
        logger.info(f' - Generating debug audio: "{debug_message}"')
        if tts_provider == "vertex_ai":
            clip = get_tts_audio_vertex(
                client,
                debug_message,
                voice_name="Charon",
                model_name=audio_config.get("vertex_ai_model", "gemini-2.5-flash-preview-tts"),
            )
        else:
            clip = get_tts_audio_genai(client, debug_message, voice_name="achird")

        if not clip:
            logger.error(" - Failed to generate debug audio. The map will be empty.")
            return {}
        wave_file(disk_path, clip)

    # Same key order as the individual-file generator: static moderator keys,
    # dynamic moderator sentences, then player messages.
    static_keys = audio_config["audio"]["static_moderator_messages"]
    events = [("moderator", key) for key in static_keys]
    events += [("moderator", sentence) for sentence in dynamic_moderator_messages]
    events += [(speaker_id, text) for speaker_id, text in unique_speaker_messages]

    audio_map = {f"{speaker}:{key}": html_path for speaker, key in events}

    logger.info(f" - Mapped all {len(audio_map)} audio events to '{html_path}'")
    return audio_map
def render_html(existing_html_path, audio_map, output_file):
    """Read an existing HTML replay, inject the audio map, and save the result.

    The map is serialized to JSON and embedded as an inline ``<script>`` placed
    immediately before the first ``</head>`` tag.

    Args:
        existing_html_path: Path of the replay HTML to read.
        audio_map: Dict mapping ``"<speaker>:<key>"`` to a relative audio path.
        output_file: Path the audio-enabled HTML is written to.
    """
    logger.info(f"Reading existing HTML from: {existing_html_path}")
    with open(existing_html_path, "r", encoding="utf-8") as f:
        html_content = f.read()

    logger.info("Injecting the local audio map into the HTML...")
    # Map keys contain raw player messages; escape "</" so a message such as
    # "</script>" cannot terminate the inline script element early.
    audio_map_json = json.dumps(audio_map).replace("</", "<\\/")
    # NOTE(review): the global name must match what the werewolf.js renderer
    # reads; window.AUDIO_MAP is assumed here -- confirm against the renderer.
    injection_script = f"<script>window.AUDIO_MAP = {audio_map_json};</script>"
    if "</head>" in html_content:
        html_content = html_content.replace("</head>", f"{injection_script}</head>", 1)
    else:
        logger.warning("No </head> tag found in the replay HTML; audio map was not injected.")

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(html_content)
    logger.info(f"Successfully generated audio-enabled HTML at: {output_file}")


def start_server(directory, port, filename):
    """Start a local HTTP server that serves the replay until interrupted.

    Args:
        directory: Directory whose content is served.
        port: TCP port to listen on (all interfaces).
        filename: Name of the replay HTML, used only to print the URL.
    """
    logger.info(f"\nStarting local server to serve from the '{directory}' directory...")

    class Handler(http.server.SimpleHTTPRequestHandler):
        """Request handler pinned to ``directory`` instead of the current working directory."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, directory=directory, **kwargs)

    with socketserver.TCPServer(("", port), Handler) as httpd:
        print(f"\nServing replay at: http://localhost:{port}/{filename}")
        print("Open this URL in your web browser.")
        print(f"Or you can zip the '{directory}' directory and share it.")
        print("Press Ctrl+C to stop the server.")
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nServer stopped.")
def main():
    """Add audio to a Werewolf replay produced by run.py.

    Parses the command line, loads the audio config and the replay JSON,
    creates the TTS client for the chosen provider, generates (or reuses) the
    audio files, writes the audio-enabled HTML and optionally serves it.

    Missing inputs or credentials are logged as errors and end the run early,
    before any TTS request is made.
    """
    parser = argparse.ArgumentParser(description="Add audio to a Werewolf game replay.")
    parser.add_argument(
        "-i", "--run_dir", type=str, required=True, help="Path to the directory of a game run generated by run.py."
    )
    parser.add_argument(
        "-o",
        "--output_dir",
        type=str,
        help="Output directory for the audio-enabled replay. Defaults to 'werewolf_replay_audio' inside the run directory.",
    )
    parser.add_argument(
        "-c",
        "--config_path",
        type=str,
        default=os.path.join(os.path.dirname(__file__), "configs/audio/standard.yaml"),
        help="Path to the audio configuration YAML file.",
    )
    parser.add_argument(
        "--debug-audio", action="store_true", help="Generate a single debug audio file for all events for UI testing."
    )
    parser.add_argument(
        "--serve", action="store_true", help="Start a local HTTP server to view the replay after generation."
    )
    parser.add_argument(
        "--tts-provider",
        type=str,
        default="vertex_ai",
        choices=["vertex_ai", "google_genai"],
        help="The TTS provider to use for audio synthesis.",
    )
    args = parser.parse_args()

    if not args.output_dir:
        args.output_dir = os.path.join(args.run_dir, "werewolf_replay_audio")

    os.makedirs(args.output_dir, exist_ok=True)
    setup_logger(output_dir=args.output_dir, base_name="add_audio")

    logger.info(f"Loading audio config from: {args.config_path}")
    audio_config = load_config(args.config_path)

    replay_json_path = os.path.join(args.run_dir, "werewolf_game.json")
    logger.info(f"Loading game replay from: {replay_json_path}")
    if not os.path.exists(replay_json_path):
        logger.error(f"Replay file not found: {replay_json_path}")
        logger.error("Please ensure you provide a valid run directory created by run.py.")
        return

    # Check the HTML up front: discovering it is missing only after all TTS
    # requests have been made would waste the whole synthesis run.
    original_html_path = os.path.join(args.run_dir, "werewolf_game.html")
    if not os.path.exists(original_html_path):
        logger.error(f"Replay HTML not found: {original_html_path}")
        logger.error("Please ensure you provide a valid run directory created by run.py.")
        return

    with open(replay_json_path, "r", encoding="utf-8") as f:
        replay_data = json.load(f)

    game_config = replay_data["configuration"]
    player_voices = audio_config["voices"]["players"]
    # Agents without a configured voice map to None.
    player_voice_map = {
        agent_config["id"]: player_voices.get(agent_config["id"]) for agent_config in game_config["agents"]
    }

    load_dotenv()
    client = None
    if args.tts_provider == "vertex_ai":
        if not os.getenv("GOOGLE_CLOUD_PROJECT"):
            logger.error("Error: GOOGLE_CLOUD_PROJECT environment variable not found. It is required for Vertex AI.")
            return
        try:
            client = texttospeech.TextToSpeechClient()
        except Exception as e:
            logger.error(f"Failed to initialize Vertex AI client: {e}")
            logger.error("Please ensure you have authenticated with 'gcloud auth application-default login'")
            return
    else:  # google_genai
        if not os.getenv("GEMINI_API_KEY"):
            logger.error(
                "Error: GEMINI_API_KEY environment variable not found. Audio generation with google.genai requires it."
            )
            return
        client = genai.Client()

    unique_speaker_messages, dynamic_moderator_messages = extract_game_data_from_json(replay_data)

    paths = audio_config["paths"]
    audio_dir = os.path.join(args.output_dir, paths["audio_dir_name"])
    os.makedirs(audio_dir, exist_ok=True)

    if args.debug_audio:
        audio_map = generate_debug_audio_files(
            args.output_dir,
            client,
            args.tts_provider,
            unique_speaker_messages,
            dynamic_moderator_messages,
            audio_config,
        )
    else:
        audio_map = generate_audio_files(
            client,
            args.tts_provider,
            unique_speaker_messages,
            dynamic_moderator_messages,
            player_voice_map,
            audio_config,
            args.output_dir,
        )

    output_html_file = os.path.join(args.output_dir, paths["output_html_filename"])
    render_html(original_html_path, audio_map, output_html_file)

    if args.serve:
        start_server(args.output_dir, audio_config["server"]["port"], paths["output_html_filename"])


if __name__ == "__main__":
    main()
(rate=\"medium\", voice=\"neutral\")[The sun rises on a new day.] (break=\"50ms\") (rate=\"medium\", voice=\"somber\")[Let's see who survived the night.]" + discussion_begins: "(voice=\"authoritative\")[The town meeting now begins.] (voice=\"neutral\")[You have a few minutes to discuss and find the werewolves among you.] (voice=\"authoritative\")[Begin.]" + voting_begins: "(rate=\"slow\", voice=\"serious\")[The time for talk is over.] (break=\"50ms\") (rate=\"medium\", volume=\"loud\", voice=\"dramatic\")[Now, you must cast your votes!]" diff --git a/kaggle_environments/envs/werewolf/scripts/configs/run/block_basic.yaml b/kaggle_environments/envs/werewolf/scripts/configs/run/block_basic.yaml new file mode 100644 index 00000000..4d2017a2 --- /dev/null +++ b/kaggle_environments/envs/werewolf/scripts/configs/run/block_basic.yaml @@ -0,0 +1,102 @@ +# Settings for the dump_audio.py script +script_settings: + server: + port: 7999 + paths: + audio_dir_name: "audio" + debug_audio_dir_name: "debug_audio" + output_html_filename: "replay.html" + voices: + moderator: "enceladus" + players: + gemini-2.5-flash: 'Kore' + deepseek-r1: 'Charon' + gpt-oss-120b: 'Leda' + qwen3: 'Despina' + "gpt-4.1": 'Erinome' + "o4-mini": 'Gacrux' + "gemini-2.5-pro": 'Achird' + "grok-4": 'Puck' + audio: + static_moderator_messages: + night_begins: "(rate=\"fast\", volume=\"soft\", voice=\"mysterious\")[As darkness descends, the village falls silent.](rate=\"medium\", pitch=\"-2st\")[Everyone, close your eyes.]" + day_begins: "(rate=\"fast\", volume=\"loud\")[Wake up, villagers!] (rate=\"medium\", voice=\"neutral\")[The sun rises on a new day.] (break=\"50ms\") (rate=\"medium\", voice=\"somber\")[Let's see who survived the night.]" + discussion_begins: "(voice=\"authoritative\")[The town meeting now begins.] (voice=\"neutral\")[You have a few minutes to discuss and find the werewolves among you.] 
(voice=\"authoritative\")[Begin.]" + voting_begins: "(rate=\"slow\", voice=\"serious\")[The time for talk is over.] (break=\"50ms\") (rate=\"medium\", volume=\"loud\", voice=\"dramatic\")[Now, you must cast your votes!]" + +# Configuration for the Werewolf game environment +game_config: + actTimeout: 300 + runTimeout: 3600 + discussion_protocol: + name: "RoundRobinDiscussion" + params: + max_rounds: 1 + agents: + - role: "Werewolf" + id: "gemini-2.5-pro" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-pro" + display_name: "gemini/gemini-2.5-pro" + agent_harness_name: "llm_harness" + llms: + - model_name: "gemini/gemini-2.5-pro" + - role: "Werewolf" + id: "deepseek-r1" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/deepseek-ai-icon-logo-png_seeklogo-611473.png" + agent_id: "llm/together_ai/deepseek-ai/DeepSeek-R1" + display_name: "together_ai/deepseek-ai/DeepSeek-R1" + agent_harness_name: "llm_harness" + llms: + - model_name: "together_ai/deepseek-ai/DeepSeek-R1" + parameters: { "max_tokens": 163839 } + - role: "Doctor" + id: "gpt-5" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/gpt-5" + display_name: "gpt-5" + agent_harness_name: "llm_harness" + llms: + - model_name: "gpt-5" + - role: "Seer" + id: "qwen3" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/together_ai/Qwen/Qwen3-235B-A22B-Instruct-2507-tput" + display_name: "together_ai/Qwen/Qwen3-235B-A22B-Instruct-2507-tput" + agent_harness_name: "llm_harness" + llms: + - model_name: "together_ai/Qwen/Qwen3-235B-A22B-Instruct-2507-tput" + - role: "Villager" + id: "claude-4-sonnet" + thumbnail: "https://images.seeklogo.com/logo-png/55/1/claude-logo-png_seeklogo-554534.png" + agent_id: "llm/claude-4-sonnet-20250514" + display_name: "claude-4-sonnet-20250514" + agent_harness_name: "llm_harness" + llms: + - 
model_name: "claude-4-sonnet-20250514" + - role: "Villager" + id: "zai-glm-4.5-air" + thumbnail: "https://z-cdn.chatglm.cn/z-ai/static/logo.svg" + agent_id: "llm/together_ai/zai-org/GLM-4.5-Air-FP8" + display_name: "zai-glm-4.5-air" + agent_harness_name: "llm_harness" + llms: + - model_name: "together_ai/zai-org/GLM-4.5-Air-FP8" + parameters: { "max_tokens": 100000 } + - role: "Villager" + id: "kimi-k2" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/kimi-logo-png_seeklogo-611650.png" + agent_id: "llm/together_ai/moonshotai/Kimi-K2-Instruct" + display_name: "kimi-k2" + agent_harness_name: "llm_harness" + llms: + - model_name: "together_ai/moonshotai/Kimi-K2-Instruct" + parameters: { "max_tokens": 100000 } + - role: "Villager" + id: "grok-4" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/grok-logo-png_seeklogo-613403.png" + agent_id: "llm/xai/grok-4-latest" + display_name: "xai/grok-4-latest" + agent_harness_name: "llm_harness" + llms: + - model_name: "xai/grok-4-latest" \ No newline at end of file diff --git a/kaggle_environments/envs/werewolf/scripts/configs/run/comprehensive.yaml b/kaggle_environments/envs/werewolf/scripts/configs/run/comprehensive.yaml new file mode 100644 index 00000000..942368a1 --- /dev/null +++ b/kaggle_environments/envs/werewolf/scripts/configs/run/comprehensive.yaml @@ -0,0 +1,100 @@ +# Configuration for the Werewolf game environment +game_config: + seed: 123 + actTimeout: 300 + runTimeout: 3600 + discussion_protocol: + name: "TurnByTurnBiddingDiscussion" + params: + max_turns: 16 + bid_result_public: false + day_voting_protocol: + name: "SequentialVoting" + werewolf_night_vote_protocol: + name: "SequentialVoting" + night_elimination_reveal_level: no_reveal + day_exile_reveal_level: no_reveal + agents: + - role: "Werewolf" + id: "gemini-2.5-flash" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm_harness/gemini/gemini-2.5-flash" + display_name: 
"gemini/gemini-2.5-flash" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-flash" + - role: "Werewolf" + id: "deepseek-r1" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/deepseek-ai-icon-logo-png_seeklogo-611473.png" + agent_id: "llm_harness/together_ai/deepseek-ai/DeepSeek-R1" + display_name: "together_ai/deepseek-ai/DeepSeek-R1" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "together_ai/deepseek-ai/DeepSeek-R1" + parameters: { "max_tokens": 163839 } + - role: "Doctor" + role_params: + allow_self_save: true + id: "gpt-oss-120b" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm_harness/together_ai/openai/gpt-oss-120b" + display_name: "together_ai/openai/gpt-oss-120b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "together_ai/openai/gpt-oss-120b" + - role: "Seer" + id: "qwen3" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm_harness/together_ai/Qwen/Qwen3-235B-A22B-Instruct-2507-tput" + display_name: "together_ai/Qwen/Qwen3-235B-A22B-Instruct-2507-tput" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "together_ai/Qwen/Qwen3-235B-A22B-Instruct-2507-tput" + - role: "Villager" + id: "gpt-4.1" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm_harness/gpt-4.1" + display_name: "gpt-4.1" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gpt-4.1" + - role: "Villager" + id: "o4-mini" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm_harness/o4-mini" + display_name: "o4-mini" + agent_harness_name: 
"llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "o4-mini" + - role: "Villager" + id: "gemini-2.5-pro" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm_harness/gemini/gemini-2.5-pro" + display_name: "gemini/gemini-2.5-pro" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-pro" + - role: "Villager" + id: "grok-4" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/grok-logo-png_seeklogo-613403.png" + agent_id: "llm_harness/xai/grok-4-latest" + display_name: "xai/grok-4-latest" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "xai/grok-4-latest" diff --git a/kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_DisableDoctorSelfSave_DisableDoctorConsecutiveSave_large.yaml b/kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_DisableDoctorSelfSave_DisableDoctorConsecutiveSave_large.yaml new file mode 100644 index 00000000..c6afca15 --- /dev/null +++ b/kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_DisableDoctorSelfSave_DisableDoctorConsecutiveSave_large.yaml @@ -0,0 +1,104 @@ +# Configuration for the Werewolf game environment +game_config: + seed: 123 + actTimeout: 900 + runTimeout: 7200 + discussion_protocol: + name: "RoundRobinDiscussion" + params: + max_rounds: 2 + assign_random_first_speaker: true + day_voting_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + werewolf_night_vote_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + night_elimination_reveal_level: role + day_exile_reveal_level: role + agents: + - role: "Werewolf" + id: "Kai" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-pro" + display_name: 
"gemini-2.5-pro" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-pro" + - role: "Werewolf" + id: "Jordan" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/deepseek-ai-icon-logo-png_seeklogo-611473.png" + agent_id: "llm/openrouter/deepseek/deepseek-chat-v3.1" + display_name: "deepseek-chat-v3.1" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/deepseek/deepseek-chat-v3.1" + - role: "Doctor" + role_params: + allow_self_save: false + allow_consecutive_saves: false + id: "Charlie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-5" + display_name: "gpt-5" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-5" + - role: "Seer" + id: "Taylor" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-235b-a22b-2507" + display_name: "qwen3-235b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-235b-a22b-2507" + - role: "Villager" + id: "Alex" + thumbnail: "https://z-cdn.chatglm.cn/z-ai/static/logo.svg" + agent_id: "llm/openrouter/z-ai/glm-4.5" + display_name: "glm-4.5" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/z-ai/glm-4.5" + - role: "Villager" + id: "Jamie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-4.1" + display_name: "gpt-4.1" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-4.1" + - role: "Villager" + id: "Quinn" + thumbnail: 
"https://images.seeklogo.com/logo-png/55/1/claude-logo-png_seeklogo-554534.png" + agent_id: "llm/openrouter/anthropic/claude-sonnet-4" + display_name: "claude-sonnet-4" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/claude-sonnet-4" + - role: "Villager" + id: "Casey" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/grok-logo-png_seeklogo-613403.png" + agent_id: "llm/openrouter/x-ai/grok-4" + display_name: "grok-4" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/x-ai/grok-4" diff --git a/kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_large.yaml b/kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_large.yaml new file mode 100644 index 00000000..67f3bec5 --- /dev/null +++ b/kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_large.yaml @@ -0,0 +1,103 @@ +# Configuration for the Werewolf game environment +game_config: + seed: 123 + actTimeout: 900 + runTimeout: 7200 + discussion_protocol: + name: "RoundRobinDiscussion" + params: + max_rounds: 2 + assign_random_first_speaker: true + day_voting_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + werewolf_night_vote_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + night_elimination_reveal_level: role + day_exile_reveal_level: role + agents: + - role: "Werewolf" + id: "Kai" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-pro" + display_name: "gemini-2.5-pro" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-pro" + - role: "Werewolf" + id: "Jordan" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/deepseek-ai-icon-logo-png_seeklogo-611473.png" + agent_id: 
"llm/openrouter/deepseek/deepseek-chat-v3.1" + display_name: "deepseek-chat-v3.1" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/deepseek/deepseek-chat-v3.1" + - role: "Doctor" + role_params: + allow_self_save: true + id: "Charlie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-5" + display_name: "gpt-5" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-5" + - role: "Seer" + id: "Taylor" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-235b-a22b-2507" + display_name: "qwen3-235b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-235b-a22b-2507" + - role: "Villager" + id: "Alex" + thumbnail: "https://z-cdn.chatglm.cn/z-ai/static/logo.svg" + agent_id: "llm/openrouter/z-ai/glm-4.5" + display_name: "glm-4.5" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/z-ai/glm-4.5" + - role: "Villager" + id: "Jamie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-4.1" + display_name: "gpt-4.1" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-4.1" + - role: "Villager" + id: "Quinn" + thumbnail: "https://images.seeklogo.com/logo-png/55/1/claude-logo-png_seeklogo-554534.png" + agent_id: "llm/openrouter/anthropic/claude-sonnet-4" + display_name: "claude-sonnet-4" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/claude-sonnet-4" + - role: "Villager" + id: "Casey" + 
thumbnail: "https://images.seeklogo.com/logo-png/61/1/grok-logo-png_seeklogo-613403.png" + agent_id: "llm/openrouter/x-ai/grok-4" + display_name: "grok-4" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/x-ai/grok-4" diff --git a/kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_small.yaml b/kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_small.yaml new file mode 100644 index 00000000..d01695f6 --- /dev/null +++ b/kaggle_environments/envs/werewolf/scripts/configs/run/roundrobin_discussion_small.yaml @@ -0,0 +1,103 @@ +# Configuration for the Werewolf game environment +game_config: + seed: 123 + actTimeout: 900 + runTimeout: 7200 + discussion_protocol: + name: "RoundRobinDiscussion" + params: + max_rounds: 2 + assign_random_first_speaker: true + day_voting_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + werewolf_night_vote_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + night_elimination_reveal_level: role + day_exile_reveal_level: role + agents: + - role: "Werewolf" + id: "Kai" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-flash" + display_name: "gemini-2.5-flash" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-flash" + - role: "Werewolf" + id: "Jordan" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/deepseek-ai-icon-logo-png_seeklogo-611473.png" + agent_id: "llm/openrouter/deepseek/deepseek-chat-v3.1" + display_name: "deepseek-chat-v3.1" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/deepseek/deepseek-chat-v3.1" + - role: "Doctor" + role_params: + allow_self_save: true + id: "Charlie" + thumbnail: 
"https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-4o-mini" + display_name: "gpt-4o-mini" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-4o-mini" + - role: "Seer" + id: "Taylor" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-235b-a22b-2507" + display_name: "qwen3-235b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-235b-a22b-2507" + - role: "Villager" + id: "Alex" + thumbnail: "https://z-cdn.chatglm.cn/z-ai/static/logo.svg" + agent_id: "llm/openrouter/z-ai/glm-4.5" + display_name: "glm-4.5" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/z-ai/glm-4.5" + - role: "Villager" + id: "Jamie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-oss-120b" + display_name: "gpt-oss-120b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-oss-120b" + - role: "Villager" + id: "Quinn" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-pro" + display_name: "gemini-2.5-pro" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-pro" + - role: "Villager" + id: "Casey" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-30b-a3b" + display_name: "qwen3-30b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-30b-a3b" diff 
--git a/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard.yaml b/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard.yaml new file mode 100644 index 00000000..d01695f6 --- /dev/null +++ b/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard.yaml @@ -0,0 +1,103 @@ +# Configuration for the Werewolf game environment +game_config: + seed: 123 + actTimeout: 900 + runTimeout: 7200 + discussion_protocol: + name: "RoundRobinDiscussion" + params: + max_rounds: 2 + assign_random_first_speaker: true + day_voting_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + werewolf_night_vote_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + night_elimination_reveal_level: role + day_exile_reveal_level: role + agents: + - role: "Werewolf" + id: "Kai" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-flash" + display_name: "gemini-2.5-flash" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-flash" + - role: "Werewolf" + id: "Jordan" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/deepseek-ai-icon-logo-png_seeklogo-611473.png" + agent_id: "llm/openrouter/deepseek/deepseek-chat-v3.1" + display_name: "deepseek-chat-v3.1" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/deepseek/deepseek-chat-v3.1" + - role: "Doctor" + role_params: + allow_self_save: true + id: "Charlie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-4o-mini" + display_name: "gpt-4o-mini" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-4o-mini" + - role: "Seer" + id: "Taylor" + 
thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-235b-a22b-2507" + display_name: "qwen3-235b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-235b-a22b-2507" + - role: "Villager" + id: "Alex" + thumbnail: "https://z-cdn.chatglm.cn/z-ai/static/logo.svg" + agent_id: "llm/openrouter/z-ai/glm-4.5" + display_name: "glm-4.5" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/z-ai/glm-4.5" + - role: "Villager" + id: "Jamie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-oss-120b" + display_name: "gpt-oss-120b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-oss-120b" + - role: "Villager" + id: "Quinn" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-pro" + display_name: "gemini-2.5-pro" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-pro" + - role: "Villager" + id: "Casey" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-30b-a3b" + display_name: "qwen3-30b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-30b-a3b" diff --git a/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_DisableDoctorConsecutiveSave.yaml b/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_DisableDoctorConsecutiveSave.yaml new file mode 100644 index 00000000..bbb753a4 --- 
/dev/null +++ b/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_DisableDoctorConsecutiveSave.yaml @@ -0,0 +1,104 @@ +# Configuration for the Werewolf game environment +game_config: + seed: 123 + actTimeout: 900 + runTimeout: 7200 + discussion_protocol: + name: "RoundRobinDiscussion" + params: + max_rounds: 2 + assign_random_first_speaker: true + day_voting_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + werewolf_night_vote_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + night_elimination_reveal_level: role + day_exile_reveal_level: role + agents: + - role: "Werewolf" + id: "Kai" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-flash" + display_name: "gemini-2.5-flash" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-flash" + - role: "Werewolf" + id: "Jordan" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/deepseek-ai-icon-logo-png_seeklogo-611473.png" + agent_id: "llm/openrouter/deepseek/deepseek-chat-v3.1" + display_name: "deepseek-chat-v3.1" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/deepseek/deepseek-chat-v3.1" + - role: "Doctor" + role_params: + allow_self_save: false + allow_consecutive_saves: false + id: "Charlie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-4o-mini" + display_name: "gpt-4o-mini" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-4o-mini" + - role: "Seer" + id: "Taylor" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: 
"llm/openrouter/qwen/qwen3-235b-a22b-2507" + display_name: "qwen3-235b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-235b-a22b-2507" + - role: "Villager" + id: "Alex" + thumbnail: "https://z-cdn.chatglm.cn/z-ai/static/logo.svg" + agent_id: "llm/openrouter/z-ai/glm-4.5" + display_name: "glm-4.5" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/z-ai/glm-4.5" + - role: "Villager" + id: "Jamie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-oss-120b" + display_name: "gpt-oss-120b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-oss-120b" + - role: "Villager" + id: "Quinn" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-pro" + display_name: "gemini-2.5-pro" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-pro" + - role: "Villager" + id: "Casey" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-30b-a3b" + display_name: "qwen3-30b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-30b-a3b" diff --git a/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam.yaml b/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam.yaml new file mode 100644 index 00000000..bfd73fbe --- /dev/null +++ b/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam.yaml 
@@ -0,0 +1,105 @@ +# Configuration for the Werewolf game environment +game_config: + seed: 123 + actTimeout: 900 + runTimeout: 7200 + discussion_protocol: + name: "RoundRobinDiscussion" + params: + max_rounds: 2 + assign_random_first_speaker: true + day_voting_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + werewolf_night_vote_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + night_elimination_reveal_level: role + day_exile_reveal_level: role + agents: + - role: "Werewolf" + id: "Kai" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-flash" + display_name: "gemini-2.5-flash" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-flash" + - role: "Werewolf" + id: "Jordan" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/deepseek-ai-icon-logo-png_seeklogo-611473.png" + agent_id: "llm/openrouter/deepseek/deepseek-chat-v3.1" + display_name: "deepseek-chat-v3.1" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/deepseek/deepseek-chat-v3.1" + - role: "Doctor" + role_params: + allow_self_save: false + id: "Charlie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-4o-mini" + display_name: "gpt-4o-mini" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-4o-mini" + - role: "Seer" + role_params: + reveal_level: "team" + id: "Taylor" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-235b-a22b-2507" + display_name: "qwen3-235b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: 
"openrouter/qwen/qwen3-235b-a22b-2507" + - role: "Villager" + id: "Alex" + thumbnail: "https://z-cdn.chatglm.cn/z-ai/static/logo.svg" + agent_id: "llm/openrouter/z-ai/glm-4.5" + display_name: "glm-4.5" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/z-ai/glm-4.5" + - role: "Villager" + id: "Jamie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-oss-120b" + display_name: "gpt-oss-120b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-oss-120b" + - role: "Villager" + id: "Quinn" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-pro" + display_name: "gemini-2.5-pro" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-pro" + - role: "Villager" + id: "Casey" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-30b-a3b" + display_name: "qwen3-30b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-30b-a3b" diff --git a/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam_NightEliminationNoReveal_DayExileNoReveal.yaml b/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam_NightEliminationNoReveal_DayExileNoReveal.yaml new file mode 100644 index 00000000..1f85a32b --- /dev/null +++ b/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam_NightEliminationNoReveal_DayExileNoReveal.yaml @@ -0,0 +1,105 @@ +# Configuration for the Werewolf game 
environment +game_config: + seed: 123 + actTimeout: 900 + runTimeout: 7200 + discussion_protocol: + name: "RoundRobinDiscussion" + params: + max_rounds: 2 + assign_random_first_speaker: true + day_voting_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + werewolf_night_vote_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + night_elimination_reveal_level: no_reveal + day_exile_reveal_level: no_reveal + agents: + - role: "Werewolf" + id: "Kai" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-flash" + display_name: "gemini-2.5-flash" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-flash" + - role: "Werewolf" + id: "Jordan" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/deepseek-ai-icon-logo-png_seeklogo-611473.png" + agent_id: "llm/openrouter/deepseek/deepseek-chat-v3.1" + display_name: "deepseek-chat-v3.1" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/deepseek/deepseek-chat-v3.1" + - role: "Doctor" + role_params: + allow_self_save: false + id: "Charlie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-4o-mini" + display_name: "gpt-4o-mini" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-4o-mini" + - role: "Seer" + role_params: + reveal_level: "team" + id: "Taylor" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-235b-a22b-2507" + display_name: "qwen3-235b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-235b-a22b-2507" + - role: 
"Villager" + id: "Alex" + thumbnail: "https://z-cdn.chatglm.cn/z-ai/static/logo.svg" + agent_id: "llm/openrouter/z-ai/glm-4.5" + display_name: "glm-4.5" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/z-ai/glm-4.5" + - role: "Villager" + id: "Jamie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-oss-120b" + display_name: "gpt-oss-120b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-oss-120b" + - role: "Villager" + id: "Quinn" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-pro" + display_name: "gemini-2.5-pro" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-pro" + - role: "Villager" + id: "Casey" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-30b-a3b" + display_name: "qwen3-30b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-30b-a3b" diff --git a/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam_NightEliminationRevealTeam_DayExileRevealTeam.yaml b/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam_NightEliminationRevealTeam_DayExileRevealTeam.yaml new file mode 100644 index 00000000..a7d562c1 --- /dev/null +++ b/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_DisableDoctorSelfSave_SeerRevealTeam_NightEliminationRevealTeam_DayExileRevealTeam.yaml @@ -0,0 +1,105 @@ +# Configuration for the Werewolf game environment +game_config: + seed: 123 
+ actTimeout: 900 + runTimeout: 7200 + discussion_protocol: + name: "RoundRobinDiscussion" + params: + max_rounds: 2 + assign_random_first_speaker: true + day_voting_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + werewolf_night_vote_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + night_elimination_reveal_level: team + day_exile_reveal_level: team + agents: + - role: "Werewolf" + id: "Kai" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-flash" + display_name: "gemini-2.5-flash" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-flash" + - role: "Werewolf" + id: "Jordan" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/deepseek-ai-icon-logo-png_seeklogo-611473.png" + agent_id: "llm/openrouter/deepseek/deepseek-chat-v3.1" + display_name: "deepseek-chat-v3.1" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/deepseek/deepseek-chat-v3.1" + - role: "Doctor" + role_params: + allow_self_save: false + id: "Charlie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-4o-mini" + display_name: "gpt-4o-mini" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-4o-mini" + - role: "Seer" + role_params: + reveal_level: "team" + id: "Taylor" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-235b-a22b-2507" + display_name: "qwen3-235b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-235b-a22b-2507" + - role: "Villager" + id: "Alex" + thumbnail: 
"https://z-cdn.chatglm.cn/z-ai/static/logo.svg" + agent_id: "llm/openrouter/z-ai/glm-4.5" + display_name: "glm-4.5" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/z-ai/glm-4.5" + - role: "Villager" + id: "Jamie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-oss-120b" + display_name: "gpt-oss-120b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-oss-120b" + - role: "Villager" + id: "Quinn" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-pro" + display_name: "gemini-2.5-pro" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-pro" + - role: "Villager" + id: "Casey" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-30b-a3b" + display_name: "qwen3-30b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-30b-a3b" diff --git a/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_disable_doctor_self_save.yaml b/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_disable_doctor_self_save.yaml new file mode 100644 index 00000000..6352f2a0 --- /dev/null +++ b/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_disable_doctor_self_save.yaml @@ -0,0 +1,103 @@ +# Configuration for the Werewolf game environment +game_config: + seed: 123 + actTimeout: 900 + runTimeout: 7200 + discussion_protocol: + name: "RoundRobinDiscussion" + params: + max_rounds: 2 + assign_random_first_speaker: true + day_voting_protocol: + name: "SequentialVoting" + 
params: + assign_random_first_voter: true + werewolf_night_vote_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + night_elimination_reveal_level: role + day_exile_reveal_level: role + agents: + - role: "Werewolf" + id: "Kai" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-flash" + display_name: "gemini-2.5-flash" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-flash" + - role: "Werewolf" + id: "Jordan" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/deepseek-ai-icon-logo-png_seeklogo-611473.png" + agent_id: "llm/openrouter/deepseek/deepseek-chat-v3.1" + display_name: "deepseek-chat-v3.1" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/deepseek/deepseek-chat-v3.1" + - role: "Doctor" + role_params: + allow_self_save: false + id: "Charlie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-4o-mini" + display_name: "gpt-4o-mini" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-4o-mini" + - role: "Seer" + id: "Taylor" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-235b-a22b-2507" + display_name: "qwen3-235b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-235b-a22b-2507" + - role: "Villager" + id: "Alex" + thumbnail: "https://z-cdn.chatglm.cn/z-ai/static/logo.svg" + agent_id: "llm/openrouter/z-ai/glm-4.5" + display_name: "glm-4.5" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/z-ai/glm-4.5" + - 
role: "Villager" + id: "Jamie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-oss-120b" + display_name: "gpt-oss-120b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-oss-120b" + - role: "Villager" + id: "Quinn" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-pro" + display_name: "gemini-2.5-pro" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-pro" + - role: "Villager" + id: "Casey" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-30b-a3b" + display_name: "qwen3-30b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-30b-a3b" diff --git a/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting.yaml b/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting.yaml new file mode 100644 index 00000000..dc4e8b47 --- /dev/null +++ b/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting.yaml @@ -0,0 +1,103 @@ +# Configuration for the Werewolf game environment +game_config: + seed: 123 + actTimeout: 900 + runTimeout: 7200 + discussion_protocol: + name: "RoundRobinDiscussion" + params: + max_rounds: 2 + assign_random_first_speaker: true + day_voting_protocol: + name: "SimultaneousMajority" + params: + tie_break: 'random' + werewolf_night_vote_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + night_elimination_reveal_level: role + day_exile_reveal_level: role + agents: + - role: "Werewolf" + id: "Kai" + thumbnail: 
"https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-flash" + display_name: "gemini-2.5-flash" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-flash" + - role: "Werewolf" + id: "Jordan" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/deepseek-ai-icon-logo-png_seeklogo-611473.png" + agent_id: "llm/openrouter/deepseek/deepseek-chat-v3.1" + display_name: "deepseek-chat-v3.1" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/deepseek/deepseek-chat-v3.1" + - role: "Doctor" + role_params: + allow_self_save: true + id: "Charlie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-4o-mini" + display_name: "gpt-4o-mini" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-4o-mini" + - role: "Seer" + id: "Taylor" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-235b-a22b-2507" + display_name: "qwen3-235b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-235b-a22b-2507" + - role: "Villager" + id: "Alex" + thumbnail: "https://z-cdn.chatglm.cn/z-ai/static/logo.svg" + agent_id: "llm/openrouter/z-ai/glm-4.5" + display_name: "glm-4.5" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/z-ai/glm-4.5" + - role: "Villager" + id: "Jamie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-oss-120b" + display_name: "gpt-oss-120b" + agent_harness_name: "llm_harness" + chat_mode: "text" + 
enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-oss-120b" + - role: "Villager" + id: "Quinn" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-pro" + display_name: "gemini-2.5-pro" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-pro" + - role: "Villager" + id: "Casey" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-30b-a3b" + display_name: "qwen3-30b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-30b-a3b" diff --git a/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting_no_tie_exile.yaml b/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting_no_tie_exile.yaml new file mode 100644 index 00000000..225d8962 --- /dev/null +++ b/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting_no_tie_exile.yaml @@ -0,0 +1,103 @@ +# Configuration for the Werewolf game environment +game_config: + seed: 123 + actTimeout: 900 + runTimeout: 7200 + discussion_protocol: + name: "RoundRobinDiscussion" + params: + max_rounds: 2 + assign_random_first_speaker: true + day_voting_protocol: + name: "SimultaneousMajority" + params: + tie_break: 'no_elected' + werewolf_night_vote_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + night_elimination_reveal_level: role + day_exile_reveal_level: role + agents: + - role: "Werewolf" + id: "Kai" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-flash" + display_name: "gemini-2.5-flash" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + 
llms: + - model_name: "gemini/gemini-2.5-flash" + - role: "Werewolf" + id: "Jordan" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/deepseek-ai-icon-logo-png_seeklogo-611473.png" + agent_id: "llm/openrouter/deepseek/deepseek-chat-v3.1" + display_name: "deepseek-chat-v3.1" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/deepseek/deepseek-chat-v3.1" + - role: "Doctor" + role_params: + allow_self_save: true + id: "Charlie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-4o-mini" + display_name: "gpt-4o-mini" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-4o-mini" + - role: "Seer" + id: "Taylor" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-235b-a22b-2507" + display_name: "qwen3-235b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-235b-a22b-2507" + - role: "Villager" + id: "Alex" + thumbnail: "https://z-cdn.chatglm.cn/z-ai/static/logo.svg" + agent_id: "llm/openrouter/z-ai/glm-4.5" + display_name: "glm-4.5" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/z-ai/glm-4.5" + - role: "Villager" + id: "Jamie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-oss-120b" + display_name: "gpt-oss-120b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-oss-120b" + - role: "Villager" + id: "Quinn" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-pro" + 
display_name: "gemini-2.5-pro" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-pro" + - role: "Villager" + id: "Casey" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-30b-a3b" + display_name: "qwen3-30b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-30b-a3b" diff --git a/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting_roundbiddiscussion.yaml b/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting_roundbiddiscussion.yaml new file mode 100644 index 00000000..2ff52c0d --- /dev/null +++ b/kaggle_environments/envs/werewolf/scripts/configs/run/rule_experiment/standard_parallel_voting_roundbiddiscussion.yaml @@ -0,0 +1,105 @@ +# Configuration for the Werewolf game environment +game_config: + seed: 123 + actTimeout: 900 + runTimeout: 7200 + discussion_protocol: + name: "RoundByRoundBiddingDiscussion" + params: + bidding: + name: "SimpleBiddingProtocol" + max_rounds: 2 + bid_result_public: true + day_voting_protocol: + name: "SimultaneousMajority" + params: + tie_break: 'random' + werewolf_night_vote_protocol: + name: "SequentialVoting" + params: + assign_random_first_voter: true + night_elimination_reveal_level: role + day_exile_reveal_level: role + agents: + - role: "Werewolf" + id: "Kai" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-flash" + display_name: "gemini-2.5-flash" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-flash" + - role: "Werewolf" + id: "Jordan" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/deepseek-ai-icon-logo-png_seeklogo-611473.png" + agent_id: 
"llm/openrouter/deepseek/deepseek-chat-v3.1" + display_name: "deepseek-chat-v3.1" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/deepseek/deepseek-chat-v3.1" + - role: "Doctor" + role_params: + allow_self_save: true + id: "Charlie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-4o-mini" + display_name: "gpt-4o-mini" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-4o-mini" + - role: "Seer" + id: "Taylor" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-235b-a22b-2507" + display_name: "qwen3-235b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-235b-a22b-2507" + - role: "Villager" + id: "Alex" + thumbnail: "https://z-cdn.chatglm.cn/z-ai/static/logo.svg" + agent_id: "llm/openrouter/z-ai/glm-4.5" + display_name: "glm-4.5" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/z-ai/glm-4.5" + - role: "Villager" + id: "Jamie" + thumbnail: "https://images.seeklogo.com/logo-png/46/1/chatgpt-logo-png_seeklogo-465219.png" + agent_id: "llm/openrouter/openai/gpt-oss-120b" + display_name: "gpt-oss-120b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/openai/gpt-oss-120b" + - role: "Villager" + id: "Quinn" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/gemini/gemini-2.5-pro" + display_name: "gemini-2.5-pro" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "gemini/gemini-2.5-pro" + - role: "Villager" + id: 
"Casey" + thumbnail: "https://images.seeklogo.com/logo-png/61/1/qwen-icon-logo-png_seeklogo-611724.png" + agent_id: "llm/openrouter/qwen/qwen3-30b-a3b" + display_name: "qwen3-30b" + agent_harness_name: "llm_harness" + chat_mode: "text" + enable_bid_reasoning: false + llms: + - model_name: "openrouter/qwen/qwen3-30b-a3b" diff --git a/kaggle_environments/envs/werewolf/scripts/configs/run/run_config.yaml b/kaggle_environments/envs/werewolf/scripts/configs/run/run_config.yaml new file mode 100644 index 00000000..0f306f55 --- /dev/null +++ b/kaggle_environments/envs/werewolf/scripts/configs/run/run_config.yaml @@ -0,0 +1,58 @@ +# Configuration for the Werewolf game environment +game_config: + seed: 42 + actTimeout: 300 + runTimeout: 3600 + discussion_protocol: + name: "TurnByTurnBiddingDiscussion" + params: + max_turns: 16 + day_voting_protocol: + name: "SequentialVoting" + werewolf_night_vote_protocol: + name: "SequentialVoting" + night_elimination_reveal_level: no_reveal + day_exile_reveal_level: no_reveal + agents: + - role: "Werewolf" + id: "Player1" + agent_id: "llm/gemini/gemini-2.5-flash" + display_name: "Player1 (Flash)" + agent_harness_name: "llm_harness" + chat_mode: "text" + llms: + - model_name: "gemini/gemini-2.5-flash" + - role: "Werewolf" + id: "Player2" + agent_id: "llm/gemini/gemini-2.5-pro" + display_name: "Player2 (Pro)" + agent_harness_name: "llm_harness" + chat_mode: "text" + llms: + - model_name: "gemini/gemini-2.5-pro" + - role: "Doctor" + role_params: + allow_self_save: true + id: "Player3" + agent_id: "random" + display_name: "Player3 (Random)" + - role: "Seer" + id: "Player4" + agent_id: "random" + display_name: "Player4 (Random)" + - role: "Villager" + id: "Player5" + agent_id: "random" + display_name: "Player5 (Random)" + - role: "Villager" + id: "Player6" + agent_id: "random" + display_name: "Player6 (Random)" + - role: "Villager" + id: "Player7" + agent_id: "random" + display_name: "Player7 (Random)" + - role: "Villager" + id: "Player8" + 
agent_id: "random" + display_name: "Player8 (Random)" diff --git a/kaggle_environments/envs/werewolf/scripts/configs/run/vertex_api_example_config.yaml b/kaggle_environments/envs/werewolf/scripts/configs/run/vertex_api_example_config.yaml new file mode 100644 index 00000000..da821507 --- /dev/null +++ b/kaggle_environments/envs/werewolf/scripts/configs/run/vertex_api_example_config.yaml @@ -0,0 +1,115 @@ +# Settings for the dump_audio.py script +script_settings: + server: + port: 7999 + paths: + audio_dir_name: "audio" + debug_audio_dir_name: "debug_audio" + output_html_filename: "replay.html" + voices: + moderator: "enceladus" + players: + "gemini-2.0-flash-lite-001_1": 'Kore' + "gemini-2.0-flash-lite-001_2": 'Charon' + "gemini-2.5-flash_3": 'Leda' + "gemini-2.5-flash_4": 'Despina' + "gemini-2.5-flash_5": 'Erinome' + "gemini-2.5-flash_6": 'Gacrux' + "gemini-2.5-flash_7": 'Achird' + "gemini-2.5-flash_8": 'Puck' + audio: + static_moderator_messages: + night_begins: "(rate=\"fast\", volume=\"soft\", voice=\"mysterious\")[As darkness descends, the village falls silent.](rate=\"medium\", pitch=\"-2st\")[Everyone, close your eyes.]" + day_begins: "(rate=\"fast\", volume=\"loud\")[Wake up, villagers!] (rate=\"medium\", voice=\"neutral\")[The sun rises on a new day.] (break=\"50ms\") (rate=\"medium\", voice=\"somber\")[Let's see who survived the night.]" + discussion_begins: "(voice=\"authoritative\")[The town meeting now begins.] (voice=\"neutral\")[You have a few minutes to discuss and find the werewolves among you.] (voice=\"authoritative\")[Begin.]" + voting_begins: "(rate=\"slow\", voice=\"serious\")[The time for talk is over.] 
(break=\"50ms\") (rate=\"medium\", volume=\"loud\", voice=\"dramatic\")[Now, you must cast your votes!]" + +# Configuration for the Werewolf game environment +game_config: + seed: 123 + actTimeout: 300 + runTimeout: 3600 + discussion_protocol: + name: "RoundRobinDiscussion" + params: + max_rounds: 2 + day_voting_protocol: + name: "SequentialVoting" + werewolf_night_vote_protocol: + name: "SequentialVoting" +# reveal_night_elimination_role: false +# reveal_day_exile_role: false + allow_doctor_self_save: true + agents: + - role: "Werewolf" + id: "Alex" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/vertex_ai/gemini-2.0-flash-lite-001" + display_name: "vertex_ai/gemini-2.0-flash-lite-001" + agent_harness_name: "llm_harness" + chat_mode: "text" + llms: + - model_name: "llm/vertex_ai/gemini-2.0-flash-lite-001" + - role: "Werewolf" + id: "Jordan" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/vertex_ai/gemini-2.0-flash-lite-001" + display_name: "vertex_ai/gemini-2.0-flash-lite-001" + agent_harness_name: "llm_harness" + chat_mode: "text" + llms: + - model_name: "llm/vertex_ai/gemini-2.0-flash-lite-001" + - role: "Doctor" + id: "Taylor" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/vertex_ai/gemini-2.5-flash" + display_name: "llm/vertex_ai/gemini-2.5-flash" + agent_harness_name: "llm_harness" + chat_mode: "text" + llms: + - model_name: "llm/vertex_ai/gemini-2.5-flash" + - role: "Seer" + id: "Casey" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/vertex_ai/gemini-2.5-flash" + display_name: "llm/vertex_ai/gemini-2.5-flash" + agent_harness_name: "llm_harness" + chat_mode: "text" + llms: + - model_name: "llm/vertex_ai/gemini-2.5-flash" + - role: "Villager" + id: "Riley" + thumbnail: 
"https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/vertex_ai/gemini-2.5-flash" + display_name: "llm/vertex_ai/gemini-2.5-flash" + agent_harness_name: "llm_harness" + chat_mode: "text" + llms: + - model_name: "llm/vertex_ai/gemini-2.5-flash" + - role: "Villager" + id: "Jamie" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/vertex_ai/gemini-2.5-flash" + display_name: "llm/vertex_ai/gemini-2.5-flash" + agent_harness_name: "llm_harness" + chat_mode: "text" + llms: + - model_name: "llm/vertex_ai/gemini-2.5-flash" + - role: "Villager" + id: "Morgan" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/vertex_ai/gemini-2.5-flash" + display_name: "llm/vertex_ai/gemini-2.5-flash" + agent_harness_name: "llm_harness" + chat_mode: "text" + llms: + - model_name: "llm/vertex_ai/gemini-2.5-flash" + - role: "Villager" + id: "Skyler" + thumbnail: "https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png" + agent_id: "llm/vertex_ai/gemini-2.5-flash" + display_name: "llm/vertex_ai/gemini-2.5-flash" + agent_harness_name: "llm_harness" + chat_mode: "text" + llms: + - model_name: "llm/vertex_ai/gemini-2.5-flash" \ No newline at end of file diff --git a/kaggle_environments/envs/werewolf/scripts/measure_cost.py b/kaggle_environments/envs/werewolf/scripts/measure_cost.py new file mode 100644 index 00000000..26258b41 --- /dev/null +++ b/kaggle_environments/envs/werewolf/scripts/measure_cost.py @@ -0,0 +1,251 @@ +import argparse +import json +import logging +import os +import random +from datetime import datetime + +import matplotlib.pyplot as plt +import numpy as np +import yaml + +from kaggle_environments.envs.werewolf.runner import run_werewolf, setup_logger +from kaggle_environments.envs.werewolf.werewolf import LLM_MODEL_NAMES, CostSummary + +logger = logging.getLogger(__name__) + +AGENT_NAMES = ["Alex", 
def setup_game_config(max_turns: int, base_config: dict, model_name: str):
    """
    Build the game configuration and agent list for a single measurement run.

    Roles and player names are shuffled independently, every seat is given
    the same LLM (``model_name``), and the discussion length is set to
    ``max_turns`` when the config uses ``TurnByTurnBiddingDiscussion``.

    Args:
        max_turns: Value written to ``discussion_protocol.params.max_turns``.
        base_config: The ``game_config`` mapping loaded from YAML. It is deep
            copied, so the caller's mapping (including the nested protocol
            parameters) is never modified.
        model_name: LiteLLM model name used for every agent.

    Returns:
        A ``(config, agent_harnesses)`` tuple: the per-run config dict and a
        list holding one ``"llm/<model_name>"`` entry per role.
    """
    import copy  # local import so the block does not depend on new file-level imports

    # A shallow ``dict.copy()`` shares the nested ``discussion_protocol`` dict
    # with ``base_config``; writing ``max_turns`` below would then leak into
    # the caller's config and into every config returned by earlier calls.
    config = copy.deepcopy(base_config)

    # Define roles and shuffle them
    roles = ["Werewolf", "Werewolf", "Doctor", "Seer", "Villager", "Villager", "Villager", "Villager"]
    random.shuffle(roles)

    # Shuffle a copy: AGENT_NAMES is module-level state and must not be
    # reordered as a side effect of building one config.
    player_names = list(AGENT_NAMES)
    random.shuffle(player_names)

    # Create agent configurations
    config["agents"] = [
        {
            "role": role,
            "id": player_name,
            "agent_id": f"llm/{model_name}",
            "display_name": f"{model_name}/{player_name}",
            "agent_harness_name": "llm_harness",
            "chat_mode": "text",
            "llms": [{"model_name": model_name}],
        }
        for role, player_name in zip(roles, player_names)
    ]

    # Update discussion protocol with the specified max_turns
    protocol = config.get("discussion_protocol")
    if isinstance(protocol, dict) and protocol.get("name") == "TurnByTurnBiddingDiscussion":
        # ``params`` may be absent or null in the YAML; create it on demand
        # instead of failing with KeyError/TypeError.
        if not isinstance(protocol.get("params"), dict):
            protocol["params"] = {}
        protocol["params"]["max_turns"] = max_turns
    else:
        logger.warning("Could not find 'TurnByTurnBiddingDiscussion' protocol to set max_turns.")

    # Set a new random seed for each game to ensure role/name shuffling is different
    config["seed"] = random.randint(0, 2**32 - 1)

    agent_harnesses = [f"llm/{model_name}"] * len(roles)

    return config, agent_harnesses
def plot_results(summary_data, output_dir):
    """
    Draw one mean/std error-bar chart per cost metric and save it as a PNG.

    ``summary_data`` is keyed by the max-turns setting (as a string); each
    entry maps a metric name to a dict with ``"mean"`` and ``"std"``. One
    ``<metric>_vs_max_turns.png`` file is written to ``output_dir`` for each
    of the four metrics.
    """
    turn_settings = sorted(int(key) for key in summary_data)

    for metric_name in ("total_cost", "total_tokens", "total_prompt_tokens", "total_completion_tokens"):
        # Look every setting up once, then split into the two plotted series.
        per_setting = [summary_data[str(setting)][metric_name] for setting in turn_settings]
        mean_values = [stats["mean"] for stats in per_setting]
        std_values = [stats["std"] for stats in per_setting]
        pretty_name = metric_name.replace("_", " ").title()

        plt.figure(figsize=(10, 6))
        plt.errorbar(
            turn_settings,
            mean_values,
            yerr=std_values,
            fmt="-o",
            capsize=5,
            ecolor="red",
            markeredgecolor="black",
        )
        plt.xlabel("Maximum Turns in Discussion")
        plt.ylabel(pretty_name)
        plt.title(f"{pretty_name} vs. Maximum Turns")
        plt.grid(True, which="both", linestyle="--", linewidth=0.5)
        plt.xticks(turn_settings)

        target_path = os.path.join(output_dir, f"{metric_name}_vs_max_turns.png")
        plt.savefig(target_path)
        plt.close()
        logger.info(f"Saved plot: {target_path}")
def plot_token_trajectories(trajectories_data, output_dir):
    """
    Overlay per-query token trajectories for each metric and save one PNG each.

    ``trajectories_data`` maps a metric name to a dict keyed by the max-turns
    setting (as a string), whose values are lists of trajectories. Every
    setting gets its own colour; only the first trajectory of a setting
    carries a legend label. Metrics whose mapping is empty are skipped.
    """
    for metric_name, by_setting in trajectories_data.items():
        if by_setting:
            plt.figure(figsize=(12, 8))

            # One viridis colour per turn setting, in ascending numeric order.
            ordered_settings = sorted(by_setting, key=int)
            palette = plt.cm.viridis(np.linspace(0, 1, len(ordered_settings)))

            for setting, colour in zip(ordered_settings, palette):
                for position, series in enumerate(by_setting[setting]):
                    # Label only the first line of each group to keep the legend compact.
                    legend_text = None if position else f"Max Turns: {setting}"
                    plt.plot(
                        np.arange(len(series)),
                        series,
                        linestyle="-",
                        alpha=0.4,
                        color=colour,
                        label=legend_text,
                    )

            pretty_name = metric_name.replace("_", " ").title()
            plt.title(f"{pretty_name} per Query Step Trajectories")
            plt.xlabel("Query Step")
            plt.ylabel(f"{pretty_name} per Query Step")
            plt.grid(True, which="both", linestyle="--", linewidth=0.5)
            plt.legend()

            target_path = os.path.join(output_dir, f"{metric_name}_trajectories.png")
            plt.savefig(target_path)
            plt.close()
            logger.info(f"Saved trajectory plot: {target_path}")
def _token_trajectories(usage_history_dicts):
    """Split one agent's usage history into (total, reasoning, text) token series."""
    total_tokens, reasoning_tokens, text_tokens = [], [], []
    for usage in usage_history_dicts:
        # ``usage.get(key, {})`` only falls back when the key is missing. A key
        # that is present with value None would make the chained ``.get``
        # raise AttributeError, so normalise with ``or {}`` instead.
        details = usage.get("completion_tokens_details") or {}
        reasoning = details.get("reasoning_tokens") or 0
        total_tokens.append(usage.get("total_tokens") or 0)
        reasoning_tokens.append(reasoning)
        # Visible ("text") completion tokens are whatever is left after reasoning.
        text_tokens.append((usage.get("completion_tokens") or 0) - reasoning)
    return total_tokens, reasoning_tokens, text_tokens


def main():
    """
    Run the cost-measurement sweep and write logs, a JSON summary, and plots.

    For each max-turns setting in ``[8, 12, 16, 20, 24]`` the game is played
    three times with every agent using ``--model_name``. Cost and token totals
    are collected from the ``GAME_END`` cost summary of each finished game,
    aggregated into mean/std per setting, saved as
    ``cost_analysis_summary.json`` and plotted. A game that raises is logged
    and skipped so the remaining runs still execute.
    """
    parser = argparse.ArgumentParser(description="Measure LLM cost for the Werewolf game.")
    parser.add_argument(
        "-c",
        "--config_path",
        type=str,
        default=os.path.join(os.path.dirname(__file__), "configs/run/comprehensive.yaml"),
        help="Path to the base YAML configuration file.",
    )
    parser.add_argument(
        "-o",
        "--output_dir",
        type=str,
        default="cost_measurement",
        help="Output directory for logs, replays, and results.",
    )
    parser.add_argument(
        "-m",
        "--model_name",
        type=str,
        default=DEFAULT_MODEL,
        choices=LLM_MODEL_NAMES,
        help="LiteLLM model name to use for all agents.",
    )
    parser.add_argument("-d", "--disable_debug_mode", action="store_true", help="Disable debug mode.")

    args = parser.parse_args()

    # Create a unique subdirectory for this run
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_output_dir = os.path.join(args.output_dir, f"run_{timestamp}")
    os.makedirs(run_output_dir, exist_ok=True)

    log_filename = f"measure_cost_{timestamp}"
    setup_logger(output_dir=run_output_dir, base_name=log_filename)
    logger.info(f"Starting cost measurement script. Results will be saved in: {run_output_dir}")

    # Load base game configuration. ``safe_load`` returns None for an empty
    # file, so fall back to an empty mapping before reading ``game_config``.
    with open(args.config_path, "r", encoding="utf-8") as f:
        base_config = (yaml.safe_load(f) or {}).get("game_config", {})

    max_turns_to_test = [8, 12, 16, 20, 24]
    runs_per_setting = 3
    results = {
        str(t): {"total_cost": [], "total_tokens": [], "total_prompt_tokens": [], "total_completion_tokens": []}
        for t in max_turns_to_test
    }
    all_trajectories = {
        "total_tokens": {str(t): [] for t in max_turns_to_test},
        "reasoning_tokens": {str(t): [] for t in max_turns_to_test},
        "text_tokens": {str(t): [] for t in max_turns_to_test},
    }

    for turns in max_turns_to_test:
        logger.info(f"--- Starting runs for max_turns = {turns} ---")
        turns_key = str(turns)
        for run in range(runs_per_setting):
            base_name = f"game_turns_{turns}_run_{run + 1}"
            logger.info(f"Starting {base_name}...")

            game_config, agent_harnesses = setup_game_config(turns, base_config, args.model_name)

            try:
                final_env = run_werewolf(
                    output_dir=run_output_dir,
                    base_name=base_name,
                    config=game_config,
                    agents=agent_harnesses,
                    debug=not args.disable_debug_mode,
                )

                # Extract cost summary
                cost_summary_dict = final_env.info.get("GAME_END", {}).get("cost_summary", {})
                if cost_summary_dict:
                    cost_summary = CostSummary(**cost_summary_dict)
                    results[turns_key]["total_cost"].append(cost_summary.total_cost)
                    results[turns_key]["total_tokens"].append(cost_summary.total_tokens)
                    results[turns_key]["total_prompt_tokens"].append(cost_summary.total_prompt_tokens)
                    results[turns_key]["total_completion_tokens"].append(cost_summary.total_completion_tokens)
                    logger.info(f"Finished {base_name}. Total Cost: ${cost_summary.total_cost:.4f}")

                    for agent_summary in cost_summary.cost_per_agent:
                        if agent_summary.data and agent_summary.data.usage_history:
                            usage_history_dicts = [usage.model_dump() for usage in agent_summary.data.usage_history]
                            total_traj, reasoning_traj, text_traj = _token_trajectories(usage_history_dicts)
                            all_trajectories["total_tokens"][turns_key].append(total_traj)
                            all_trajectories["reasoning_tokens"][turns_key].append(reasoning_traj)
                            all_trajectories["text_tokens"][turns_key].append(text_traj)
                else:
                    logger.error(f"Could not find cost summary for {base_name}.")

            except Exception as e:
                # Deliberate best effort: one failed game must not abort the sweep.
                logger.error(f"An error occurred during {base_name}: {e}", exc_info=True)

    # Calculate mean and standard deviation
    summary_data = {}
    for turns, metrics in results.items():
        summary_data[turns] = {}
        for metric, values in metrics.items():
            if values:
                # Plain floats keep the JSON dump independent of NumPy scalar types.
                summary_data[turns][metric] = {
                    "mean": float(np.mean(values)),
                    "std": float(np.std(values)),
                    "raw_values": values,
                }
            else:
                summary_data[turns][metric] = {"mean": 0, "std": 0, "raw_values": []}

    # Save summary to JSON
    summary_filename = os.path.join(run_output_dir, "cost_analysis_summary.json")
    with open(summary_filename, "w", encoding="utf-8") as f:
        json.dump(summary_data, f, indent=4)
    logger.info(f"Saved summary results to {summary_filename}")

    # Plot results
    plot_results(summary_data, run_output_dir)
    plot_token_trajectories(all_trajectories, run_output_dir)

    logger.info("--- Cost measurement script finished ---")


if __name__ == "__main__":
    main()
def plot_token_trajectories(trajectories_data, output_dir):
    """
    Plots token usage trajectories, grouped by max_turns, and saves them to files.

    ``trajectories_data`` maps a metric name to a dict keyed by the max-turns
    setting (as a string) whose values are lists of trajectories. One
    ``<metric>_trajectories.png`` is written to ``output_dir`` per metric that
    has data. Trajectories containing non-numeric entries are logged and
    skipped.
    """
    for metric, trajectories_by_turns in trajectories_data.items():
        if not trajectories_by_turns:
            logger.warning(f"No data found for metric '{metric}'. Skipping plot.")
            continue

        plt.figure(figsize=(12, 8))

        # One colour per turn setting, in ascending numeric order
        turn_keys = sorted(trajectories_by_turns, key=int)
        colors = plt.cm.viridis(np.linspace(0, 1, len(turn_keys)))

        for turns, color in zip(turn_keys, colors):
            # Track labelling per group instead of relying on index 0: if the
            # first trajectory is skipped below, the next plotted one must
            # carry the label or the group would be missing from the legend.
            labelled = False
            for traj in trajectories_by_turns[turns]:
                if not all(isinstance(x, (int, float)) for x in traj):
                    logger.error(
                        f"Trajectory for metric '{metric}' (turns={turns}) contains non-numeric data. Skipping."
                    )
                    continue
                # Only the first plotted trajectory of each group is labelled, for a clean legend
                label = None if labelled else f"Max Turns: {turns}"
                plt.plot(np.arange(len(traj)), traj, linestyle="-", alpha=0.4, color=color, label=label)
                labelled = True

        plt.title(f"{metric.replace('_', ' ').title()} per Query Step Trajectories")
        plt.xlabel("Query Step")
        plt.ylabel(f"{metric.replace('_', ' ').title()} per Query Step")
        plt.grid(True, which="both", linestyle="--", linewidth=0.5)
        plt.legend()

        plot_filename = os.path.join(output_dir, f"{metric}_trajectories.png")
        plt.savefig(plot_filename)
        plt.close()
        logger.info(f"Saved trajectory plot: {plot_filename}")
def _token_trajectories(usage_history_dicts):
    """Split one agent's usage history into (total, reasoning, text) token series."""
    total_tokens, reasoning_tokens, text_tokens = [], [], []
    for usage in usage_history_dicts:
        # ``usage.get(key, {})`` only falls back when the key is missing. A key
        # that is present with value None would make the chained ``.get``
        # raise AttributeError, so normalise with ``or {}`` instead.
        details = usage.get("completion_tokens_details") or {}
        reasoning = details.get("reasoning_tokens") or 0
        total_tokens.append(usage.get("total_tokens") or 0)
        reasoning_tokens.append(reasoning)
        # Visible ("text") completion tokens are whatever is left after reasoning.
        text_tokens.append((usage.get("completion_tokens") or 0) - reasoning)
    return total_tokens, reasoning_tokens, text_tokens


def main():
    """
    Rebuild the token-trajectory plots from an existing measure_cost.py output directory.

    Every ``game_*_run_*.json`` replay in ``--input_dir`` is read, the
    max-turns setting is parsed from its file name, and the per-agent usage
    history in the ``GAME_END`` cost summary is turned into trajectories.
    Replays that cannot be read, do not match the naming scheme, or carry no
    cost summary are logged and skipped. Plots are written back into the
    input directory.
    """
    parser = argparse.ArgumentParser(
        description="Load data from a measure_cost.py output directory and generate token trajectory plots."
    )
    parser.add_argument(
        "-i",
        "--input_dir",
        type=str,
        required=True,
        help="Path to the output directory of a previous measure_cost.py run.",
    )
    args = parser.parse_args()

    if not os.path.isdir(args.input_dir):
        logger.error(f"Input directory not found: {args.input_dir}")
        return

    logger.info(f"Loading data from: {args.input_dir}")

    all_trajectories = {"total_tokens": {}, "reasoning_tokens": {}, "text_tokens": {}}

    # Find all game replay JSON files. glob returns them in arbitrary order;
    # sorting keeps the overlay order of the plotted lines reproducible.
    game_files = sorted(glob.glob(os.path.join(args.input_dir, "game_*_run_*.json")))
    if not game_files:
        logger.error(f"No game replay files (game_*_run_*.json) found in {args.input_dir}.")
        return

    logger.info(f"Found {len(game_files)} game replay files to process.")

    for game_file in game_files:
        # Extract max_turns from filename
        match = re.search(r"game_turns_(\d+)_run_", os.path.basename(game_file))
        if not match:
            logger.warning(f"Could not parse max_turns from filename: {game_file}. Skipping.")
            continue
        turns = match.group(1)

        # A truncated or unreadable replay should not discard the data from
        # every other run.
        try:
            with open(game_file, "r", encoding="utf-8") as f:
                game_data = json.load(f)
        except (OSError, ValueError) as e:
            logger.error(f"Could not read {game_file}: {e}. Skipping.")
            continue

        cost_summary_dict = game_data.get("info", {}).get("GAME_END", {}).get("cost_summary")
        if not cost_summary_dict:
            logger.warning(f"No cost_summary found in {game_file}. Skipping.")
            continue

        cost_summary = CostSummary(**cost_summary_dict)

        for agent_summary in cost_summary.cost_per_agent:
            if agent_summary.data and agent_summary.data.usage_history:
                usage_history_dicts = [usage.model_dump() for usage in agent_summary.data.usage_history]
                total_traj, reasoning_traj, text_traj = _token_trajectories(usage_history_dicts)
                all_trajectories["total_tokens"].setdefault(turns, []).append(total_traj)
                all_trajectories["reasoning_tokens"].setdefault(turns, []).append(reasoning_traj)
                all_trajectories["text_tokens"].setdefault(turns, []).append(text_traj)

    logger.info("Finished processing all files. Generating plots...")
    plot_token_trajectories(all_trajectories, args.input_dir)
    logger.info(f"--- Script finished. Plots saved in {args.input_dir} ---")
def main():
    """
    Regenerate a Werewolf replay HTML page from a saved game record.

    The JSON record given by ``--input_json`` is loaded, a ``werewolf``
    environment is rebuilt from its configuration, steps and info, and the
    HTML rendering is written to ``--output_html`` (parent directories are
    created when needed). Problems are reported through logging and the
    function returns without raising.
    """
    parser = argparse.ArgumentParser(
        description="Rerender a Werewolf game HTML replay from a JSON game record.",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    # Both options are required string paths; only flags and help text differ.
    for short_flag, long_flag, help_text in (
        ("-i", "--input_json", "Path to the input game record JSON file (e.g., werewolf_game.json)."),
        ("-o", "--output_html", "Path to write the newly rendered HTML output file."),
    ):
        parser.add_argument(short_flag, long_flag, type=str, required=True, help=help_text)
    args = parser.parse_args()

    source_path = args.input_json
    logging.info(f"Loading game record from: {source_path}")
    if not os.path.exists(source_path):
        logging.error(f"Error: Input file not found at {source_path}")
        return

    try:
        with open(source_path, "r", encoding="utf-8") as record_file:
            record = json.load(record_file)
    except json.JSONDecodeError:
        logging.error(f"Error: Failed to decode JSON from {source_path}. The file might be corrupted.")
        return
    except Exception as read_error:
        logging.error(f"An unexpected error occurred while reading the file: {read_error}")
        return

    logging.info("Successfully loaded game data. Initializing Kaggle environment...")

    # The record normally names its environment; a missing name is treated as
    # 'werewolf'. Any other name is only reported, rendering still uses 'werewolf'.
    recorded_env = record.get("name", "werewolf")
    if recorded_env != "werewolf":
        logging.warning(f"Game record is for '{recorded_env}', but we are rendering with the 'werewolf' environment.")

    try:
        # Rebuild the environment state from the pieces stored in the record.
        env = make(
            "werewolf",
            configuration=record.get("configuration"),
            steps=record.get("steps", []),
            info=record.get("info", {}),
        )
        logging.info("Environment initialized. Rendering new HTML...")

        # Rendering uses the viewer shipped with the installed kaggle_environments package.
        rendered_page = env.render(mode="html")

        target_path = args.output_html
        parent_dir = os.path.dirname(target_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(target_path, "w", encoding="utf-8") as html_file:
            html_file.write(rendered_page)

        logging.info(f"Successfully rerendered HTML to: {target_path}")

    except Exception as render_error:
        logging.error(f"An error occurred during environment creation or rendering: {render_error}")
        logging.error(
            "Please ensure the 'kaggle_environments' package is correctly installed and the JSON file is valid."
        )
def _model_name_from_agent_id(agent_id, prefix="llm/"):
    """Return the model name encoded in an ``llm/<model>`` agent id.

    Only the literal leading ``prefix`` is removed. ``str.lstrip`` must not be
    used for this: its argument is a *set of characters*, so
    ``"llm/llama-3".lstrip("llm/")`` yields ``"ama-3"`` instead of
    ``"llama-3"``.

    Args:
        agent_id: Agent identifier taken from the game config.
        prefix: Literal prefix that marks an LLM-backed agent.

    Returns:
        ``agent_id`` without the leading ``prefix``; ``agent_id`` unchanged
        when it does not start with ``prefix``.
    """
    if agent_id.startswith(prefix):
        return agent_id[len(prefix):]
    return agent_id


def main():
    """Run a single Werewolf game described by a YAML configuration file.

    Parses the command line, prepares the output directory and logger, loads
    the ``game_config`` section of the YAML file, optionally shuffles the
    (role, role_params) pairs among the configured agents, registers an LLM
    agent for every ``llm/<model>`` agent id, and finally runs the game.
    """
    parser = argparse.ArgumentParser(description="Run a single Werewolf game.")
    parser.add_argument(
        "-c",
        "--config_path",
        type=str,
        default=os.path.join(os.path.dirname(__file__), "configs/run/run_config.yaml"),
        help="Path to the YAML configuration file.",
    )
    parser.add_argument(
        "-o", "--output_dir", type=str, default="werewolf_run", help="Output directory for the log and replay file."
    )
    parser.add_argument("-d", "--debug", action="store_true", help="Enable debug mode.")
    parser.add_argument(
        "-r", "--random_agents", action="store_true", help="Use random agents for all players for fast testing."
    )
    parser.add_argument(
        "-a", "--append_timestamp_to_dir", action="store_true", help="Append a timestamp to the output directory."
    )
    parser.add_argument(
        "-s", "--shuffle_roles", action="store_true", help="If provided, shuffle the roles provided in the config."
    )

    args = parser.parse_args()

    # Create a unique subdirectory for this run
    run_output_dir = append_timestamp_to_dir(args.output_dir, append=args.append_timestamp_to_dir)

    os.makedirs(run_output_dir, exist_ok=True)

    base_name = "werewolf_game"
    setup_logger(output_dir=run_output_dir, base_name=base_name)

    log_git_hash()

    # Load game configuration. An empty YAML document loads as None, so fall
    # back to an empty mapping instead of failing on ``None.get``.
    with open(args.config_path, "r") as f:
        config = yaml.safe_load(f) or {}
    game_config = config.get("game_config", {})

    # Shuffle roles: each role travels together with its role_params.
    if args.shuffle_roles:
        role_and_params = [(agent["role"], agent.get("role_params", {})) for agent in game_config["agents"]]
        random.shuffle(role_and_params)
        for agent, (new_role, new_role_params) in zip(game_config["agents"], role_and_params):
            agent["role"] = new_role
            agent["role_params"] = new_role_params

    # Extract agent harnesses from the config and register the agents
    agents_ = [agent.get("agent_id", "random") for agent in game_config.get("agents", [])]
    agent_dict = {}
    for agent_name in agents_:
        if agent_name.startswith("llm/"):
            model_name = _model_name_from_agent_id(agent_name)
            agent_dict[agent_name] = AgentFactoryWrapper(
                LLMWerewolfAgent, model_name=model_name, system_prompt=LLM_SYSTEM_PROMPT
            )
    register_agents(agent_dict)

    if args.random_agents:
        logger.info("Using random agents for all players.")
        agents_ = ["random"] * len(agents_)

    logger.info(f"Starting Werewolf game run. Output will be saved to: {run_output_dir}")
    with LogExecutionTime(logger_obj=logger, task_str="single game"):
        run_werewolf(
            output_dir=run_output_dir, base_name=base_name, config=game_config, agents=agents_, debug=args.debug
        )
    logger.info(f"Game finished. Replay and log saved in: {run_output_dir}")
def generate_game_tasks(output_dir, num_blocks, config, use_random_agents, debug, shuffle_player_ids):
    """
    Lazily generates all game configurations for the entire experiment.

    Every block draws one role arrangement and plays ``len(agents)`` games
    with it. The (shuffled) players are rotated by one seat after each game,
    so within a block each player is paired with each position of the
    arrangement once. Block and game directories are created on disk as the
    tasks are produced.

    Args:
        output_dir: Root directory; ``block_<i>/game_<j>`` is created below it.
        num_blocks: Number of blocks to generate.
        config: Loaded experiment config; ``config["game_config"]["agents"]``
            lists the players. The config is not modified.
        use_random_agents: Passed through unchanged in each task tuple.
        debug: Passed through unchanged in each task tuple.
        shuffle_player_ids: When true, the ``id`` values are randomly
            reassigned among the agents of every game.

    Yields:
        ``(game_dir, game_config, use_random_agents, debug, block_index,
        game_in_block)`` tuples.
    """
    base_game_config = config["game_config"]
    # Shuffle a private copy of the list: shuffling
    # base_game_config["agents"] directly would silently reorder the
    # caller's config as a side effect of iterating this generator.
    players_data = list(base_game_config["agents"])
    base_role_configs = [{"role": agent["role"], "role_params": agent.get("role_params", {})} for agent in players_data]

    logger.info("Generating all unique role configurations...")
    all_role_configs = get_all_unique_role_configs(base_role_configs)
    logger.info(f"Found {len(all_role_configs)} unique arrangements.")

    # The pool of arrangements is refilled whenever it runs dry, so
    # arrangements repeat once num_blocks exceeds their count. Warn once
    # rather than on every refill.
    if num_blocks > len(all_role_configs):
        logger.warning("Sampling with replacement as num_blocks > unique configurations.")

    available_role_configs = []

    for block_index in range(num_blocks):
        block_dir = os.path.join(output_dir, f"block_{block_index}")
        os.makedirs(block_dir, exist_ok=True)

        if not available_role_configs:
            available_role_configs = list(all_role_configs)
            random.shuffle(available_role_configs)

        block_role_config = available_role_configs.pop()
        random.shuffle(players_data)
        current_players_deque = collections.deque(players_data)

        for game_in_block in range(len(players_data)):
            game_dir = os.path.join(block_dir, f"game_{game_in_block}")
            os.makedirs(game_dir, exist_ok=True)

            # Overlay the seat's role/role_params on a shallow copy of the
            # player's own config.
            game_agents_config = [
                {**player_config, **role_config}
                for player_config, role_config in zip(current_players_deque, block_role_config)
            ]

            if shuffle_player_ids:
                player_ids = [agent["id"] for agent in game_agents_config]
                random.shuffle(player_ids)
                for agent, player_id in zip(game_agents_config, player_ids):
                    agent["id"] = player_id

            game_config = {**base_game_config, "agents": game_agents_config}
            yield (game_dir, game_config, use_random_agents, debug, block_index, game_in_block)
            current_players_deque.rotate(1)
Forcing sequential execution.") + + base_game_config = config["game_config"] + players_data = base_game_config["agents"] + total_games = num_blocks * len(players_data) + + if parallel: + logger.info(f"Running games in parallel with up to {num_processes} processes.") + + game_tasks = generate_game_tasks(output_dir, num_blocks, config, use_random_agents, debug, shuffle_player_ids) + + with tqdm(total=total_games, desc="Processing Games") as pbar: + if parallel: + with multiprocessing.Pool(processes=num_processes) as pool: + for _ in pool.imap_unordered(game_runner_wrapper, game_tasks): + pbar.update(1) + else: + for task_args in game_tasks: + game_runner_wrapper(task_args) + pbar.update(1) + + logger.info("All game tasks have been processed.") + + +def main(): + script_dir = os.path.dirname(os.path.abspath(__file__)) + default_config_path = os.path.join(script_dir, "configs", "run", "run_config.yaml") + + parser = argparse.ArgumentParser( + description="Run a block-design experiment for the Werewolf game, " + "where each block is a complete role rotation amongst the players." + ) + parser.add_argument( + "-o", + "--output_dir", + type=str, + help="Output directory for game replays and logs.", + default="werewolf_block_experiment", + ) + parser.add_argument( + "-c", "--config", type=str, default=default_config_path, help="Path to the base configuration YAML file." + ) + parser.add_argument( + "-b", + "--num_blocks", + type=int, + default=10, + help="Number of blocks to run. Each block is a complete role rotation.", + ) + parser.add_argument( + "-r", "--use_random_agents", action="store_true", help="Use random agents for all players for fast testing." + ) + parser.add_argument( + "-d", + "--debug", + action="store_true", + help="Enable debug mode for the game environment. 
" + "Note that you can use debug mode to enable intra game sequential execution.", + ) + parser.add_argument("-p", "--parallel", action="store_true", help="Run games in parallel using multiple processes.") + parser.add_argument( + "-n", "--num_processes", type=int, default=None, help="Number of processes for parallel execution." + ) + parser.add_argument( + "-a", "--append_timestamp_to_dir", action="store_true", help="Append a timestamp to the output directory." + ) + parser.add_argument( + "-s", + "--shuffle_player_ids", + action="store_true", + help="Shuffle player ids for each game to account for name bias.", + ) + + args = parser.parse_args() + + output_dir = append_timestamp_to_dir(args.output_dir, append=args.append_timestamp_to_dir) + + os.makedirs(output_dir, exist_ok=True) + + setup_logger(output_dir, "run_block") + + config = load_config(args.config) + + num_players = len(config.get("game_config", {}).get("agents", [])) + if args.num_processes is None: + num_processes = multiprocessing.cpu_count() * 0.9 + if not args.debug: + num_processes /= num_players + num_processes = max(1, math.floor(num_processes)) + else: + num_processes = args.num_processes + + logger.info("Starting experiment with the following settings:") + logger.info(f"Output Directory: {output_dir}") + logger.info(f"Number of Blocks: {args.num_blocks}") + logger.info(f"Parallel Execution: {args.parallel}") + if args.parallel: + logger.info(f"Number of Processes: {num_processes}") + logger.info(f"Debug Mode: {args.debug}") + logger.info(f"Use Random Agents: {args.use_random_agents}") + logger.info(f"Shuffle Player IDs: {args.shuffle_player_ids}") + + with LogExecutionTime(logger_obj=logger, task_str="block experiment"): + run_experiment( + output_dir=output_dir, + num_blocks=args.num_blocks, + config=config, + use_random_agents=args.use_random_agents, + debug=args.debug, + parallel=args.parallel, + num_processes=num_processes, + shuffle_player_ids=args.shuffle_player_ids, + ) + 
from typing import Tuple  # required by the corrected return annotation below


def get_team_roles(base_roles: List[str]) -> Tuple[List[str], List[str]]:
    """Partitions roles into villager and werewolf teams.

    Every role name is converted with ``RoleConst(role_name)``; names equal to
    ``RoleConst.WEREWOLF`` go to the werewolf team and every other role is
    counted on the villager team. Input order is preserved within each team.

    Args:
        base_roles: Role names as they appear in the game config.

    Returns:
        ``(villager_roles, werewolf_roles)``, two lists of the original role
        name strings.

    Note:
        ``RoleConst(role_name)`` is evaluated for every entry, so a name that
        ``RoleConst`` does not accept propagates whatever error that
        conversion raises (presumably ``ValueError`` if ``RoleConst`` is an
        ``Enum`` -- confirm against its definition).
    """
    villager_roles = []
    werewolf_roles = []
    for role_name in base_roles:
        role = RoleConst(role_name)
        if role == RoleConst.WEREWOLF:
            werewolf_roles.append(role_name)
        else:
            villager_roles.append(role_name)
    return villager_roles, werewolf_roles
def assign_roles_dup_agents(roles, agent_config, player_ids):
    """Clone ``agent_config`` once per role and stamp each clone with a role and id.

    A deep copy of ``agent_config`` is made for every entry of ``roles``. The
    clones are then paired positionally with ``roles`` and ``player_ids``;
    pairing stops at the shortest sequence, so clones beyond that point keep
    the template's own ``role``/``id`` values.

    Returns:
        The list of cloned agent dicts (``len(roles)`` entries).
    """
    clones = []
    for _ in roles:
        clones.append(deepcopy(agent_config))

    for clone, (role, player_id) in zip(clones, zip(roles, player_ids)):
        clone.update({"role": role, "id": player_id})
    return clones


def prepare_pairwise_agents(villager_roles, werewolf_roles, player_a_config, player_b_config, player_ids):
    """Build the agent list for one pairwise game.

    Player A's config is duplicated across all villager roles and player B's
    config across all werewolf roles. The first ``len(villager_roles)``
    entries of ``player_ids`` go to the villager clones and the remainder to
    the werewolf clones.

    Returns:
        Villager-team agents followed by werewolf-team agents.
    """
    split_at = len(villager_roles)
    villager_agents = assign_roles_dup_agents(villager_roles, player_a_config, player_ids[:split_at])
    werewolf_agents = assign_roles_dup_agents(werewolf_roles, player_b_config, player_ids[split_at:])
    return [*villager_agents, *werewolf_agents]
def run_tournament(output_dir, num_tournaments, config, use_random_agents, debug, parallel, num_processes):
    """
    Generate every game task of the pairwise tournament and execute them.

    The expected number of games is ``num_tournaments`` times the square of
    the number of configured agents. All tasks are materialized up front and
    shuffled before execution, then run either through a multiprocessing pool
    (``parallel`` true, ``num_processes`` workers) or one after another in the
    current process. A progress bar advances once per finished game.
    """
    agent_count = len(config["game_config"]["agents"])
    total_games = num_tournaments * agent_count**2

    if parallel:
        logger.info(f"Running games in parallel with up to {num_processes} processes.")

    # Materialize the generator so the tasks can be shuffled; the shuffle is
    # to reduce the load of a particular LLM api.
    shuffled_tasks = list(generate_game_tasks(output_dir, num_tournaments, config, use_random_agents, debug))
    random.shuffle(shuffled_tasks)

    with tqdm(total=total_games, desc="Processing Games") as pbar:
        if not parallel:
            for task_args in shuffled_tasks:
                game_runner_wrapper(task_args)
                pbar.update(1)
        else:
            with multiprocessing.Pool(processes=num_processes) as pool:
                finished = pool.imap_unordered(game_runner_wrapper, shuffled_tasks)
                for _ in finished:
                    pbar.update(1)

    logger.info("All game tasks have been processed.")
+ ) + parser.add_argument( + "-t", + "--num_tournaments", + type=int, + default=1, + help="Number of tournaments to run. Each tournament is a full N*N matrix of games.", + ) + parser.add_argument( + "-r", "--use_random_agents", action="store_true", help="Use random agents for all players for fast testing." + ) + parser.add_argument( + "-d", + "--debug", + action="store_true", + help="Enable debug mode for the game environment. Forces sequential execution.", + ) + parser.add_argument("-p", "--parallel", action="store_true", help="Run games in parallel using multiple processes.") + parser.add_argument( + "-n", "--num_processes", type=int, default=None, help="Number of processes for parallel execution." + ) + parser.add_argument( + "-a", "--append_timestamp_to_dir", action="store_true", help="Append a timestamp to the output directory." + ) + + args = parser.parse_args() + + output_dir = append_timestamp_to_dir(args.output_dir, append=args.append_timestamp_to_dir) + + os.makedirs(output_dir, exist_ok=True) + + setup_logger(output_dir, "run_pairwise_matrix") + + config = load_config(args.config) + + if args.num_processes is None: + num_processes = max(1, math.floor(multiprocessing.cpu_count() * 0.8)) + else: + num_processes = args.num_processes + + logger.info("Starting tournament with the following settings:") + logger.info(f"Output Directory: {output_dir}") + logger.info(f"Number of Tournaments: {args.num_tournaments}") + logger.info(f"Parallel Execution: {args.parallel}") + if args.parallel: + logger.info(f"Number of Processes: {num_processes}") + logger.info(f"Debug Mode: {args.debug}") + logger.info(f"Use Random Agents: {args.use_random_agents}") + + with LogExecutionTime(logger_obj=logger, task_str="pairwise matrix tournament"): + run_tournament( + output_dir=output_dir, + num_tournaments=args.num_tournaments, + config=config, + use_random_agents=args.use_random_agents, + debug=args.debug, + parallel=args.parallel, + num_processes=num_processes, + ) + 
def game_runner_wrapper(args):
    """Unpack one ``(game_dir, game_config, use_random_agents, debug)`` task and run it with retries."""
    game_dir, game_config, use_random_agents, debug = args
    run_single_game_with_retry(game_dir, game_config, use_random_agents, debug)


def shuffle_field(agents, field_name):
    """Randomly redistribute the values of ``field_name`` among ``agents`` in place."""
    values = [agent[field_name] for agent in agents]
    random.shuffle(values)
    for agent, value in zip(agents, values):
        agent[field_name] = value


def _collect_failed_games(future_to_dir, on_done):
    """Wait for every future, calling ``on_done()`` per completion; return ``(game_dir, exception)`` for each failure."""
    failed = []
    for future in as_completed(future_to_dir):
        error = future.exception()
        if error is not None:
            failed.append((future_to_dir[future], error))
        on_done()
    return failed


def run_self_play_games(
    model_name,
    thumbnail,
    output_dir,
    num_games,
    config,
    use_random_agents,
    debug,
    parallel,
    num_processes,
    shuffle_roles,
):
    """
    Generates and runs game tasks for the self-play experiment.

    Every agent in ``config["game_config"]["agents"]`` is rewritten in place
    to use ``model_name`` (agent id, display name, thumbnail and the first
    entry of its ``llms`` list). For each of the ``num_games`` games a deep
    copy of that config is taken, roles are optionally shuffled, player ids
    are always shuffled, and a ``game_<i>`` directory is created under
    ``output_dir``.

    Args:
        model_name: Model used for every player.
        thumbnail: Thumbnail URL assigned to every player.
        output_dir: Directory that receives one ``game_<i>`` folder per game.
        num_games: Number of games to run.
        config: Loaded YAML config; note that it is modified in place.
        use_random_agents: Forwarded to each game task.
        debug: Forwarded to each game task.
        parallel: Run the games concurrently on a thread pool.
        num_processes: ``max_workers`` of the thread pool.
        shuffle_roles: Shuffle (role, role_params) pairs among the agents of
            each game.

    Raises:
        RuntimeError: In parallel mode, after all games have finished, if any
            game failed. In sequential mode the first failing game's
            exception propagates immediately.
    """
    if debug:
        logger.warning("Debug mode is enabled. Forcing sequential execution.")

    game_tasks = []
    base_game_config = config["game_config"]

    # modify the config to use a single model
    agents = base_game_config["agents"]
    for agent in agents:
        agent["thumbnail"] = thumbnail
        agent["agent_id"] = f"llm/{model_name}"
        agent["display_name"] = os.path.basename(model_name)
        agent["llms"][0]["model_name"] = model_name

    for i in range(num_games):
        game_output_dir = os.path.join(output_dir, f"game_{i}")
        os.makedirs(game_output_dir, exist_ok=True)

        game_config = copy.deepcopy(base_game_config)

        if shuffle_roles:
            logger.info(f"Shuffling roles for game {i}")
            role_configs = [
                {"role": agent["role"], "role_params": agent.get("role_params", {})} for agent in game_config["agents"]
            ]
            random.shuffle(role_configs)
            for agent, role_config in zip(game_config["agents"], role_configs):
                agent["role"] = role_config["role"]
                agent["role_params"] = role_config["role_params"]

        # shuffle player ids
        logger.info(f"Shuffling player ids for game {i}")
        shuffle_field(game_config["agents"], "id")

        task = (game_output_dir, game_config, use_random_agents, debug)
        game_tasks.append(task)

    failed_games = []
    with tqdm(total=num_games, desc="Running Self-Play Games") as pbar:
        if parallel:
            with ThreadPoolExecutor(max_workers=num_processes) as executor:
                # task[0] is the game's output directory; keep it so a
                # failure can be attributed to a specific game.
                future_to_dir = {executor.submit(game_runner_wrapper, task): task[0] for task in game_tasks}
                # An exception raised inside a worker is stored on its future
                # and is lost unless the future is inspected.
                failed_games = _collect_failed_games(future_to_dir, lambda: pbar.update(1))
        else:
            for task in game_tasks:
                game_runner_wrapper(task)
                pbar.update(1)

    if failed_games:
        for game_dir, error in failed_games:
            logger.error(f"Game in {game_dir} failed: {error}")
        failed_dirs = [game_dir for game_dir, _ in failed_games]
        raise RuntimeError(f"{len(failed_games)} of {num_games} self-play games failed: {failed_dirs}")
to the YAML configuration file." + ) + parser.add_argument( + "-o", + "--output_dir", + type=str, + default="werewolf_self_play", + help="Output directory for the log and replay files.", + ) + parser.add_argument( + "-m", + "--model_name", + type=str, + default="gemini/gemini-2.5-flash", + help="The model name by litellm for self play.", + ) + parser.add_argument( + "-t", + "--thumbnail", + type=str, + default="https://logos-world.net/wp-content/uploads/2025/01/Google-Gemini-Symbol.png", + help="The thumbnail image url.", + ) + parser.add_argument("-n", "--num_games", type=int, default=1, help="Number of self-play games to run.") + parser.add_argument("-d", "--debug", action="store_true", help="Enable debug mode.") + parser.add_argument( + "-r", "--random_agents", action="store_true", help="Use random agents for all players for fast testing." + ) + parser.add_argument( + "-a", "--append_timestamp_to_dir", action="store_true", help="Append a timestamp to the output directory." + ) + parser.add_argument( + "-s", "--shuffle_roles", action="store_true", help="If provided, shuffle the roles for each game." 
+ ) + parser.add_argument("-p", "--parallel", action="store_true", help="Run games in parallel using multiple processes.") + parser.add_argument("--num_processes", type=int, default=None, help="Number of processes for parallel execution.") + + args = parser.parse_args() + + run_output_dir = append_timestamp_to_dir(args.output_dir, append=args.append_timestamp_to_dir) + os.makedirs(run_output_dir, exist_ok=True) + setup_logger(output_dir=run_output_dir, base_name="self_play") + + with open(args.config_path, "r") as f: + config = yaml.safe_load(f) + + num_processes = args.num_processes + if args.parallel and num_processes is None: + # Default to 4x the number of CPUs for I/O bound tasks + num_processes = multiprocessing.cpu_count() * 4 + + logger.info("Starting self-play with the following settings:") + logger.info(f"Model Name: {args.model_name}") + logger.info(f"Thumbnail: {args.thumbnail}") + logger.info(f"Output Directory: {run_output_dir}") + logger.info(f"Number of Games: {args.num_games}") + logger.info(f"Config Path: {args.config_path}") + logger.info(f"Parallel Execution: {args.parallel}") + if args.parallel: + logger.info(f"Number of Processes: {num_processes}") + logger.info(f"Debug Mode: {args.debug}") + logger.info(f"Use Random Agents: {args.random_agents}") + logger.info(f"Shuffle Roles: {args.shuffle_roles}") + + with LogExecutionTime(logger_obj=logger, task_str=f"{args.num_games} self-play games"): + run_self_play_games( + model_name=args.model_name, + thumbnail=args.thumbnail, + output_dir=run_output_dir, + num_games=args.num_games, + config=config, + use_random_agents=args.random_agents, + debug=args.debug, + parallel=args.parallel, + num_processes=num_processes, + shuffle_roles=args.shuffle_roles, + ) + + logger.info("Self-play run finished successfully.") + + +if __name__ == "__main__": + main() diff --git a/kaggle_environments/envs/werewolf/scripts/utils.py b/kaggle_environments/envs/werewolf/scripts/utils.py new file mode 100644 index 
def run_single_game_cli(game_dir, game_config, use_random_agents, debug):
    """
    Run one game in a child Python process by invoking the sibling ``run.py``.

    The game config is written to ``<game_dir>/config.yaml`` (wrapped under a
    ``game_config`` key) and ``run.py`` is started with that file as
    ``--config_path`` and ``game_dir`` as ``--output_dir``. Running a separate
    process gives an atomic game execution unit, so the logging and dumps
    (html render, json) of each game stay cleanly separated.

    Args:
        game_dir: Existing directory that receives the config and the outputs.
        game_config: Game configuration mapping to serialize.
        use_random_agents: Adds ``--random_agents`` to the command line.
        debug: Adds ``--debug`` to the command line.

    Raises:
        RuntimeError: If the child process exits with a non-zero return code;
            the message carries its return code, stdout and stderr.
    """
    config_path = os.path.join(game_dir, "config.yaml")
    with open(config_path, "w") as config_file:
        yaml.dump({"game_config": game_config}, config_file, default_flow_style=False)

    script_path = os.path.join(os.path.dirname(__file__), "run.py")
    optional_flags = []
    if use_random_agents:
        optional_flags.append("--random_agents")
    if debug:
        optional_flags.append("--debug")
    cmd = [sys.executable, script_path, "--config_path", config_path, "--output_dir", game_dir, *optional_flags]

    try:
        completed = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as err:
        error_message = (
            f"Error running game in {game_dir}.\nReturn Code: {err.returncode}\nStdout: {err.stdout}\nStderr: {err.stderr}"
        )
        logger.error(error_message)
        raise RuntimeError(error_message) from err

    logger.info(f"Game in {game_dir} completed successfully.")
    if completed.stdout:
        logger.info(completed.stdout)
    if completed.stderr:
        # The process succeeded, so anything on stderr is reported as a warning only.
        logger.warning(f"Stderr (non-fatal) from game in {game_dir}: {completed.stderr}")