class CAGEActionParameterCompletionADMComponent(ADMComponent):
    """ADM pipeline step that fills in the hostname of a chosen CAGE action.

    Given the choice made by an earlier step, this component looks up the
    matching action and, when that action takes a hostname, asks the
    structured inference engine which host it should be applied to.
    """

    # Names of the CAGE actions that take a hostname parameter.
    _HOSTNAME_ACTIONS = frozenset({'Analyse', 'Misinform', 'Remove', 'Restore'})

    def __init__(self,
                 structured_inference_engine):
        self.structured_inference_engine = structured_inference_engine

    # TODO: Copied from outlines_adm.py; should use a common template/prompt
    def _state_to_top_level_prompt(self, scenario_state, actions):
        """
        Generate prompt dialog based on given state and actions

        Returns a ``(prompt, choices)`` pair.
        """
        choices = adm_utils.format_choices(
            [a.unstructured for a in actions],
            actions,
            scenario_state
        )

        scenario_description = scenario_state_description_1(scenario_state)
        prompt = action_selection_prompt(scenario_description, choices)

        return prompt, choices

    def run_returns(self):
        """Names of the two values returned by :meth:`run`, in order."""
        return ('chosen_action',
                'action_parameter_completion_dialog')

    def run(self,
            scenario_state,
            actions,
            choices,
            chosen_choice,
            dialog=None,
            alignment_target=None):
        """Resolve the chosen action and complete its parameters.

        Args:
            scenario_state: Current state; must expose ``hostnames``.
            actions: Available actions, index-aligned with ``choices``.
            choices: Formatted choice strings shown to the model.
            chosen_choice: The element of ``choices`` that was selected.
            dialog: Optional list of prior ``DialogElement`` objects.  NOTE:
                when provided it is modified in place (a trailing assistant
                message is removed and follow-up messages are appended).
            alignment_target: Unused by this component.

        Returns:
            ``(action_to_take, output_dialog)``.

        Raises:
            ValueError: If ``chosen_choice`` is not in ``choices`` (raised by
                ``list.index``).
        """
        if not dialog:
            # If prior steps didn't provide any dialog/context (None or an
            # empty list), use a sensible default:
            prompt, _ = self._state_to_top_level_prompt(
                scenario_state,
                actions)

            dialog = [DialogElement(role='user',
                                    content=prompt,
                                    tags=['parameter_completion'])]
        elif dialog[-1].role == 'assistant':
            # If last dialog message is an 'assistant' message, remove it
            # as we'll generate one for each follow-up needed. (Dialogs
            # should have alternating assistant/user elements)
            dialog.pop()

        chosen_action = actions[choices.index(chosen_choice)]

        return self.populate_action_parameters(
            scenario_state, chosen_action, dialog)

    def populate_action_parameters(self, scenario_state, action_to_take, dialog):
        """Fill in any parameters ``action_to_take`` still needs.

        Only hostname-taking actions are touched; everything else is returned
        unchanged.  Returns ``(action_to_take, dialog)``.
        """
        if action_to_take.name in self._HOSTNAME_ACTIONS:
            action_to_take, _, _, dialog = self.ensure_hostname_is_populated(
                scenario_state, action_to_take, dialog)

        return action_to_take, dialog

    def ensure_hostname_is_populated(self,
                                     scenario_state,
                                     action_to_take,
                                     dialog):
        """Make sure ``action_to_take.hostname`` is set.

        If the action has no hostname, a follow-up question is appended to
        ``dialog`` and the structured inference engine picks one of
        ``scenario_state.hostnames``.  If a hostname is already set, it is
        looked up in ``scenario_state.hostnames`` instead.

        Returns:
            ``(action_to_take, selected_hostname, selected_hostname_idx,
            dialog)`` where ``selected_hostname`` is a dict with
            ``hostname_choice`` / ``brief_reasoning`` keys.  When a preset
            hostname is not among the known hostnames,
            ``selected_hostname_idx`` is ``None`` and ``hostname_choice`` is
            absent.
        """
        if action_to_take.hostname is None:
            # Use follow up prompt to define the selected hostname
            hostnames = list(scenario_state.hostnames)

            # Leave out the justification when there is none, rather than
            # feeding the literal text "None" to the model.
            statement = 'I would choose to {}'.format(
                action_to_take.unstructured)
            if action_to_take.justification:
                statement = '{} {}'.format(
                    action_to_take.justification, statement)

            dialog.append(DialogElement(role='assistant',
                                        content=statement,
                                        tags=['parameter_completion']))
            dialog.append(DialogElement(role='user',
                                        content=followup_clarify_hostnames_cage(hostnames),
                                        tags=['parameter_completion']))
            dialog_text = self.structured_inference_engine.dialog_to_prompt(dialog)

            log.info("[bold]*DIALOG PROMPT*[/bold]",
                     extra={"markup": True})
            log.info(dialog_text)

            selected_hostname = self.structured_inference_engine.run_inference(
                dialog_text,
                cage_hostname_choice_json_schema(json.dumps(hostnames)))

            selected_hostname_idx = hostnames.index(
                selected_hostname['hostname_choice'])

            log.info("[bold]*STRUCTURED RESPONSE*[/bold]",
                     extra={"markup": True})
            log.info(selected_hostname, extra={"highlighter": JSON_HIGHLIGHTER})

            action_to_take.hostname = hostnames[selected_hostname_idx]
        else:
            # Use action_to_take.hostname to define selected_hostname
            selected_hostname = {}
            # Stays None if the preset hostname is unknown; previously this
            # path raised UnboundLocalError at the return statement.
            selected_hostname_idx = None
            for idx, hostname in enumerate(scenario_state.hostnames):
                if hostname == action_to_take.hostname:
                    selected_hostname['hostname_choice'] = hostname
                    selected_hostname_idx = idx
                    break
            else:
                log.warning(
                    "Action hostname %r is not among the scenario hostnames",
                    action_to_take.hostname)

            selected_hostname['brief_reasoning'] = action_to_take.justification

        return action_to_take, selected_hostname, selected_hostname_idx, dialog
/dev/null +++ b/align_system/configs/experiment/phase2_july_collab/pipeline_baseline_cage.yaml @@ -0,0 +1,39 @@ +# @package _global_ +defaults: + - override /adm: pipeline_baseline + - override /inference_engine@adm.structured_inference_engine: outlines_structured_greedy + - override /interface: cage_cia + + +adm: + step_definitions: + format_choices: + _target_: align_system.algorithms.misc_itm_adm_components.CAGEITMFormatChoicesADMComponent + outlines_baseline: + scenario_description_template: + _target_: align_system.prompt_engineering.outlines_prompts.CAGEScenarioDescription + prompt_template: + _target_: align_system.prompt_engineering.outlines_prompts.Phase2BaselinePrompt + system_prompt_template: + _target_: align_system.prompt_engineering.outlines_prompts.CAGEACDSystemPrompt + + enable_caching: true + + action_parameter_completion: + _target_: align_system.algorithms.cage_action_parameter_completion_adm_component.CAGEActionParameterCompletionADMComponent + + instance: + steps: + # Reference the step instances we want to use in order + - ${ref:adm.step_definitions.format_choices} + - ${ref:adm.step_definitions.outlines_baseline} + - ${ref:adm.step_definitions.action_parameter_completion} + - ${ref:adm.step_definitions.ensure_chosen_action} + - ${ref:adm.step_definitions.populate_choice_info} + +apply_action_filtering: false +save_scoring_output: true + +force_determinism: true +align_to_target: true +sort_available_actions: false diff --git a/align_system/configs/interface/cage.yaml b/align_system/configs/interface/cage.yaml new file mode 100644 index 00000000..39de90ba --- /dev/null +++ b/align_system/configs/interface/cage.yaml @@ -0,0 +1,2 @@ +_target_: align_system.interfaces.cage_action_based_service.CAGEActionBasedServiceInterface + diff --git a/align_system/configs/interface/cage_cia.yaml b/align_system/configs/interface/cage_cia.yaml new file mode 100644 index 00000000..e90ce59f --- /dev/null +++ b/align_system/configs/interface/cage_cia.yaml @@ -0,0 
class CAGEActionBasedServiceInterface(Interface):
    """Interface that runs CybORG (CAGE) rollouts as action-based scenarios.

    Each call to :meth:`start_scenario` builds a fresh simulated CybORG
    environment wrapped in ``BlueTableWrapper`` and returns a
    ``CAGEActionBasedScenario`` driving it.
    """

    EPISODE_LENGTH = 3
    seed = None
    cyborg_version = '1.2'
    scenario = 'Scenario1b'

    def __init__(self,
                 n_rollouts: int = 2,
                 ):
        self.n_rollouts = n_rollouts
        self.current_rollout = 0
        self.wrapped_cyborg = None

    def start_scenario(self):
        """Create the next rollout's environment and scenario.

        Once ``n_rollouts`` rollouts have been started, the returned
        scenario's ``id()`` is the empty string.  Further calls keep
        returning such a scenario instead of raising ``TypeError`` from
        incrementing the ``""`` marker.
        """
        import os  # local import: only needed to locate the scenario file

        if self.current_rollout != "":
            self.current_rollout += 1
        log.info("Starting CAGE Scenario")
        if self.current_rollout != "" and self.current_rollout > self.n_rollouts:
            log.info("Reached max # of CAGE rollouts")
            self.current_rollout = ""

        # TODO: we need to set up the CAGE environment here, and specify
        # what agents are doing the scenario
        # Scenario files live under <CybORG package dir>/Shared/Scenarios.
        scenario_path = os.path.join(
            os.path.dirname(str(inspect.getfile(CybORG))),
            'Shared', 'Scenarios', f'{self.scenario}.yaml')

        print(f'using CybORG v{self.cyborg_version}, {self.scenario}\n')

        cyborg = CybORG(scenario_path, 'sim', agents={'Red': B_lineAgent})
        self.wrapped_cyborg = BlueTableWrapper(cyborg, output_mode='table')

        return CAGEActionBasedScenario(self.wrapped_cyborg,
                                       episode_length=self.EPISODE_LENGTH,
                                       episode_number=self.current_rollout)

    def get_session_alignment(self, alignment_target):
        """Return the wrapped environment's rewards, or None before any rollout.

        ``alignment_target`` is not used.
        """
        if self.wrapped_cyborg is None:
            return None
        return self.wrapped_cyborg.get_rewards()

    # NOTE(review): the three hooks below take ``cls`` and are presumably
    # classmethods on the abstract Interface -- confirm against abstracts.py.
    @classmethod
    def cli_parser(cls, parser=None):
        """No command-line options are defined for this interface."""
        pass

    @classmethod
    def cli_parser_description(cls):
        """No command-line description is defined for this interface."""
        pass

    @classmethod
    def init_from_parsed_args(cls, parsed_args):
        """Construction from parsed CLI arguments is not implemented."""
        pass


class MetaInfo(object):
    """Minimal stand-in exposing only ``scene_id``.

    Defined after the module imports, so it replaces the ``MetaInfo`` name
    imported from ``swagger_client.models``.
    """

    def __init__(self, scene_id):
        self.scene_id = scene_id


class CAGEAlignmentTarget(object):
    """Plain holder for a collection of KDMA values."""

    def __init__(self, kdma_values):
        self.kdma_values = kdma_values


class CAGEState:
    """Scenario state handed to the ADM for a CAGE rollout."""

    def __init__(self, table, hostnames, scene_id):
        self.meta_info = MetaInfo(scene_id)
        # Observation rendered as text
        self.unstructured = str(table)
        self.hostnames = hostnames
        self.scenario_complete = False
        self.elapsed_time = 0

    def to_dict(self):
        """Return a dict view of the state (``elapsed_time`` is not included)."""
        return {'meta_info': {'scene_id': self.meta_info.scene_id},
                'unstructured': self.unstructured,
                'hostnames': self.hostnames,
                'scenario_complete': self.scenario_complete}


class CAGEAction:
    """Wrapper around a CybORG action class.

    ``hostname`` starts unset and may be filled in later; the action is only
    instantiated when it is submitted to the simulator.
    """

    def __init__(self, cage_class):
        self.cage_class = cage_class
        self.name = cage_class.__name__
        self.hostname = None
        self.justification = None
        self.unstructured = self.name
        self.kdma_association = None
        self.action_id = self.name.lower()

    def to_dict(self):
        """Return a dict view of the action (without the CybORG class)."""
        return {'name': self.name,
                "hostname": self.hostname,
                "justification": self.justification,
                "unstructured": self.unstructured,
                "kdma_association": self.kdma_association,
                "action_id": self.action_id}


class CAGEActionBasedScenario(ActionBasedScenarioInterface):
    """One CybORG rollout exposed through the action-based scenario API."""

    agent_name = 'Blue'

    def __init__(self, cyborg_sim, episode_length=500, episode_number=0):
        self.done = False
        self.episode_number = episode_number
        self.episode_length = episode_length
        # Number of simulator steps taken so far in this rollout
        self.scenario_count = 0

        self.cyborg_sim = cyborg_sim
        cage_obs = cyborg_sim.reset()
        cage_act_space = self.cyborg_sim.get_action_space(self.agent_name)
        self.hostnames = list(cage_act_space['hostname'].keys())
        self.obs = CAGEState(cage_obs.observation, self.hostnames, episode_number)
        self.enrich_obs()

    def enrich_obs(self):
        """Copy the completion flag onto the state object."""
        self.obs.scenario_complete = self.done

    def id(self):
        """Return the episode number as a string."""
        return str(self.episode_number)

    def get_alignment_target(self):
        """No alignment target is provided by this scenario."""
        return None

    def to_dict(self):
        """Not implemented; returns None."""
        pass

    def data(self):
        """Not implemented; returns None."""
        pass

    def get_available_actions(self):
        """Wrap every action class in the agent's action space."""
        # TODO: get the action space in the format that align expects
        cage_act_space = self.cyborg_sim.get_action_space(self.agent_name)
        return [CAGEAction(k) for k in cage_act_space['action']]

    def _take_or_intend_action(self, align_system_action):
        """Instantiate the CybORG action, step the simulator, return the state.

        The rollout is marked done when the simulator reports it or when
        ``episode_length`` steps have been taken.
        """
        # Convert to proper 'Action' object prior to submission; hostname is
        # only passed when one has been chosen.
        action_kwargs = {'agent': self.agent_name, 'session': 0}
        if align_system_action.hostname is not None:
            action_kwargs['hostname'] = align_system_action.hostname
        action = align_system_action.cage_class(**action_kwargs)

        # TODO takes an action and updates the state
        res = self.cyborg_sim.step(action=action, agent=self.agent_name)
        self.scenario_count += 1
        self.done = bool(res.done or self.scenario_count >= self.episode_length)
        self.obs.unstructured = str(res.observation)
        return self.get_state()

    # NOTE(review): intend_action steps the simulator exactly like
    # take_action -- confirm that an "intent" is meant to advance the rollout.
    def intend_action(self, action):
        """Same as :meth:`take_action`."""
        return self._take_or_intend_action(
            action,
        )

    def take_action(self, action):
        """Submit ``action`` to the simulator and return the new state."""
        return self._take_or_intend_action(
            action,
        )

    def get_state(self):
        """Return the current state with an up-to-date completion flag."""
        self.enrich_obs()
        return self.obs
class CAGEActionBasedServiceInterface(Interface):
    """Interface that runs CybORG (CAGE) rollouts scored with CIA metrics.

    Each call to :meth:`start_scenario` builds a fresh simulated CybORG
    environment wrapped in ``CIAWrapper`` and returns a
    ``CAGEActionBasedScenario`` driving it.
    """

    EPISODE_LENGTH = 30
    seed = None
    cyborg_version = '1.2'
    scenario = 'Scenario1b'

    def __init__(self,
                 n_rollouts: int = 5,
                 ):
        self.n_rollouts = n_rollouts
        self.current_rollout = 0
        self.wrapped_cyborg = None

    def start_scenario(self):
        """Create the next rollout's environment and scenario.

        Once ``n_rollouts`` rollouts have been started, the returned
        scenario's ``id()`` is the empty string.  Further calls keep
        returning such a scenario instead of raising ``TypeError`` from
        incrementing the ``""`` marker.
        """
        import os  # local import: only needed to locate the scenario file

        if self.current_rollout != "":
            self.current_rollout += 1
        log.info("Starting CAGE Scenario")
        if self.current_rollout != "" and self.current_rollout > self.n_rollouts:
            log.info("Reached max # of CAGE rollouts")
            self.current_rollout = ""

        # TODO: we need to set up the CAGE environment here, and specify
        # what agents are doing the scenario
        # Scenario files live under <CybORG package dir>/Shared/Scenarios.
        scenario_path = os.path.join(
            os.path.dirname(str(inspect.getfile(CybORG))),
            'Shared', 'Scenarios', f'{self.scenario}.yaml')

        print(f'using CybORG v{self.cyborg_version}, {self.scenario}\n')

        cyborg = CybORG(scenario_path, 'sim', agents={'Red': B_lineAgent})
        self.wrapped_cyborg = CIAWrapper(env=cyborg, output_mode='table')

        return CAGEActionBasedScenario(self.wrapped_cyborg,
                                       episode_length=self.EPISODE_LENGTH,
                                       episode_number=self.current_rollout)

    def get_session_alignment(self, alignment_target):
        """Return CIA scores merged with collected rewards.

        Returns None before any rollout has been started.  On key clashes
        the reward entries win (right operand of the dict union).
        ``alignment_target`` is not used.
        """
        if self.wrapped_cyborg is None:
            return None
        cia_scores = self.wrapped_cyborg.get_cia_scores()
        rewards = self.wrapped_cyborg.get_collected_rewards()
        return cia_scores | rewards

    # NOTE(review): the three hooks below take ``cls`` and are presumably
    # classmethods on the abstract Interface -- confirm against abstracts.py.
    @classmethod
    def cli_parser(cls, parser=None):
        """No command-line options are defined for this interface."""
        pass

    @classmethod
    def cli_parser_description(cls):
        """No command-line description is defined for this interface."""
        pass

    @classmethod
    def init_from_parsed_args(cls, parsed_args):
        """Construction from parsed CLI arguments is not implemented."""
        pass


class MetaInfo(object):
    """Minimal stand-in exposing only ``scene_id``.

    Defined after the module imports, so it replaces the ``MetaInfo`` name
    imported from ``swagger_client.models``.
    """

    def __init__(self, scene_id):
        self.scene_id = scene_id


class CAGEState:
    """Scenario state handed to the ADM for a CAGE rollout."""

    def __init__(self, table, hostnames, scene_id):
        self.meta_info = MetaInfo(scene_id)
        # Observation rendered as text
        self.unstructured = str(table)
        self.hostnames = hostnames
        self.scenario_complete = False
        self.elapsed_time = 0

    def to_dict(self):
        """Return a dict view of the state (``elapsed_time`` is not included)."""
        return {'meta_info': {'scene_id': self.meta_info.scene_id},
                'unstructured': self.unstructured,
                'hostnames': self.hostnames,
                'scenario_complete': self.scenario_complete}


class CAGEAction:
    """Wrapper around a CybORG action class.

    ``hostname`` starts unset and may be filled in later; the action is only
    instantiated when it is submitted to the simulator.
    """

    def __init__(self, cage_class):
        self.cage_class = cage_class
        self.name = cage_class.__name__
        self.hostname = None
        self.justification = None
        self.unstructured = self.name
        self.kdma_association = None
        self.action_id = self.name.lower()

    def to_dict(self):
        """Return a dict view of the action (without the CybORG class)."""
        return {'name': self.name,
                "hostname": self.hostname,
                "justification": self.justification,
                "unstructured": self.unstructured,
                "kdma_association": self.kdma_association,
                "action_id": self.action_id}


class CAGEActionBasedScenario(ActionBasedScenarioInterface):
    """One CybORG rollout exposed through the action-based scenario API."""

    agent_name = 'Blue'

    def __init__(self, cyborg_sim, episode_length=500, episode_number=0):
        self.done = False
        self.episode_number = episode_number
        self.episode_length = episode_length
        # Number of simulator steps taken so far in this rollout
        self.scenario_count = 0

        self.cyborg_sim = cyborg_sim
        cage_obs = cyborg_sim.reset()
        cage_act_space = self.cyborg_sim.get_action_space(self.agent_name)
        self.hostnames = list(cage_act_space['hostname'].keys())
        self.obs = CAGEState(cage_obs.observation, self.hostnames, episode_number)
        self.enrich_obs()

    def enrich_obs(self):
        """Copy the completion flag onto the state object."""
        self.obs.scenario_complete = self.done

    def id(self):
        """Return the episode number as a string."""
        return str(self.episode_number)

    def get_alignment_target(self):
        """Return None; the target is defined via configuration instead."""
        return None

    def to_dict(self):
        """Not implemented; returns None."""
        pass

    def data(self):
        """Not implemented; returns None."""
        pass

    def get_available_actions(self):
        """Wrap every action class in the agent's action space."""
        cage_act_space = self.cyborg_sim.get_action_space(self.agent_name)
        return [CAGEAction(k) for k in cage_act_space['action']]

    def _take_or_intend_action(self, align_system_action):
        """Instantiate the CybORG action, step the simulator, return the state.

        The rollout is marked done when the simulator reports it or when
        ``episode_length`` steps have been taken.
        """
        # Convert to proper 'Action' object prior to submission; hostname is
        # only passed when one has been chosen.
        action_kwargs = {'agent': self.agent_name, 'session': 0}
        if align_system_action.hostname is not None:
            action_kwargs['hostname'] = align_system_action.hostname
        action = align_system_action.cage_class(**action_kwargs)

        # TODO takes an action and updates the state
        res = self.cyborg_sim.step(action=action, agent=self.agent_name)
        self.scenario_count += 1
        self.done = bool(res.done or self.scenario_count >= self.episode_length)
        self.obs.unstructured = str(res.observation)
        return self.get_state()

    # NOTE(review): intend_action steps the simulator exactly like
    # take_action -- confirm that an "intent" is meant to advance the rollout.
    def intend_action(self, action):
        """Same as :meth:`take_action`."""
        return self._take_or_intend_action(
            action,
        )

    def take_action(self, action):
        """Submit ``action`` to the simulator and return the new state."""
        return self._take_or_intend_action(
            action,
        )

    def get_state(self):
        """Return the current state with an up-to-date completion flag."""
        self.enrich_obs()
        return self.obs
# Calculates CIA attributes given a CAGE simulation environment step
class CIATriadMetric:
    """Per-step confidentiality / integrity / availability (CIA) bookkeeping.

    Every tracked host carries three values ("Confidentiality", "Integrity",
    "Available") that start at 1 and are adjusted from the last Red and Blue
    actions.  A step's score for each attribute is the sum over hosts of the
    value times a host-type weight.

    Example:
        metric = CIATriadMetric()
        metric.set_network_topology(env.get_action_space("Blue"))  # initialise
        metric.calculate_cia(env)                                  # once per step
        metric.scores()                                            # final totals
    """

    def __init__(self,
                 user_host_w: float = 0.34,
                 enterprise_host_w: float = 0.34,
                 operational_host_w: float = 0.34,
                 availability_multiplier: float = 1.5,
                 restoration_weight: float = 0.5
                 ):
        """
        Args:
            user_host_w: Weight for hosts that are neither enterprise nor
                operational.
            enterprise_host_w: Weight for hosts whose name contains
                "Enterprise".
            operational_host_w: Weight for hosts whose name contains "Op".
            availability_multiplier: Factor applied to an already negative
                availability value each time the host is impacted again.
            restoration_weight: Integrity value assigned to a host after a
                successful Restore.
        """
        # weights assigned to different host types
        self.user_host_w = user_host_w
        self.enterprise_host_w = enterprise_host_w
        self.operational_host_w = operational_host_w

        # multiplier for availability. Each step a system is down,
        # availability is impacted x multiplier
        self.availability_multiplier = availability_multiplier
        self.restoration_weight = restoration_weight

        # holds CIA over each step of the game
        self.confidentialities = []
        self.integrities = []
        self.availabilities = []

        # holds information about each host on the network over time
        self.network_topology = {}

    def _weighted_total(self, attribute):
        """Sum ``attribute`` over all tracked hosts, weighted by host type."""
        return sum(state[attribute] * self.get_sys_weight(host)
                   for host, state in self.network_topology.items())

    def calculate_cia(self, env):
        """Calculate CIA attributes from the environment's last step.

        Hosts that are not part of the tracked topology (for example
        "Defender", which ``set_network_topology`` skips) are ignored rather
        than raising ``KeyError``.

        Args:
            env (CybORG): CybORG class environment with dictionary-like
                observations

        Returns:
            ``(confidentiality, integrity, availability)`` for this step; the
            same values are appended to the per-step history lists.
        """
        # Blue agent action possibilities:
        # ['Sleep', 'Monitor', 'Analyse', 'Remove', 'Restore']
        blue_action = env.get_last_action("Blue")
        blue_action_name = blue_action.__class__.__name__
        blue_obs = env.get_observation("Blue")

        red_action = env.get_last_action("Red")
        red_action_name = red_action.__class__.__name__
        red_obs = env.get_observation("Red")

        # Confidentiality:
        # -1.0 x host type if red agent successfully exploits a host
        #  1.0 x host type if blue agent successfully removes red agent
        #  access to a host
        if red_action_name == "ExploitRemoteService" and red_obs["success"] == TrinaryEnum.TRUE:
            exploited_host = red_obs[str(red_action.ip_address)]["System info"]["Hostname"]
            host_state = self.network_topology.get(exploited_host)
            if host_state is not None:
                host_state["Confidentiality"] = -1.0
        if blue_action_name == "Remove" and blue_obs["success"] == TrinaryEnum.TRUE:
            # only count as an improvement in confidentiality if red agent
            # had a shell open in the host
            host_state = self.network_topology.get(blue_action.hostname)
            if host_state is not None and host_state["Confidentiality"] < 0:
                host_state["Confidentiality"] = 1.0

        confidentiality = self._weighted_total("Confidentiality")

        # Integrity:
        # -1.0 x host type if red agent successfully gets root access on a
        # host.  Integrity is only raised again by the Restore handling below.
        if red_action_name == "PrivilegeEscalate":
            if red_obs["success"] == TrinaryEnum.TRUE:
                host_state = self.network_topology.get(red_action.hostname)
                if host_state is not None:
                    host_state["Integrity"] = -1.0

        integrity = self._weighted_total("Integrity")

        # Availability:
        #  1.0 x host type if service is available
        # -1.0 x host type x availability_multiplier if service is not
        #  available, and will be set to 1.0 if blue agent performs a
        #  "Restore"
        if red_action_name == "Impact" and red_obs["success"] == TrinaryEnum.TRUE:
            host_state = self.network_topology.get(red_action.hostname)
            if host_state is not None:
                if host_state["Available"] < 0:
                    # if this was already impacted, apply multiplier
                    host_state["Available"] *= self.availability_multiplier
                else:
                    host_state["Available"] = -1.0
        if blue_action_name == "Restore" and blue_obs["success"] == TrinaryEnum.TRUE:
            host_state = self.network_topology.get(blue_action.hostname)
            # NOTE(review): the reset only happens when availability is
            # currently negative; a Restore on a compromised but still
            # available host leaves C and I untouched -- confirm intended.
            if host_state is not None and host_state["Available"] < 0:
                # doing restore resets the host to its initial state, so
                # everything is restored.  C and I were already totalled
                # above, so their reset shows up from the next step on.
                host_state["Available"] = 1.0
                host_state["Confidentiality"] = 1.0
                host_state["Integrity"] = 1.0 * self.restoration_weight

        availability = self._weighted_total("Available")

        self.confidentialities.append(confidentiality)
        self.integrities.append(integrity)
        self.availabilities.append(availability)

        return confidentiality, integrity, availability

    def get_sys_weight(self, system_name):
        """Return the weight for ``system_name`` based on its name.

        Names containing "Enterprise" get the enterprise weight, otherwise
        names containing "Op" get the operational weight, and everything
        else gets the user weight.
        """
        if "Enterprise" in system_name:
            return self.enterprise_host_w
        if "Op" in system_name:
            return self.operational_host_w
        return self.user_host_w

    def reset(self, network_observation):
        """Rebuild the topology from ``network_observation`` and clear scores."""
        self.set_network_topology(network_observation)
        self.confidentialities = []
        self.integrities = []
        self.availabilities = []

    def set_network_topology(self, network_observation):
        """Start tracking every host named in ``network_observation``.

        Each key except "Defender" becomes a host whose three CIA values
        start at 1.  All later CIA computation is based on this topology.
        """
        self.network_topology = {
            system: {"Confidentiality": 1, "Available": 1, "Integrity": 1}
            for system in network_observation
            if system != "Defender"
        }

    def scores(self):
        """Return the totals over all recorded steps as ``{"C", "I", "A"}``."""
        return {
            "C": sum(self.confidentialities),
            "I": sum(self.integrities),
            "A": sum(self.availabilities)
        }

    def __str__(self):
        return str(self.scores())
The observation will follow the format described under `# OBSERVATION STRUCTURE`. + + Your task is to analyze the network state based on this observation, and choose the best action and parameter from the list of actions based on their description. + + Respond EXACTLY with one action: ONLY ONE action from `## AVAILABLE ACTIONS`, always including the required parameter. For , you can choose any of the hosts listed under `## HOSTNAMES` + + Review examples from `## EXAMPLE RESPONSES` if you are unsure. + + ## AVAILABLE ACTIONS + - Monitor: Collection of information about flagged malicious activity on the system. Does not require a hostname. Identifies hosts with network connections and associated processes that are identified as malicious. + - Analyse host:: Analyze a host for suspicious activity. Collection of further information relating to malware, on a specific host to enable blue to better identify if red is present on the system. Analyse should be executed when it is suspected that red agents have been active on the network. + - Remove host:: Attempting to remove red from a host by destroying MALICIOUS PROCESSES, files and services. This action attempts to stop all processes identified as malicious by the monitor action. Can succeed or fail, and should be executed when it is suspected that red agents have user-level compromise of a host. + - Restore host:: Restoring a system to a known good state. This has significant consequences for system availability. Ideal when detecting MALICIOUS PROCESSES with an admin-level compromise. + + ## EXAMPLE RESPONSES + - Example 1: + action: Remove host:User1 + reason: Host has been compromised + - Example 2: + action: Analyse host:Op_Host1 + reason: Host is in the same subnetwork as the critical operational server. 
+ - Example 3: + action: Restore host:Enterprise1 + reason: Host has been detected to have a privileged escalation + + # ENVIRONMENT RULES + ## NETWORK STRUCTURE: + - 3 subnets: User subnetwork, Enterprise subnet w/ servers and the Defender host, and Operational subnet with Operational Server & Operational Hosts + - Each zone has 0-3 servers and 1-6 user hosts + + ## HOSTNAMES + - Defender + - Enterprise0 + - Enterprise1 + - Enterprise2 + - Op_Host0 + - Op_Host1 + - Op_Host2 + - Op_Server0 + - User0 + - User1 + - User2 + - User3 + - User4 + + ## DEFENSE SETUP: + - 1 Network Defender in the Enterprise subnet: + - Red team starts in User subnet + - Red maintains constant presence in User subnet + - Maintaining Operational Server in healthy state is KEY to the system owners + + + ## REWARD STRUCTURE: + - Penalties for: + * Green (user) agent work failures + * Invalid service connections + * Host unavailability (restore action) + * Red team impacts + - Penalties higher for Enterprise and Operational Servers vs Hosts + + + # OBSERVATION STRUCTURE + ``` + Last Action: # Last action executed + Last Action Status: # Status of the last action + + Suspicious Activity Detected: # Suspicious activity detected in your network + # Hostname and activity detected + # Hostname and activity detected + ``` + + `Last Action Status` can be one of the following: + - TRUE - the last action was successful. + - UNKNOWN - it is not possible to know the success of the action / the action does not respond with a success status + - FALSE - the action was unsuccessful. + + `Suspicious Activity Detected` will be `None` if no suspicious activity is detected. + If there is suspicious activity, the activity per host will contain one or more of the following: + - WARNING: A suspicious connection has been detected. Means that a red agent has attempted to connect to a host or a deployed decoy. + - ALERT: A red agent has been detected on the host. This is a user-level compromise. 
Can be addressed with a Remove or Restore action. + - CRITICAL: A red agent has been detected on the host with a privileged escalation. This is a critical compromise. Can be addressed only with a Restore action. + """ + + +@outlines.prompt +def cage_system_prompt(): + """ + # DESCRIPTION + You are an expert in cyber security defending a computer network. Analyze the network state and choose the best action and parameter from the list of actions based on their description. + Each time, you will receive an observation that will describe the status of the network as a table. + Your task is to analyze the network state based on this observation, and choose the best action and parameter from the list of actions based on their description. + Respond with one action: ONLY ONE action from `## AVAILABLE ACTIONS`, always including the required parameter. For <hostname>, select a hostname from the observation table + Your goal is to: + 1. Maintain the critical operational server, so as to ensure information from it is not exploited. + 2. Where possible, maintain enterprise servers so as to ensure day-to-day operations of the network are not disrupted or revealed. + + The value "User" in the "Compromised" column of the observation table indicates a compromise which the 'Remove' action will handle. + The value "Privileged" in the "Compromised" column of the observation table indicates a compromise which the 'Restore' action will handle, 'Remove' is not enough in this case. + In the 'Activity' column, 'Scan' indicates that the enemy agent is discovering a particular host's existence over the network. 'Exploit' indicates the host is being Compromised. + + To get to the critical operational server, the enemy agent needs to chain its way through various other servers to discover the information it needs to get to the operational server. + + The available actions at each step are: + - Monitor: Collection of information about flagged malicious activity on the system. 
Params: None (Note: This action occurs automatically if another action is chosen) + - Analyse: Collection of further information on a specific host to enable blue to better identify if red is present on the system. Params: Hostname + - Misinform: Setup of decoy services on a specific host. Green agents do not access these services, so any access is a clear example of red activity. Params: Hostname + - Remove: Attempting to remove red from a host by destroying malicious processes, files and services. This action attempts to stop all processes identified as malicious by the monitor action. Params: Hostname + - Restore: Restoring a system to a known good state. This has significant consequences for system availability. This action punishes Blue by -1. Params: Hostname + + + """ + + @outlines.prompt def baseline_system_prompt(): """ @@ -397,6 +517,17 @@ def scenario_state_description_with_relevant_char_info(scenario_state, character {{ scenario_state.unstructured.rstrip() }} """ +@outlines.prompt +def followup_clarify_hostnames_cage(hostnames): + """ + Please clarify which hostname to apply the action to + + HOSTNAMES: + {% for hostname in hostnames %} + - {{ hostname }} + {% endfor %} + """ + @outlines.prompt def followup_clarify_character(characters): """ @@ -532,6 +663,20 @@ def action_choice_json_schema(choices_json_str, reasoning_max_length=512): ''' +@outlines.prompt +def cage_hostname_choice_json_schema(choices_json_str): + ''' + {"$defs": {"HostnameChoice": {"enum": {{ choices_json_str }}, + "title": "HostnameChoice", + "type": "string"}}, + "properties": {"brief_reasoning": {"title": "Brief Reasoning", + "type": "string", "minLength": 1, "maxLength": 512}, + "hostname_choice": {"$ref": "#/$defs/HostnameChoice"}}, + "required": ["brief_reasoning", "hostname_choice"], + "title": "HostnameSelection", + "type": "object"} + ''' + @outlines.prompt def character_choice_json_schema(choices_json_str): ''' @@ -1149,6 +1294,12 @@ class DefaultITMBaselineSystemPrompt(): 
def __call__(self): return baseline_system_prompt() +class CAGESystemPrompt(): + def __call__(self): + return cage_system_prompt() +class CAGEACDSystemPrompt(): + def __call__(self): + return llms_are_acds_system_prompt() class PromptBasedBinaryITMSystemPrompt(): def __call__(self, target_kdma, target_value): @@ -1193,6 +1344,16 @@ def __call__(self, target_kdma, target_value): else: return high_utilitarianism_care_system_prompt() +@outlines.prompt +def cage_scenario_state_description(scenario_state): + """ + {{ scenario_state.unstructured }} + """ + + +class CAGEScenarioDescription(): + def __call__(self, scenario_state): + return cage_scenario_state_description(scenario_state) @outlines.prompt def phase2_scenario_state_description(scenario_state): diff --git a/align_system/utils/adm_utils.py b/align_system/utils/adm_utils.py index a11dbad4..eb716bc7 100644 --- a/align_system/utils/adm_utils.py +++ b/align_system/utils/adm_utils.py @@ -35,3 +35,19 @@ def format_choices(choices, available_actions, scenario_state): choices.append(a.unstructured) return choices + + + +def cage_format_choices( available_actions): #, scenario_state): + """ + Turn cage action classes into LLM-useful strings + Extracted from: https://github.com/cage-challenge/cage-challenge-1/tree/main?tab=readme-ov-file#appendix-a---blue-action-sets + """ + + #hostnames = available_actions['hostname'].keys() + choices = [] + for a in available_actions: #available_actions['action'].keys(): + choices.append(a.name) + + return choices + diff --git a/pyproject.toml b/pyproject.toml index 7834f1d7..a4a21011 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ url = "https://download.pytorch.org/whl/cu118" priority = "supplemental" [tool.poetry.dependencies] -python = ">=3.9,<3.13" +python = ">=3.10,<3.13" torch = { version = "^2.0.1", source = "pytorch" } transformers = "^4.49.0" llama-index = "^0.8.21" @@ -38,6 +38,10 @@ ubelt = "1.3.6" [tool.poetry.scripts] run_align_system = 
'align_system.cli.run_align_system:main' + +[tool.poetry.group.cage.dependencies] +cyborg = {path = "../cage-challenge-1/CybORG", develop = true} + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api"