ITM-Kitware · DennisMelamedKitware · Jul 28, 2025 · Aug 11, 2025 · Aug 11, 2025 · Aug 11, 2025
diff --git a/.gitignore b/.gitignore
@@ -4,4 +4,5 @@ venv/
 __pycache__/
 outputs
 
-.vscode/
+.vscode/
+*.swp
diff --git a/align_system/algorithms/cage_action_parameter_completion_adm_component.py b/align_system/algorithms/cage_action_parameter_completion_adm_component.py
@@ -0,0 +1,134 @@
+import copy
+import json
+
+from rich.highlighter import JSONHighlighter
+
+from align_system.utils import logging
+from align_system.utils import adm_utils
+from align_system.algorithms.abstracts import ADMComponent
+from align_system.prompt_engineering.outlines_prompts import (
+        action_selection_prompt,
+        scenario_state_description_1,
+        followup_clarify_hostnames_cage,
+        cage_hostname_choice_json_schema,
+    )
+from align_system.data_models.dialog import DialogElement
+
+log = logging.getLogger(__name__)
+JSON_HIGHLIGHTER = JSONHighlighter()
+
+
+class CAGEActionParameterCompletionADMComponent(ADMComponent):
+    """
+    ADM pipeline step that fills in missing parameters of the chosen
+    CAGE action.
+
+    The only parameter currently handled is `hostname`: when the
+    chosen action needs one and has none, the structured inference
+    engine is asked a follow-up question to pick one of
+    `scenario_state.hostnames`.
+    """
+
+    # Names of the actions for which a hostname gets populated
+    _HOSTNAME_ACTION_NAMES = frozenset(
+        {'Analyse', 'Misinform', 'Remove', 'Restore'})
+
+    def __init__(self,
+                 structured_inference_engine):
+        """
+        :param structured_inference_engine: Engine used for the
+            follow-up prompts; this class calls its `dialog_to_prompt`
+            and `run_inference` methods.
+        """
+        self.structured_inference_engine = structured_inference_engine
+
+    # TODO: Copied from outlines_adm.py; should use a common template/prompt
+    def _state_to_top_level_prompt(self, scenario_state, actions):
+        """
+        Generate prompt dialog based on given state and actions
+
+        :return: Tuple of (prompt, formatted choices)
+        """
+        choices = adm_utils.format_choices(
+            [a.unstructured for a in actions],
+            actions,
+            scenario_state
+        )
+
+        scenario_description = scenario_state_description_1(scenario_state)
+        prompt = action_selection_prompt(scenario_description, choices)
+
+        return prompt, choices
+
+    def run_returns(self):
+        """
+        Names of the two values returned by `run`, in order.
+        """
+        return ('chosen_action',
+                'action_parameter_completion_dialog')
+
+    def run(self,
+            scenario_state,
+            actions,
+            choices,
+            chosen_choice,
+            dialog=None,
+            alignment_target=None):
+        """
+        Populate the parameters of the action matching `chosen_choice`.
+
+        :param scenario_state: Current scenario state.
+        :param actions: Available actions, index-aligned with `choices`.
+        :param choices: Choice entries; `chosen_choice` is looked up in
+            here with `choices.index`.
+        :param chosen_choice: The selected entry of `choices`.
+        :param dialog: Optional list of dialog elements from prior
+            steps.  NOTE: when provided, this list is modified in
+            place (trailing assistant message removed, follow-up
+            messages appended).
+        :param alignment_target: Accepted but not used by this step.
+
+        :return: Tuple of (action with parameters populated, dialog
+            including any follow-up messages).
+        """
+        if dialog is None:
+            # If prior steps didn't provide any dialog/context, use a
+            # sensible default:
+            prompt, _ = self._state_to_top_level_prompt(
+                scenario_state,
+                actions)
+
+            dialog = [DialogElement(role='user',
+                                    content=prompt,
+                                    tags=['parameter_completion'])]
+
+        # If last dialog message is an 'assistant' message, remove it
+        # as we'll generate one for each follow-up needed.  (Dialogs
+        # should have alternating assistant/user elements).  The
+        # emptiness check avoids an IndexError on an empty dialog.
+        if dialog and dialog[-1].role == 'assistant':
+            dialog.pop()
+
+        chosen_action = actions[choices.index(chosen_choice)]
+
+        return self.populate_action_parameters(
+            scenario_state, chosen_action, dialog)
+
+    def populate_action_parameters(self, scenario_state, action_to_take, dialog):
+        """
+        Fill in whichever parameters `action_to_take` requires.
+
+        Only actions named in `_HOSTNAME_ACTION_NAMES` are touched;
+        any other action is returned unchanged.
+
+        :return: Tuple of (action_to_take, dialog)
+        """
+        if action_to_take.name in self._HOSTNAME_ACTION_NAMES:
+            # The selected hostname details are not needed here, only
+            # the (possibly updated) action and dialog
+            action_to_take, _, _, dialog = self.ensure_hostname_is_populated(
+                scenario_state, action_to_take, dialog)
+
+        return action_to_take, dialog
+
+    def ensure_hostname_is_populated(self,
+                                     scenario_state,
+                                     action_to_take,
+                                     dialog):
+        """
+        Make sure `action_to_take.hostname` is set, prompting the
+        structured inference engine for a choice when it is not.
+
+        :return: Tuple of (action_to_take, selected_hostname,
+            selected_hostname_idx, dialog).  `selected_hostname` is a
+            dict: the structured response when a follow-up prompt was
+            needed, otherwise one built from the action's existing
+            hostname and justification.  `selected_hostname_idx` is
+            the position of the hostname in `scenario_state.hostnames`,
+            or None when the action's pre-set hostname is not found
+            there.
+        """
+        if action_to_take.hostname is None:
+            # Use follow up prompt to define selected_hostname
+            hostnames = list(scenario_state.hostnames)
+
+            dialog.append(DialogElement(role='assistant',
+                                        content='{}  I would choose to {}'.format(
+                                            action_to_take.justification,
+                                            action_to_take.unstructured),
+                                        tags=['parameter_completion']))
+            dialog.append(DialogElement(role='user',
+                                        content=followup_clarify_hostnames_cage(hostnames),
+                                        tags=['parameter_completion']))
+            dialog_text = self.structured_inference_engine.dialog_to_prompt(dialog)
+
+            log.info("[bold]*DIALOG PROMPT*[/bold]",
+                     extra={"markup": True})
+            log.info(dialog_text)
+
+            selected_hostname = self.structured_inference_engine.run_inference(
+                dialog_text,
+                cage_hostname_choice_json_schema(json.dumps(hostnames)))
+
+            # Raises ValueError if the response is not one of the
+            # offered hostnames
+            selected_hostname_idx = hostnames.index(selected_hostname['hostname_choice'])
+
+            log.info("[bold]*STRUCTURED RESPONSE*[/bold]",
+                     extra={"markup": True})
+            log.info(selected_hostname, extra={"highlighter": JSON_HIGHLIGHTER})
+
+            action_to_take.hostname = hostnames[selected_hostname_idx]
+        else:
+            # Use action_to_take.hostname to define selected_hostname.
+            # The index starts as None so that a hostname missing from
+            # the scenario state no longer causes an UnboundLocalError
+            # at the return below.
+            selected_hostname = {}
+            selected_hostname_idx = None
+            for hostname_idx, hostname in enumerate(scenario_state.hostnames):
+                if hostname == action_to_take.hostname:
+                    selected_hostname['hostname_choice'] = hostname
+                    selected_hostname_idx = hostname_idx
+                    break
+            else:
+                log.warning(
+                    "Hostname %r of chosen action not found in scenario "
+                    "state hostnames",
+                    action_to_take.hostname)
+
+            selected_hostname['brief_reasoning'] = action_to_take.justification
+
+        return action_to_take, selected_hostname, selected_hostname_idx, dialog
+
diff --git a/align_system/algorithms/misc_itm_adm_components.py b/align_system/algorithms/misc_itm_adm_components.py
@@ -48,6 +48,15 @@ def run(self, scenario_state, actions):
 
         return choices
 
+class CAGEITMFormatChoicesADMComponent(ADMComponent):
+    """
+    ADM step that turns the available CAGE actions into choices via
+    `adm_utils.cage_format_choices`.
+    """
+
+    def run_returns(self):
+        # A single output name, given as a plain string (not a tuple)
+        return 'choices'
+
+    def run(self, scenario_state, actions):
+        # `scenario_state` is accepted but not used by this step
+        return adm_utils.cage_format_choices(actions)
+
 
 class JustificationFromReasonings(ADMComponent):
     def run_returns(self):

diff --git a/align_system/configs/alignment_target/cage/confidentiality-1.0.yaml b/align_system/configs/alignment_target/cage/confidentiality-1.0.yaml
@@ -0,0 +1,5 @@
+id: confidentiality-1.0
+kdma_values:
+  - kdes: null
+    kdma: confidentiality
+    value: 1.0
diff --git a/align_system/configs/experiment/phase2_july_collab/pipeline_baseline_cage.yaml b/align_system/configs/experiment/phase2_july_collab/pipeline_baseline_cage.yaml
@@ -0,0 +1,39 @@
+# @package _global_
+defaults:
+  - override /adm: pipeline_baseline
+  - override /[email protected]_inference_engine: outlines_structured_greedy
+  - override /interface: cage_cia
+
+
+adm:
+  step_definitions:
+    format_choices:
+      _target_: align_system.algorithms.misc_itm_adm_components.CAGEITMFormatChoicesADMComponent
+    outlines_baseline:
+      scenario_description_template:
+        _target_: align_system.prompt_engineering.outlines_prompts.CAGEScenarioDescription
+      prompt_template:
+        _target_: align_system.prompt_engineering.outlines_prompts.Phase2BaselinePrompt
+      system_prompt_template:
+        _target_: align_system.prompt_engineering.outlines_prompts.CAGEACDSystemPrompt
+
+      enable_caching: true
+
+    action_parameter_completion:
+      _target_: align_system.algorithms.cage_action_parameter_completion_adm_component.CAGEActionParameterCompletionADMComponent
+
+  instance:
+    steps:
+    # Reference the step instances we want to use in order
+    - ${ref:adm.step_definitions.format_choices}
+    - ${ref:adm.step_definitions.outlines_baseline}
+    - ${ref:adm.step_definitions.action_parameter_completion}
+    - ${ref:adm.step_definitions.ensure_chosen_action}
+    - ${ref:adm.step_definitions.populate_choice_info}
+
+apply_action_filtering: false
+save_scoring_output: true
+
+force_determinism: true
+align_to_target: true
+sort_available_actions: false
diff --git a/align_system/configs/interface/cage.yaml b/align_system/configs/interface/cage.yaml
@@ -0,0 +1,2 @@
+_target_: align_system.interfaces.cage_action_based_service.CAGEActionBasedServiceInterface
+
diff --git a/align_system/configs/interface/cage_cia.yaml b/align_system/configs/interface/cage_cia.yaml
@@ -0,0 +1,2 @@
+_target_: align_system.interfaces.cia_cage_service.CAGEActionBasedServiceInterface
+
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		_target_: align_system.interfaces.cage_action_based_service.CAGEActionBasedServiceInterface
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		_target_: align_system.interfaces.cia_cage_service.CAGEActionBasedServiceInterface