Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ venv/
__pycache__/
outputs

.vscode/
.vscode/
*.swp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import copy
import json

from rich.highlighter import JSONHighlighter

from align_system.utils import logging
from align_system.utils import adm_utils
from align_system.algorithms.abstracts import ADMComponent
from align_system.prompt_engineering.outlines_prompts import (
action_selection_prompt,
scenario_state_description_1,
followup_clarify_hostnames_cage,
cage_hostname_choice_json_schema,
)
from align_system.data_models.dialog import DialogElement

log = logging.getLogger(__name__)
JSON_HIGHLIGHTER = JSONHighlighter()


class CAGEActionParameterCompletionADMComponent(ADMComponent):
    """Pipeline step that fills in missing parameters of an already chosen action.

    The only parameter currently handled is ``hostname``, and only for
    actions whose ``name`` is one of ``_HOSTNAME_ACTIONS``.  When the
    hostname is missing, the structured inference engine is asked a
    follow-up question to pick one of ``scenario_state.hostnames``.
    """

    # Names of the actions for which a hostname gets populated.
    _HOSTNAME_ACTIONS = frozenset({'Analyse', 'Misinform', 'Remove', 'Restore'})

    def __init__(self,
                 structured_inference_engine):
        """Store the engine used for the follow-up (structured) inference.

        :param structured_inference_engine: object providing
            ``dialog_to_prompt(dialog)`` and ``run_inference(prompt, schema)``.
        """
        self.structured_inference_engine = structured_inference_engine

    # TODO: Copied from outlines_adm.py; should use a common template/prompt
    def _state_to_top_level_prompt(self, scenario_state, actions):
        """
        Generate prompt dialog based on given state and actions

        :returns: ``(prompt, choices)`` where ``choices`` is the output of
            ``adm_utils.format_choices`` and ``prompt`` is built from the
            scenario description and those choices.
        """
        choices = adm_utils.format_choices(
            [a.unstructured for a in actions],
            actions,
            scenario_state
        )

        scenario_description = scenario_state_description_1(scenario_state)
        prompt = action_selection_prompt(scenario_description, choices)

        return prompt, choices

    def run_returns(self):
        """Names under which the two values returned by ``run`` are published."""
        return ('chosen_action',
                'action_parameter_completion_dialog')

    def run(self,
            scenario_state,
            actions,
            choices,
            chosen_choice,
            dialog=None,
            alignment_target=None):
        """Complete the parameters of the action matching ``chosen_choice``.

        :param scenario_state: current scenario state (must expose
            ``hostnames`` when a hostname has to be populated).
        :param actions: available actions, index-aligned with ``choices``.
        :param choices: formatted choices, index-aligned with ``actions``.
        :param chosen_choice: the element of ``choices`` that was selected.
        :param dialog: optional list of ``DialogElement`` giving prior
            context.  NOTE: the list is modified in place (a trailing
            assistant message is popped, follow-ups are appended).
        :param alignment_target: accepted for interface compatibility; unused.
        :returns: ``(action, dialog)`` -- the (possibly updated) action and
            the dialog including any follow-up exchange.
        :raises ValueError: if ``chosen_choice`` is not in ``choices``.
        """
        if dialog is None:
            # If prior steps didn't provide any dialog/context, use a
            # sensible default:
            prompt, _ = self._state_to_top_level_prompt(
                scenario_state,
                actions)

            dialog = [DialogElement(role='user',
                                    content=prompt,
                                    tags=['parameter_completion'])]

        # If last dialog message is an 'assistant' message, remove it
        # as we'll generate one for each follow-up needed.  (Dialogs
        # should have alternating assistant/user elements).  The
        # emptiness check avoids an IndexError on an empty dialog.
        if dialog and dialog[-1].role == 'assistant':
            dialog.pop()

        chosen_choice_idx = choices.index(chosen_choice)
        chosen_action = actions[chosen_choice_idx]

        action_to_take, output_dialog = self.populate_action_parameters(
            scenario_state, chosen_action, dialog)

        return action_to_take, output_dialog

    def populate_action_parameters(self, scenario_state, action_to_take, dialog):
        """Populate whichever parameters ``action_to_take`` requires.

        Only actions named in ``_HOSTNAME_ACTIONS`` are touched; every other
        action is returned unchanged.

        :returns: ``(action_to_take, dialog)``
        """
        if action_to_take.name in self._HOSTNAME_ACTIONS:
            # The selected hostname record and its index are not needed here.
            action_to_take, _, _, dialog = self.ensure_hostname_is_populated(
                scenario_state, action_to_take, dialog)

        return action_to_take, dialog

    def ensure_hostname_is_populated(self,
                                     scenario_state,
                                     action_to_take,
                                     dialog):
        """Make sure ``action_to_take.hostname`` is set.

        If the hostname is missing, a follow-up exchange is appended to
        ``dialog`` and the structured inference engine picks one of
        ``scenario_state.hostnames``; the result is written back onto the
        action.  If the hostname is already set, it is looked up in
        ``scenario_state.hostnames``.

        :returns: ``(action_to_take, selected_hostname, selected_hostname_idx,
            dialog)``.  ``selected_hostname`` is a dict; on the inference
            path it is the engine's response, otherwise it holds
            ``'hostname_choice'`` (only when a match was found) and
            ``'brief_reasoning'``.  ``selected_hostname_idx`` is the index
            into ``scenario_state.hostnames``, or ``None`` when a preset
            hostname is not present in that list.
        :raises ValueError: if the engine returns a hostname that is not in
            ``scenario_state.hostnames``.
        """
        if action_to_take.hostname is None:
            # Use follow up prompt to define selected_hostname
            hostnames = list(scenario_state.hostnames)

            dialog.append(DialogElement(role='assistant',
                                        content='{} I would choose to {}'.format(
                                            action_to_take.justification,
                                            action_to_take.unstructured),
                                        tags=['parameter_completion']))
            dialog.append(DialogElement(role='user',
                                        content=followup_clarify_hostnames_cage(hostnames),
                                        tags=['parameter_completion']))
            dialog_text = self.structured_inference_engine.dialog_to_prompt(dialog)

            log.info("[bold]*DIALOG PROMPT*[/bold]",
                     extra={"markup": True})
            log.info(dialog_text)

            selected_hostname = self.structured_inference_engine.run_inference(
                dialog_text,
                cage_hostname_choice_json_schema(json.dumps(hostnames)))

            # Log the raw response before validating it, so an unexpected
            # choice is visible in the logs even if the lookup below raises.
            log.info("[bold]*STRUCTURED RESPONSE*[/bold]",
                     extra={"markup": True})
            log.info(selected_hostname, extra={"highlighter": JSON_HIGHLIGHTER})

            selected_hostname_idx = hostnames.index(selected_hostname['hostname_choice'])

            action_to_take.hostname = hostnames[selected_hostname_idx]
        else:
            # Use action_to_take.hostname to define selected_hostname
            selected_hostname = {}
            # Stays None if the preset hostname is not a known hostname;
            # previously this case raised UnboundLocalError at the return.
            selected_hostname_idx = None
            for candidate_idx, hostname in enumerate(scenario_state.hostnames):
                if hostname == action_to_take.hostname:
                    selected_hostname['hostname_choice'] = hostname
                    selected_hostname_idx = candidate_idx
                    break
            else:
                log.warning(
                    "Action hostname %r not found in scenario state hostnames",
                    action_to_take.hostname)

            selected_hostname['brief_reasoning'] = action_to_take.justification

        return action_to_take, selected_hostname, selected_hostname_idx, dialog

9 changes: 9 additions & 0 deletions align_system/algorithms/misc_itm_adm_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,15 @@ def run(self, scenario_state, actions):

return choices

class CAGEITMFormatChoicesADMComponent(ADMComponent):
    """Pipeline step that formats the available actions into choices
    via ``adm_utils.cage_format_choices``."""

    def run_returns(self):
        """Name under which the value returned by ``run`` is published.

        This is a plain string (not a tuple): ``run`` returns one bare value.
        """
        return 'choices'

    def run(self, scenario_state, actions):
        """Return the formatted choices for ``actions``.

        ``scenario_state`` is accepted but not used by this step.
        """
        return adm_utils.cage_format_choices(actions)


class JustificationFromReasonings(ADMComponent):
def run_returns(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
id: confidentiality-1.0
kdma_values:
- kdes: null
kdma: confidentiality
value: 1.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# @package _global_
defaults:
- override /adm: pipeline_baseline
- override /inference_engine@adm.structured_inference_engine: outlines_structured_greedy
- override /interface: cage_cia


# ADM pipeline configuration for this experiment.
adm:
step_definitions:
# Choice-formatting step: targets the CAGE-specific formatter component.
format_choices:
_target_: align_system.algorithms.misc_itm_adm_components.CAGEITMFormatChoicesADMComponent
# Templates (scenario description, prompt, system prompt) for the
# outlines_baseline step.
outlines_baseline:
scenario_description_template:
_target_: align_system.prompt_engineering.outlines_prompts.CAGEScenarioDescription
prompt_template:
_target_: align_system.prompt_engineering.outlines_prompts.Phase2BaselinePrompt
system_prompt_template:
_target_: align_system.prompt_engineering.outlines_prompts.CAGEACDSystemPrompt

enable_caching: true

# Parameter-completion step: targets the CAGE action parameter completion
# component.
action_parameter_completion:
_target_: align_system.algorithms.cage_action_parameter_completion_adm_component.CAGEActionParameterCompletionADMComponent

instance:
steps:
# Reference the step instances we want to use in order
- ${ref:adm.step_definitions.format_choices}
- ${ref:adm.step_definitions.outlines_baseline}
- ${ref:adm.step_definitions.action_parameter_completion}
# NOTE(review): ensure_chosen_action and populate_choice_info are not
# defined in this file; presumably they come from the pipeline_baseline
# ADM config selected in the defaults list -- confirm.
- ${ref:adm.step_definitions.ensure_chosen_action}
- ${ref:adm.step_definitions.populate_choice_info}

apply_action_filtering: false
save_scoring_output: true

force_determinism: true
align_to_target: true
sort_available_actions: false
2 changes: 2 additions & 0 deletions align_system/configs/interface/cage.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
_target_: align_system.interfaces.cage_action_based_service.CAGEActionBasedServiceInterface

2 changes: 2 additions & 0 deletions align_system/configs/interface/cage_cia.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
_target_: align_system.interfaces.cia_cage_service.CAGEActionBasedServiceInterface

Loading