Support bound inputs for array node tasks #3185

Merged · 7 commits · Mar 12, 2025
15 changes: 15 additions & 0 deletions flytekit/core/array_node_map_task.py
@@ -41,6 +41,7 @@ def __init__(
min_successes: Optional[int] = None,
min_success_ratio: Optional[float] = None,
bound_inputs: Optional[Set[str]] = None,
bound_inputs_values: Optional[Dict[str, Any]] = None,
**kwargs,
):
"""
@@ -49,6 +50,7 @@ def __init__(
:param min_successes: The minimum number of successful executions
:param min_success_ratio: The minimum ratio of successful executions
:param bound_inputs: The set of inputs that should be bound to the map task
:param bound_inputs_values: Inputs that are bound to the array node and will not be mapped over
:param kwargs: Additional keyword arguments to pass to the base class
"""
self._partial = None
@@ -78,10 +80,21 @@ def __init__(
if n_outputs > 1:
raise ValueError("Only tasks with a single output are supported in map tasks.")

# Note, bound_inputs are passed in during run time when executing the task
# so both values shouldn't be set at the same time
if bound_inputs and bound_inputs_values:
if bound_inputs != set(bound_inputs_values):
Contributor (Bito code review) commented:

Potential comparison issue with set and dict

The condition bound_inputs != set(bound_inputs_values) might not work as expected when comparing a set with dictionary keys. Consider using bound_inputs != set(bound_inputs_values.keys()) to ensure proper comparison between the set and dictionary keys.

Suggested change:
if bound_inputs != set(bound_inputs_values):
if bound_inputs != set(bound_inputs_values.keys()):
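
For reference, a minimal sketch (with illustrative values, not taken from the PR) of how this comparison behaves in plain Python; iterating a dict yields its keys, so both spellings compare the same sets:

# Illustrative only: set(d) and set(d.keys()) produce the same set,
# so either form of the comparison drives the same validation.
bound_inputs = {"c"}
bound_inputs_values = {"c": 42}

assert set(bound_inputs_values) == set(bound_inputs_values.keys())
assert bound_inputs == set(bound_inputs_values)              # keys match: no error raised

mismatched_bound_inputs = {"b"}
assert mismatched_bound_inputs != set(bound_inputs_values)   # keys differ: the ValueError branch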

raise ValueError("bound_inputs and bound_inputs_values should have the same keys if both set")
Member commented:

Can we include a test that triggers this error?

Contributor (Author) replied:

added

self._bound_inputs: Set[str] = bound_inputs or set(bound_inputs) if bound_inputs else set()
if self._partial:
self._bound_inputs.update(self._partial.keywords.keys())

self._bound_inputs_values: Dict[str, Any] = bound_inputs_values or {}
if self._bound_inputs_values:
# bounded input values override any collisions w/ partials
self._bound_inputs.update(set(self._bound_inputs_values))

# Transform the interface to List[Optional[T]] in case `min_success_ratio` is set
output_as_list_of_optionals = min_success_ratio is not None and min_success_ratio != 1 and n_outputs == 1
collection_interface = transform_interface_to_list_interface(
@@ -247,6 +260,8 @@ def __call__(self, *args, **kwargs):
if self._partial:
"""If partial exists, then mix-in all partial values"""
kwargs = {**self._partial.keywords, **kwargs}
# bounded input values override any collisions w/ partials
kwargs.update(self._bound_inputs_values)
return super().__call__(*args, **kwargs)

def _literal_map_to_python_input(
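Taken together, the changes in this file let a caller pin an input of a map task to a single value so that it is not mapped over. A minimal usage sketch, modeled on the tests added in this PR (the task, workflow, and values below are illustrative, not part of the diff):

from typing import List

from flytekit import task, workflow
from flytekit.core.array_node_map_task import ArrayNodeMapTask


@task()
def greet(a: int, b: float, c: str) -> str:
    return f"{a} - {b} - {c}"


@workflow()
def wf() -> List[str]:
    # "c" is bound to a single value and is not mapped over;
    # only "a" and "b" vary across the mapped sub-tasks.
    return ArrayNodeMapTask(greet, bound_inputs_values={"c": "fixed"})(
        a=[1, 2, 3], b=[0.1, 0.2, 0.3]
    )

If the same input is also bound through functools.partial, the bound_inputs_values entry takes precedence on collision, as exercised by test_bound_inputs_collision in the test changes below.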
1 change: 1 addition & 0 deletions flytekit/tools/translator.py
@@ -661,6 +661,7 @@ def get_serializable_array_node_map_task(
min_success_ratio=entity.min_success_ratio,
execution_mode=entity.execution_mode,
is_original_sub_node_interface=entity.is_original_sub_node_interface,
bound_inputs=entity.bound_inputs,
)


147 changes: 146 additions & 1 deletion tests/flytekit/unit/core/test_array_node_map_task.py
@@ -16,10 +16,14 @@
from flytekit.core.task import TaskMetadata
from flytekit.core.type_engine import TypeEngine
from flytekit.extras.accelerators import GPUAccelerator
from flytekit.models import types
from flytekit.models.literals import (
BindingData,
Literal,
LiteralMap,
LiteralOffloadedMetadata,
Scalar,
Primitive,
)
from flytekit.tools.translator import get_serializable
from flytekit.types.directory import FlyteDirectory
@@ -307,7 +311,11 @@ def task3(c: str, a: int, b: float) -> str:
m2 = map_task(functools.partial(task2, c=param_c))(a=param_a, b=param_b)
m3 = map_task(functools.partial(task3, c=param_c))(a=param_a, b=param_b)

assert m1 == m2 == m3 == ["1 - 0.1 - c", "2 - 0.2 - c", "3 - 0.3 - c"]
m4 = ArrayNodeMapTask(task1, bound_inputs_values={"c": param_c})(a=param_a, b=param_b)
m5 = ArrayNodeMapTask(task2, bound_inputs_values={"c": param_c})(a=param_a, b=param_b)
m6 = ArrayNodeMapTask(task3, bound_inputs_values={"c": param_c})(a=param_a, b=param_b)

assert m1 == m2 == m3 == m4 == m5 == m6 == ["1 - 0.1 - c", "2 - 0.2 - c", "3 - 0.3 - c"]


def test_bounded_inputs_vars_order(serialization_settings):
@@ -322,6 +330,143 @@ def task1(a: int, b: float, c: str) -> str:
assert args[1] == "a,b,c"


def test_bound_inputs_collision():
@task()
def task1(a: int, b: float, c: str) -> str:
return f"{a} - {b} - {c}"

param_a = [1, 2, 3]
param_b = [0.1, 0.2, 0.3]
param_c = "c"
param_d = "d"

partial_task = functools.partial(task1, c=param_c)
m1 = ArrayNodeMapTask(partial_task, bound_inputs_values={"c": param_d})(a=param_a, b=param_b)

assert m1 == ["1 - 0.1 - d", "2 - 0.2 - d", "3 - 0.3 - d"]

with pytest.raises(ValueError, match="bound_inputs and bound_inputs_values should have the same keys if both set"):
ArrayNodeMapTask(task1, bound_inputs_values={"c": param_c}, bound_inputs={"b"})(a=param_a, b=param_b)

# no error raised
ArrayNodeMapTask(task1, bound_inputs_values={"c": param_c}, bound_inputs={"c"})(a=param_a, b=param_b)


@task()
def task_1(a: int, b: int, c: str) -> str:
return f"{a} - {b} - {c}"


@task()
def task_2() -> int:
return 2


def get_wf_bound_input(serialization_settings):
@workflow()
def wf1() -> List[str]:
return ArrayNodeMapTask(task_1, bound_inputs_values={"a": 1})(b=[1, 2, 3], c=["a", "b", "c"])

return wf1


def get_wf_partials(serialization_settings):
@workflow()
def wf2() -> List[str]:
return ArrayNodeMapTask(functools.partial(task_1, a=1))(b=[1, 2, 3], c=["a", "b", "c"])

return wf2


def get_wf_bound_input_upstream(serialization_settings):

@workflow()
def wf3() -> List[str]:
a = task_2()
return ArrayNodeMapTask(task_1, bound_inputs_values={"a": a})(b=[1, 2, 3], c=["a", "b", "c"])

return wf3


def get_wf_partials_upstream(serialization_settings):

@workflow()
def wf4() -> List[str]:
a = task_2()
return ArrayNodeMapTask(functools.partial(task_1, a=a))(b=[1, 2, 3], c=["a", "b", "c"])

return wf4


def get_wf_bound_input_partials_collision(serialization_settings):

@workflow()
def wf5() -> List[str]:
return ArrayNodeMapTask(functools.partial(task_1, a=1), bound_inputs_values={"a": 2})(b=[1, 2, 3], c=["a", "b", "c"])

return wf5


def get_wf_bound_input_overrides(serialization_settings):

@workflow()
def wf6() -> List[str]:
return ArrayNodeMapTask(task_1, bound_inputs_values={"a": 1})(a=[1, 2, 3], b=[1, 2, 3], c=["a", "b", "c"])

return wf6


def get_int_binding(value):
return BindingData(scalar=Scalar(primitive=Primitive(integer=value)))


def get_str_binding(value):
return BindingData(scalar=Scalar(primitive=Primitive(string_value=value)))


def promise_binding(node_id, var):
return BindingData(promise=types.OutputReference(node_id=node_id, var=var))


B_BINDINGS_LIST = [get_int_binding(1), get_int_binding(2), get_int_binding(3)]
C_BINDINGS_LIST = [get_str_binding("a"), get_str_binding("b"), get_str_binding("c")]


@pytest.mark.parametrize(
("wf", "upstream_nodes", "expected_inputs"),
[
(get_wf_bound_input, {}, {"a": get_int_binding(1), "b": B_BINDINGS_LIST, "c": C_BINDINGS_LIST}),
(get_wf_partials, {}, {"a": get_int_binding(1), "b": B_BINDINGS_LIST, "c": C_BINDINGS_LIST}),
(get_wf_bound_input_upstream, {"n0"}, {"a": promise_binding("n0", "o0"), "b": B_BINDINGS_LIST, "c": C_BINDINGS_LIST}),
(get_wf_partials_upstream, {"n0"}, {"a": promise_binding("n0", "o0"), "b": B_BINDINGS_LIST, "c": C_BINDINGS_LIST}),
(get_wf_bound_input_partials_collision, {}, {"a": get_int_binding(2), "b": B_BINDINGS_LIST, "c": C_BINDINGS_LIST}),
(get_wf_bound_input_overrides, {}, {"a": get_int_binding(1), "b": B_BINDINGS_LIST, "c": C_BINDINGS_LIST}),
]
)
def test_bound_inputs_serialization(wf, upstream_nodes, expected_inputs, serialization_settings):
wf_spec = get_serializable(OrderedDict(), serialization_settings, wf(serialization_settings))
assert len(wf_spec.template.nodes) == len(upstream_nodes) + 1
parent_node = wf_spec.template.nodes[len(upstream_nodes)]

assert len(parent_node.inputs) == len(expected_inputs)
inputs_map = {x.var: x for x in parent_node.inputs}

for param, expected_input in expected_inputs.items():
node_input = inputs_map[param]
assert node_input
if isinstance(expected_input, list):
bindings = node_input.binding.collection.bindings
assert len(bindings) == len(expected_inputs[param])
for i, binding in enumerate(bindings):
assert binding == expected_input[i]
else:
binding = node_input.binding
assert binding == expected_input

assert parent_node.array_node._bound_inputs == {"a"}
assert set(parent_node.upstream_node_ids) == set(upstream_nodes)


@pytest.mark.parametrize(
"min_success_ratio, should_raise_error",
[