Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support bound inputs for array node tasks #3185

Merged
merged 7 commits into from
Mar 12, 2025
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions flytekit/core/array_node_map_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
min_successes: Optional[int] = None,
min_success_ratio: Optional[float] = None,
bound_inputs: Optional[Set[str]] = None,
bound_inputs_values: Optional[Dict[str, Any]] = None,
**kwargs,
):
"""
Expand All @@ -49,6 +50,7 @@
:param min_successes: The minimum number of successful executions
:param min_success_ratio: The minimum ratio of successful executions
:param bound_inputs: The set of inputs that should be bound to the map task
:param bound_inputs_values: Inputs that are bound to the array node and will not be mapped over
:param kwargs: Additional keyword arguments to pass to the base class
"""
self._partial = None
Expand Down Expand Up @@ -78,10 +80,21 @@
if n_outputs > 1:
raise ValueError("Only tasks with a single output are supported in map tasks.")

# Note, bound_inputs are passed in during run time when executing the task
# so both values shouldn't be set at the same time
if bound_inputs and bound_inputs_values:
if bound_inputs != set(bound_inputs_values):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potential comparison issue with set and dict

The condition bound_inputs != set(bound_inputs_values) might not work as expected when comparing a set with dictionary keys. Consider using bound_inputs != set(bound_inputs_values.keys()) to ensure proper comparison between the set and dictionary keys.

Code suggestion
Check the AI-generated fix before applying
Suggested change
if bound_inputs != set(bound_inputs_values):
if bound_inputs != set(bound_inputs_values.keys()):

Code Review Run #f8b189


Should Bito avoid suggestions like this for future reviews? (Manage Rules)

  • Yes, avoid them

raise ValueError("bound_inputs and bound_inputs_values should have the same keys if both set")

Check warning on line 87 in flytekit/core/array_node_map_task.py

View check run for this annotation

Codecov / codecov/patch

flytekit/core/array_node_map_task.py#L87

Added line #L87 was not covered by tests
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we include a test that triggers this error?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added


self._bound_inputs: Set[str] = bound_inputs or set(bound_inputs) if bound_inputs else set()
if self._partial:
self._bound_inputs.update(self._partial.keywords.keys())

self._bound_inputs_values: Dict[str, Any] = bound_inputs_values or {}

Check warning on line 93 in flytekit/core/array_node_map_task.py

View check run for this annotation

Codecov / codecov/patch

flytekit/core/array_node_map_task.py#L93

Added line #L93 was not covered by tests
if self._bound_inputs_values:
# bounded input values override any collisions w/ partials
self._bound_inputs.update(set(self._bound_inputs_values))

Check warning on line 96 in flytekit/core/array_node_map_task.py

View check run for this annotation

Codecov / codecov/patch

flytekit/core/array_node_map_task.py#L96

Added line #L96 was not covered by tests

# Transform the interface to List[Optional[T]] in case `min_success_ratio` is set
output_as_list_of_optionals = min_success_ratio is not None and min_success_ratio != 1 and n_outputs == 1
collection_interface = transform_interface_to_list_interface(
Expand Down Expand Up @@ -247,6 +260,8 @@
if self._partial:
"""If partial exists, then mix-in all partial values"""
kwargs = {**self._partial.keywords, **kwargs}
# bounded input values override any collisions w/ partials
kwargs.update(self._bound_inputs_values)

Check warning on line 264 in flytekit/core/array_node_map_task.py

View check run for this annotation

Codecov / codecov/patch

flytekit/core/array_node_map_task.py#L264

Added line #L264 was not covered by tests
return super().__call__(*args, **kwargs)

def _literal_map_to_python_input(
Expand Down
1 change: 1 addition & 0 deletions flytekit/tools/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,7 @@ def get_serializable_array_node_map_task(
min_success_ratio=entity.min_success_ratio,
execution_mode=entity.execution_mode,
is_original_sub_node_interface=entity.is_original_sub_node_interface,
bound_inputs=entity.bound_inputs,
)


Expand Down
149 changes: 148 additions & 1 deletion tests/flytekit/unit/core/test_array_node_map_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,14 @@
from flytekit.core.task import TaskMetadata
from flytekit.core.type_engine import TypeEngine
from flytekit.extras.accelerators import GPUAccelerator
from flytekit.models import types
from flytekit.models.literals import (
BindingData,
Literal,
LiteralMap,
LiteralOffloadedMetadata,
Scalar,
Primitive,
)
from flytekit.tools.translator import get_serializable
from flytekit.types.directory import FlyteDirectory
Expand Down Expand Up @@ -307,7 +311,11 @@ def task3(c: str, a: int, b: float) -> str:
m2 = map_task(functools.partial(task2, c=param_c))(a=param_a, b=param_b)
m3 = map_task(functools.partial(task3, c=param_c))(a=param_a, b=param_b)

assert m1 == m2 == m3 == ["1 - 0.1 - c", "2 - 0.2 - c", "3 - 0.3 - c"]
m4 = ArrayNodeMapTask(task1, bound_inputs_values={"c": param_c})(a=param_a, b=param_b)
m5 = ArrayNodeMapTask(task2, bound_inputs_values={"c": param_c})(a=param_a, b=param_b)
m6 = ArrayNodeMapTask(task3, bound_inputs_values={"c": param_c})(a=param_a, b=param_b)

assert m1 == m2 == m3 == m4 == m5 == m6 == ["1 - 0.1 - c", "2 - 0.2 - c", "3 - 0.3 - c"]


def test_bounded_inputs_vars_order(serialization_settings):
Expand All @@ -322,6 +330,145 @@ def task1(a: int, b: float, c: str) -> str:
assert args[1] == "a,b,c"


def test_bound_inputs_collision():
@task()
def task1(a: int, b: float, c: str) -> str:
return f"{a} - {b} - {c}"

param_a = [1, 2, 3]
param_b = [0.1, 0.2, 0.3]
param_c = "c"
param_d = "d"

partial_task = functools.partial(task1, c=param_c)
m1 = ArrayNodeMapTask(partial_task, bound_inputs_values={"c": param_d})(a=param_a, b=param_b)

assert m1 == ["1 - 0.1 - d", "2 - 0.2 - d", "3 - 0.3 - d"]

with pytest.raises(ValueError, match="bound_inputs and bound_inputs_values should have the same keys if both set"):
ArrayNodeMapTask(task1, bound_inputs_values={"c": param_c}, bound_inputs={"b"})(a=param_a, b=param_b)

try:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this also use pytest.raises?

Also, how does the error get triggered from union.map?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@thomasjpfan Should we use pytest.raises here since it's not raising an error?

We shouldn't really ever hit this error. I have this PR that exposes the bound_inputs param for union.map

Copy link
Member

@thomasjpfan thomasjpfan Mar 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, it's the other way around. In that case, I'm okay with just writing the call directly and letting pytest surface the original error.

# no error raised
ArrayNodeMapTask(task1, bound_inputs_values={"c": param_c}, bound_inputs={"c"})(a=param_a, b=param_b)

ArrayNodeMapTask(task1, bound_inputs_values={"c": param_c}, bound_inputs={"c"})(a=param_a, b=param_b)
except Exception as e:
pytest.fail(f"Unexpected exception raised: {e}")


# Module-level task used by the workflow factories below: renders its three
# inputs as one "a - b - c" string.
@task()
def task_1(a: int, b: int, c: str) -> str:
    pieces = (str(a), str(b), str(c))
    return " - ".join(pieces)


# Zero-argument task that always yields 2; the "*_upstream" workflow factories
# call it to produce the value they bind to ``a``.
@task()
def task_2() -> int:
    constant_output = 2
    return constant_output


def get_wf_bound_input(serialization_settings):
    """Return a workflow that binds ``a`` via ``bound_inputs_values``.

    ``serialization_settings`` is accepted so every factory shares one call
    signature; this factory does not read it.
    """

    @workflow()
    def wf1() -> List[str]:
        # "a" is fixed to the literal 1; "b" and "c" are supplied as lists.
        mapped_task = ArrayNodeMapTask(task_1, bound_inputs_values={"a": 1})
        return mapped_task(b=[1, 2, 3], c=["a", "b", "c"])

    return wf1


def get_wf_partials(serialization_settings):
    """Return a workflow that fixes ``a`` through ``functools.partial``.

    ``serialization_settings`` is accepted so every factory shares one call
    signature; this factory does not read it.
    """

    @workflow()
    def wf2() -> List[str]:
        # "a" is fixed to 1 by the partial; "b" and "c" are supplied as lists.
        partial_task = functools.partial(task_1, a=1)
        mapped_task = ArrayNodeMapTask(partial_task)
        return mapped_task(b=[1, 2, 3], c=["a", "b", "c"])

    return wf2


def get_wf_bound_input_upstream(serialization_settings):
    """Return a workflow binding ``a`` to the output of ``task_2``.

    The value is supplied through ``bound_inputs_values``.
    ``serialization_settings`` is accepted but not read by this factory.
    """

    @workflow()
    def wf3() -> List[str]:
        # task_2 is invoked first; its result becomes the bound value of "a".
        upstream_a = task_2()
        mapped_task = ArrayNodeMapTask(task_1, bound_inputs_values={"a": upstream_a})
        return mapped_task(b=[1, 2, 3], c=["a", "b", "c"])

    return wf3


def get_wf_partials_upstream(serialization_settings):
    """Return a workflow fixing ``a`` to the output of ``task_2`` via a partial.

    ``serialization_settings`` is accepted but not read by this factory.
    """

    @workflow()
    def wf4() -> List[str]:
        # task_2 is invoked first; its result is baked into the partial as "a".
        upstream_a = task_2()
        partial_task = functools.partial(task_1, a=upstream_a)
        return ArrayNodeMapTask(partial_task)(b=[1, 2, 3], c=["a", "b", "c"])

    return wf4


def get_wf_bound_input_partials_collision(serialization_settings):
    """Return a workflow where a partial and ``bound_inputs_values`` both set ``a``.

    The partial supplies ``a=1`` and ``bound_inputs_values`` supplies ``a=2``.
    ``serialization_settings`` is accepted but not read by this factory.
    """

    @workflow()
    def wf5() -> List[str]:
        partial_task = functools.partial(task_1, a=1)
        mapped_task = ArrayNodeMapTask(partial_task, bound_inputs_values={"a": 2})
        return mapped_task(b=[1, 2, 3], c=["a", "b", "c"])

    return wf5


def get_wf_bound_input_overrides(serialization_settings):
    """Return a workflow that passes ``a`` both as a bound value and at call time.

    ``bound_inputs_values`` supplies ``a=1`` while the call also passes
    ``a=[1, 2, 3]``. ``serialization_settings`` is accepted but not read by
    this factory.
    """

    @workflow()
    def wf6() -> List[str]:
        mapped_task = ArrayNodeMapTask(task_1, bound_inputs_values={"a": 1})
        return mapped_task(a=[1, 2, 3], b=[1, 2, 3], c=["a", "b", "c"])

    return wf6


def get_int_binding(value):
    """Build a scalar ``BindingData`` from ``Primitive(integer=value)``."""
    int_primitive = Primitive(integer=value)
    return BindingData(scalar=Scalar(primitive=int_primitive))


def get_str_binding(value):
    """Build a scalar ``BindingData`` from ``Primitive(string_value=value)``."""
    str_primitive = Primitive(string_value=value)
    return BindingData(scalar=Scalar(primitive=str_primitive))


def promise_binding(node_id, var):
    """Build a ``BindingData`` whose promise references ``var`` on ``node_id``."""
    output_ref = types.OutputReference(node_id=node_id, var=var)
    return BindingData(promise=output_ref)


# Expected per-element bindings for the "b" and "c" inputs used by every
# workflow factory above (b=[1, 2, 3], c=["a", "b", "c"]).
B_BINDINGS_LIST = [get_int_binding(number) for number in (1, 2, 3)]
C_BINDINGS_LIST = [get_str_binding(letter) for letter in ("a", "b", "c")]


@pytest.mark.parametrize(
    ("wf", "upstream_nodes", "expected_inputs"),
    [
        # ``set()`` (not ``{}``, which is an empty dict) keeps ``upstream_nodes``
        # a set in every case, matching the ``{"n0"}`` entries.
        (
            get_wf_bound_input,
            set(),
            {"a": get_int_binding(1), "b": B_BINDINGS_LIST, "c": C_BINDINGS_LIST},
        ),
        (
            get_wf_partials,
            set(),
            {"a": get_int_binding(1), "b": B_BINDINGS_LIST, "c": C_BINDINGS_LIST},
        ),
        (
            get_wf_bound_input_upstream,
            {"n0"},
            {"a": promise_binding("n0", "o0"), "b": B_BINDINGS_LIST, "c": C_BINDINGS_LIST},
        ),
        (
            get_wf_partials_upstream,
            {"n0"},
            {"a": promise_binding("n0", "o0"), "b": B_BINDINGS_LIST, "c": C_BINDINGS_LIST},
        ),
        (
            get_wf_bound_input_partials_collision,
            set(),
            {"a": get_int_binding(2), "b": B_BINDINGS_LIST, "c": C_BINDINGS_LIST},
        ),
        (
            get_wf_bound_input_overrides,
            set(),
            {"a": get_int_binding(1), "b": B_BINDINGS_LIST, "c": C_BINDINGS_LIST},
        ),
    ],
)
def test_bound_inputs_serialization(wf, upstream_nodes, expected_inputs, serialization_settings):
    """Check the serialized map-task node for each bound-input workflow variant.

    :param wf: Factory that takes ``serialization_settings`` and returns the workflow to serialize
    :param upstream_nodes: Set of node ids expected upstream of the map-task node
    :param expected_inputs: Expected binding per input name; a list means one
        binding per element of a collection binding, anything else is compared
        against the input's binding directly
    :param serialization_settings: Fixture passed to ``get_serializable`` and to ``wf``
    """
    wf_spec = get_serializable(OrderedDict(), serialization_settings, wf(serialization_settings))
    nodes = wf_spec.template.nodes
    # One node per upstream id plus the map-task node itself.
    assert len(nodes) == len(upstream_nodes) + 1
    parent_node = nodes[len(upstream_nodes)]

    assert len(parent_node.inputs) == len(expected_inputs)
    inputs_map = {node_input.var: node_input for node_input in parent_node.inputs}

    for param, expected_input in expected_inputs.items():
        # Fail with an assertion (not a bare KeyError) when an input is missing.
        assert param in inputs_map, f"input {param!r} missing from serialized node"
        node_input = inputs_map[param]
        assert node_input
        if isinstance(expected_input, list):
            bindings = node_input.binding.collection.bindings
            assert len(bindings) == len(expected_input)
            for binding, expected_binding in zip(bindings, expected_input):
                assert binding == expected_binding
        else:
            assert node_input.binding == expected_input

    # Every parametrized workflow binds exactly the "a" input.
    assert parent_node.array_node._bound_inputs == {"a"}
    assert set(parent_node.upstream_node_ids) == set(upstream_nodes)


@pytest.mark.parametrize(
"min_success_ratio, should_raise_error",
[
Expand Down
Loading