-
Notifications
You must be signed in to change notification settings - Fork 313
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support bound inputs for array node tasks #3185
Changes from 6 commits
3220eae
d22ad13
dc595a1
89c5895
7af739a
9252ce8
31a7ee2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,6 +41,7 @@ | |
min_successes: Optional[int] = None, | ||
min_success_ratio: Optional[float] = None, | ||
bound_inputs: Optional[Set[str]] = None, | ||
bound_inputs_values: Optional[Dict[str, Any]] = None, | ||
**kwargs, | ||
): | ||
""" | ||
|
@@ -49,6 +50,7 @@ | |
:param min_successes: The minimum number of successful executions | ||
:param min_success_ratio: The minimum ratio of successful executions | ||
:param bound_inputs: The set of inputs that should be bound to the map task | ||
:param bound_inputs_values: Inputs that are bound to the array node and will not be mapped over | ||
:param kwargs: Additional keyword arguments to pass to the base class | ||
""" | ||
self._partial = None | ||
|
@@ -78,10 +80,21 @@ | |
if n_outputs > 1: | ||
raise ValueError("Only tasks with a single output are supported in map tasks.") | ||
|
||
# Note, bound_inputs are passed in during run time when executing the task | ||
# so both values shouldn't be set at the same time | ||
if bound_inputs and bound_inputs_values: | ||
if bound_inputs != set(bound_inputs_values): | ||
raise ValueError("bound_inputs and bound_inputs_values should have the same keys if both set") | ||
Review comment: Can we include a test that triggers this error? Reply: Added. |
||
|
||
self._bound_inputs: Set[str] = bound_inputs or set(bound_inputs) if bound_inputs else set() | ||
if self._partial: | ||
self._bound_inputs.update(self._partial.keywords.keys()) | ||
|
||
self._bound_inputs_values: Dict[str, Any] = bound_inputs_values or {} | ||
if self._bound_inputs_values: | ||
# bounded input values override any collisions w/ partials | ||
self._bound_inputs.update(set(self._bound_inputs_values)) | ||
|
||
# Transform the interface to List[Optional[T]] in case `min_success_ratio` is set | ||
output_as_list_of_optionals = min_success_ratio is not None and min_success_ratio != 1 and n_outputs == 1 | ||
collection_interface = transform_interface_to_list_interface( | ||
|
@@ -247,6 +260,8 @@ | |
if self._partial: | ||
"""If partial exists, then mix-in all partial values""" | ||
kwargs = {**self._partial.keywords, **kwargs} | ||
# bounded input values override any collisions w/ partials | ||
kwargs.update(self._bound_inputs_values) | ||
return super().__call__(*args, **kwargs) | ||
|
||
def _literal_map_to_python_input( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,10 +16,14 @@ | |
from flytekit.core.task import TaskMetadata | ||
from flytekit.core.type_engine import TypeEngine | ||
from flytekit.extras.accelerators import GPUAccelerator | ||
from flytekit.models import types | ||
from flytekit.models.literals import ( | ||
BindingData, | ||
Literal, | ||
LiteralMap, | ||
LiteralOffloadedMetadata, | ||
Scalar, | ||
Primitive, | ||
) | ||
from flytekit.tools.translator import get_serializable | ||
from flytekit.types.directory import FlyteDirectory | ||
|
@@ -307,7 +311,11 @@ def task3(c: str, a: int, b: float) -> str: | |
m2 = map_task(functools.partial(task2, c=param_c))(a=param_a, b=param_b) | ||
m3 = map_task(functools.partial(task3, c=param_c))(a=param_a, b=param_b) | ||
|
||
assert m1 == m2 == m3 == ["1 - 0.1 - c", "2 - 0.2 - c", "3 - 0.3 - c"] | ||
m4 = ArrayNodeMapTask(task1, bound_inputs_values={"c": param_c})(a=param_a, b=param_b) | ||
m5 = ArrayNodeMapTask(task2, bound_inputs_values={"c": param_c})(a=param_a, b=param_b) | ||
m6 = ArrayNodeMapTask(task3, bound_inputs_values={"c": param_c})(a=param_a, b=param_b) | ||
|
||
assert m1 == m2 == m3 == m4 == m5 == m6 == ["1 - 0.1 - c", "2 - 0.2 - c", "3 - 0.3 - c"] | ||
|
||
|
||
def test_bounded_inputs_vars_order(serialization_settings): | ||
|
@@ -322,6 +330,145 @@ def task1(a: int, b: float, c: str) -> str: | |
assert args[1] == "a,b,c" | ||
|
||
|
||
def test_bound_inputs_collision(): | ||
@task() | ||
def task1(a: int, b: float, c: str) -> str: | ||
return f"{a} - {b} - {c}" | ||
|
||
param_a = [1, 2, 3] | ||
param_b = [0.1, 0.2, 0.3] | ||
param_c = "c" | ||
param_d = "d" | ||
|
||
partial_task = functools.partial(task1, c=param_c) | ||
m1 = ArrayNodeMapTask(partial_task, bound_inputs_values={"c": param_d})(a=param_a, b=param_b) | ||
|
||
assert m1 == ["1 - 0.1 - d", "2 - 0.2 - d", "3 - 0.3 - d"] | ||
|
||
with pytest.raises(ValueError, match="bound_inputs and bound_inputs_values should have the same keys if both set"): | ||
ArrayNodeMapTask(task1, bound_inputs_values={"c": param_c}, bound_inputs={"b"})(a=param_a, b=param_b) | ||
|
||
try: | ||
Review comment: Can this also use [inline code lost in extraction; presumably `pytest.raises`]? Also, how does the error get triggered from [inline code reference lost in extraction]?
Reply: @thomasjpfan Should we use `pytest.raises` here, since it's not raising an error? We shouldn't really ever hit this error. I have this PR that exposes the `bound_inputs` param for `union.map`.
Reply: Ah, it's the other way around. In that case, I'm okay with just writing the call and letting pytest see the original error:
# no error raised
ArrayNodeMapTask(task1, bound_inputs_values={"c": param_c}, bound_inputs={"c"})(a=param_a, b=param_b) |
||
ArrayNodeMapTask(task1, bound_inputs_values={"c": param_c}, bound_inputs={"c"})(a=param_a, b=param_b) | ||
except Exception as e: | ||
pytest.fail(f"Unexpected exception raised: {e}") | ||
|
||
|
||
# Three-input task fixture used by the workflow builders in this module.
# It formats its inputs as "<a> - <b> - <c>".
@task()
def task_1(a: int, b: int, c: str) -> str:
    return f"{a} - {b} - {c}"
|
||
|
||
# Zero-input task fixture that always returns 2. The *_upstream workflow
# builders call it so that the bound value comes from another node's output.
@task()
def task_2() -> int:
    return 2
|
||
|
||
def get_wf_bound_input(serialization_settings):
    """Build a workflow that binds input ``a`` to the literal 1 via ``bound_inputs_values``.

    ``serialization_settings`` is accepted but not used by this builder.
    """

    @workflow()
    def wf1() -> List[str]:
        array_task = ArrayNodeMapTask(task_1, bound_inputs_values={"a": 1})
        return array_task(b=[1, 2, 3], c=["a", "b", "c"])

    return wf1
|
||
|
||
def get_wf_partials(serialization_settings):
    """Build a workflow that fixes input ``a`` to 1 through ``functools.partial``.

    ``serialization_settings`` is accepted but not used by this builder.
    """

    @workflow()
    def wf2() -> List[str]:
        fixed_a = functools.partial(task_1, a=1)
        array_task = ArrayNodeMapTask(fixed_a)
        return array_task(b=[1, 2, 3], c=["a", "b", "c"])

    return wf2
|
||
|
||
def get_wf_bound_input_upstream(serialization_settings):
    """Build a workflow whose bound value for ``a`` is the output of ``task_2``.

    ``serialization_settings`` is accepted but not used by this builder.
    """

    @workflow()
    def wf3() -> List[str]:
        upstream_value = task_2()
        array_task = ArrayNodeMapTask(task_1, bound_inputs_values={"a": upstream_value})
        return array_task(b=[1, 2, 3], c=["a", "b", "c"])

    return wf3
|
||
|
||
def get_wf_partials_upstream(serialization_settings):
    """Build a workflow that fixes ``a`` to the output of ``task_2`` through ``functools.partial``.

    ``serialization_settings`` is accepted but not used by this builder.
    """

    @workflow()
    def wf4() -> List[str]:
        upstream_value = task_2()
        fixed_a = functools.partial(task_1, a=upstream_value)
        array_task = ArrayNodeMapTask(fixed_a)
        return array_task(b=[1, 2, 3], c=["a", "b", "c"])

    return wf4
|
||
|
||
def get_wf_bound_input_partials_collision(serialization_settings):
    """Build a workflow where a partial sets ``a=1`` and ``bound_inputs_values`` sets ``a=2``.

    ``serialization_settings`` is accepted but not used by this builder.
    """

    @workflow()
    def wf5() -> List[str]:
        fixed_a = functools.partial(task_1, a=1)
        array_task = ArrayNodeMapTask(fixed_a, bound_inputs_values={"a": 2})
        return array_task(b=[1, 2, 3], c=["a", "b", "c"])

    return wf5
|
||
|
||
def get_wf_bound_input_overrides(serialization_settings):
    """Build a workflow that binds ``a=1`` and also passes a list for ``a`` at call time.

    ``serialization_settings`` is accepted but not used by this builder.
    """

    @workflow()
    def wf6() -> List[str]:
        array_task = ArrayNodeMapTask(task_1, bound_inputs_values={"a": 1})
        return array_task(a=[1, 2, 3], b=[1, 2, 3], c=["a", "b", "c"])

    return wf6
|
||
|
||
def get_int_binding(value):
    """Wrap ``value`` in a scalar ``BindingData`` holding an integer primitive."""
    int_primitive = Primitive(integer=value)
    return BindingData(scalar=Scalar(primitive=int_primitive))
|
||
|
||
def get_str_binding(value):
    """Wrap ``value`` in a scalar ``BindingData`` holding a string primitive."""
    str_primitive = Primitive(string_value=value)
    return BindingData(scalar=Scalar(primitive=str_primitive))
|
||
|
||
def promise_binding(node_id, var):
    """Return a ``BindingData`` whose promise references output ``var`` of node ``node_id``."""
    output_ref = types.OutputReference(node_id=node_id, var=var)
    return BindingData(promise=output_ref)
|
||
|
||
# Expected per-element bindings for the list inputs "b" and "c" that the
# workflow builders in this module pass to the array node.
B_BINDINGS_LIST = [get_int_binding(number) for number in (1, 2, 3)]
C_BINDINGS_LIST = [get_str_binding(letter) for letter in ("a", "b", "c")]
|
||
|
||
@pytest.mark.parametrize(
    ("wf", "upstream_nodes", "expected_inputs"),
    [
        (get_wf_bound_input, {}, {"a": get_int_binding(1), "b": B_BINDINGS_LIST, "c": C_BINDINGS_LIST}),
        (get_wf_partials, {}, {"a": get_int_binding(1), "b": B_BINDINGS_LIST, "c": C_BINDINGS_LIST}),
        (
            get_wf_bound_input_upstream,
            {"n0"},
            {"a": promise_binding("n0", "o0"), "b": B_BINDINGS_LIST, "c": C_BINDINGS_LIST},
        ),
        (
            get_wf_partials_upstream,
            {"n0"},
            {"a": promise_binding("n0", "o0"), "b": B_BINDINGS_LIST, "c": C_BINDINGS_LIST},
        ),
        (
            get_wf_bound_input_partials_collision,
            {},
            {"a": get_int_binding(2), "b": B_BINDINGS_LIST, "c": C_BINDINGS_LIST},
        ),
        (get_wf_bound_input_overrides, {}, {"a": get_int_binding(1), "b": B_BINDINGS_LIST, "c": C_BINDINGS_LIST}),
    ],
)
def test_bound_inputs_serialization(wf, upstream_nodes, expected_inputs, serialization_settings):
    """Serialize each workflow and check the array node's input bindings.

    The array node is expected to follow the upstream nodes in the serialized
    template, to carry exactly the expected bindings, and to report ``a`` as
    its only bound input.
    """
    workflow_under_test = wf(serialization_settings)
    wf_spec = get_serializable(OrderedDict(), serialization_settings, workflow_under_test)

    serialized_nodes = wf_spec.template.nodes
    upstream_count = len(upstream_nodes)
    assert len(serialized_nodes) == upstream_count + 1
    array_parent = serialized_nodes[upstream_count]

    assert len(array_parent.inputs) == len(expected_inputs)
    inputs_by_var = {}
    for node_in in array_parent.inputs:
        inputs_by_var[node_in.var] = node_in

    for var_name, expected in expected_inputs.items():
        actual = inputs_by_var[var_name]
        assert actual
        if not isinstance(expected, list):
            # Bound inputs serialize as a single binding, not a collection.
            assert actual.binding == expected
            continue
        element_bindings = actual.binding.collection.bindings
        assert len(element_bindings) == len(expected)
        for got, want in zip(element_bindings, expected):
            assert got == want

    assert array_parent.array_node._bound_inputs == {"a"}
    assert set(array_parent.upstream_node_ids) == set(upstream_nodes)
|
||
|
||
@pytest.mark.parametrize( | ||
"min_success_ratio, should_raise_error", | ||
[ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The condition `bound_inputs != set(bound_inputs_values)` might not work as expected when comparing a set with dictionary keys. Consider using `bound_inputs != set(bound_inputs_values.keys())` to ensure proper comparison between the set and dictionary keys.
Code suggestion
Code Review Run #f8b189
Should Bito avoid suggestions like this for future reviews? (Manage Rules)