Skip to content

Commit

Permalink
Pinning affinity on stream tensor import/export. (#20016)
Browse files Browse the repository at this point in the history
The op semantics require the operand/result to be pinned to the specified
affinity; previously this only happened by chance.
  • Loading branch information
benvanik authored Feb 18, 2025
1 parent 3c14d0d commit 71792fb
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 9 deletions.
4 changes: 4 additions & 0 deletions compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1818,6 +1818,8 @@ LogicalResult TensorImportOp::verify() {
return success();
}

// Always reports that this op pins the affinity of the value it touches, so
// the affinity declared on the import is honored rather than inferred.
bool TensorImportOp::pinsValueAffinity() {
  return true;
}

// Returns the base value the result is tied to, found by walking the tie
// chain starting at the import source. `resultIndex` is not consulted.
Value TensorImportOp::getTiedResult(unsigned resultIndex) {
  auto importedSource = getSource();
  return IREE::Util::TiedOpInterface::findTiedBaseValue(importedSource);
}
Expand Down Expand Up @@ -1845,6 +1847,8 @@ LogicalResult TensorExportOp::verify() {
return success();
}

// Always reports that this op pins the affinity of the value it touches, so
// the affinity declared on the export is honored rather than inferred.
bool TensorExportOp::pinsValueAffinity() {
  return true;
}

// Returns the base value the result is tied to, found by walking the tie
// chain starting at the export source. `resultIndex` is not consulted.
Value TensorExportOp::getTiedResult(unsigned resultIndex) {
  auto exportedSource = getSource();
  return IREE::Util::TiedOpInterface::findTiedBaseValue(exportedSource);
}
Expand Down
8 changes: 6 additions & 2 deletions compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -1083,7 +1083,9 @@ def OpGroupPseudoOps : OpDocGroup {
let opDocGroup = OpGroupPseudoOps in {

def Stream_TensorImportOp : Stream_PureOp<"tensor.import", [
Stream_AffinityOp,
DeclareOpInterfaceMethods<Stream_AffinityOp, [
"pinsValueAffinity",
]>,
Util_ShapeAwareOp,
Util_SizeAwareOp,
DeclareOpInterfaceMethods<Util_TiedOpInterface, [
Expand Down Expand Up @@ -1136,7 +1138,9 @@ def Stream_TensorImportOp : Stream_PureOp<"tensor.import", [
}

def Stream_TensorExportOp : Stream_PureOp<"tensor.export", [
Stream_AffinityOp,
DeclareOpInterfaceMethods<Stream_AffinityOp, [
"pinsValueAffinity",
]>,
Util_ShapeAwareOp,
Util_SizeAwareOp,
DeclareOpInterfaceMethods<Util_TiedOpInterface, [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ namespace {
// will be needed in the work.
class StreamTensorOpUpdater {
public:
explicit StreamTensorOpUpdater(ModuleOp moduleOp) : moduleOp(moduleOp){};
explicit StreamTensorOpUpdater(ModuleOp moduleOp) : moduleOp(moduleOp) {}
~StreamTensorOpUpdater() {}

// Collects the stream tensor op candidates, and prepares all the needed
Expand Down Expand Up @@ -451,7 +451,9 @@ LogicalResult StreamTensorOpUpdater::addQuery(
}
SmallVector<IREE::Stream::AffinityAttr> affinityAttrs;
if (!affinityAnalysis.tryLookupResourceAffinity(operand, affinityAttrs)) {
return failure();
return dispatchOp.emitError(
"failed to determine resource affinity for operand ")
<< operand;
}
for (auto affinity : affinityAttrs) {
queries.emplace_back(affinity, affinityOp);
Expand All @@ -462,7 +464,7 @@ LogicalResult StreamTensorOpUpdater::addQuery(
return success();
}

/// Updates the operand encondings and result encodings for the `dispatchOp`
/// Updates the operand encodings and result encodings for the `dispatchOp`
/// with resolved layouts.
static LogicalResult updateTensorDispatchOp(
RewriterBase &rewriter, ModuleOp moduleOp,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ util.func private @splat_op() -> tensor<1xi32> {
// be living at the time the functions are invoked. Imports do not perform
// transfers so we must use whatever is declared.

// CHECK-LABEL: @imported_tensor
util.func public @imported_tensor(%buffer_view: !hal.buffer_view, %fence: !hal.fence) -> tensor<1xi32> {
// CHECK-LABEL: @imported_hal_tensor
util.func public @imported_hal_tensor(%buffer_view: !hal.buffer_view, %fence: !hal.fence) -> tensor<1xi32> {
// CHECK: hal.tensor.import
// CHECK-SAME{LITERAL}: stream.affinities.results = [[#hal.device.promise<@dev_a>]]
%tensor = hal.tensor.import on(#hal.device.promise<@dev_a>) wait(%fence) => %buffer_view "input" : !hal.buffer_view -> tensor<1xi32>
Expand All @@ -108,13 +108,26 @@ util.func public @imported_tensor(%buffer_view: !hal.buffer_view, %fence: !hal.f

// -----

// Tests that a stream-level tensor import with an explicit affinity reports
// that affinity on its result, and that the same affinity reaches the operand
// of the function return that consumes the imported resource.

// CHECK-LABEL: @imported_stream_tensor
util.func public @imported_stream_tensor(%buffer_view: !hal.buffer_view) -> !stream.resource<external> {
%size = stream.tensor.sizeof on(#hal.device.promise<@dev_a>) tensor<1xi32> : index
// CHECK: stream.tensor.import
// CHECK-SAME{LITERAL}: stream.affinities.results = [[#hal.device.promise<@dev_a>]]
%resource = stream.tensor.import on(#hal.device.promise<@dev_a>) %buffer_view : !hal.buffer_view -> tensor<1xi32> in !stream.resource<external>{%size}
// CHECK: util.return
// CHECK-SAME{LITERAL}: stream.affinities.operands = [[#hal.device.promise<@dev_a>]]
util.return %resource : !stream.resource<external>
}

// -----

// Tests that consumer-placed ops exported to buffers are properly placed.
// Frontends can use this to explicitly define where exported tensors must live.
// With consumer-placed ops like constants or splats we place them directly on
// the export target.

// CHECK-LABEL: @exported_constant
util.func public @exported_constant(%fence: !hal.fence) -> !hal.buffer_view {
// CHECK-LABEL: @exported_hal_constant
util.func public @exported_hal_constant(%fence: !hal.fence) -> !hal.buffer_view {
// CHECK: flow.tensor.constant
// CHECK-SAME{LITERAL}: stream.affinities = [#hal.device.promise<@dev_a>]
// CHECK-SAME{LITERAL}: stream.affinities.results = [[#hal.device.promise<@dev_a>]]
Expand All @@ -131,6 +144,21 @@ util.func public @exported_constant(%fence: !hal.fence) -> !hal.buffer_view {

// -----

// Tests that a stream-level constant declared without an affinity of its own
// is assigned the affinity of the stream-level export that consumes it, and
// that the export reports that affinity both for itself and for its operand.

// CHECK-LABEL: @exported_stream_constant
util.func public @exported_stream_constant() -> !hal.buffer_view {
%size = stream.tensor.sizeof on(#hal.device.promise<@dev_a>) tensor<1x5x64xi32> : index
// CHECK: stream.tensor.constant
// CHECK-SAME{LITERAL}: stream.affinities.results = [[#hal.device.promise<@dev_a>]]
%cst = stream.tensor.constant : tensor<1x5x64xi32> in !stream.resource<external> = dense<0> : tensor<1x5x64xi32>
// CHECK: stream.tensor.export
// CHECK-SAME{LITERAL}: stream.affinities = [#hal.device.promise<@dev_a>]
// CHECK-SAME{LITERAL}: stream.affinities.operands = [[#hal.device.promise<@dev_a>]]
%buffer_view = stream.tensor.export on(#hal.device.promise<@dev_a>) %cst : tensor<1x5x64xi32> in !stream.resource<external>{%size} -> !hal.buffer_view
util.return %buffer_view : !hal.buffer_view
}

// -----

// Tests that producer-placed ops exported to buffers get the appropriate
// affinity on both devices. Frontends can use this to explicitly define where
// exported tensors must live. Transfers may need to be inserted in order to
Expand Down

0 comments on commit 71792fb

Please sign in to comment.