Pinning affinity on stream tensor import/export. (#20016)

The op semantics require the operand/result to be pinned to the specified affinity; previously this happened only by chance.
iree-org · Feb 18, 2025 · 71792fb · 71792fb
1 parent 3c14d0d
commit 71792fb
Show file tree

Hide file tree

Showing 4 changed files with 47 additions and 9 deletions.
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.cpp b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.cpp
@@ -1818,6 +1818,8 @@ LogicalResult TensorImportOp::verify() {
   return success();
 }
 
+bool TensorImportOp::pinsValueAffinity() { return true; }
+
 Value TensorImportOp::getTiedResult(unsigned resultIndex) {
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getSource());
 }
@@ -1845,6 +1847,8 @@ LogicalResult TensorExportOp::verify() {
   return success();
 }
 
+bool TensorExportOp::pinsValueAffinity() { return true; }
+
 Value TensorExportOp::getTiedResult(unsigned resultIndex) {
   return IREE::Util::TiedOpInterface::findTiedBaseValue(getSource());
 }

diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td
@@ -1083,7 +1083,9 @@ def OpGroupPseudoOps : OpDocGroup {
 let opDocGroup = OpGroupPseudoOps in {
 
 def Stream_TensorImportOp : Stream_PureOp<"tensor.import", [
-  Stream_AffinityOp,
+  DeclareOpInterfaceMethods<Stream_AffinityOp, [
+    "pinsValueAffinity",
+  ]>,
   Util_ShapeAwareOp,
   Util_SizeAwareOp,
   DeclareOpInterfaceMethods<Util_TiedOpInterface, [
@@ -1136,7 +1138,9 @@ def Stream_TensorImportOp : Stream_PureOp<"tensor.import", [
 }
 
 def Stream_TensorExportOp : Stream_PureOp<"tensor.export", [
-  Stream_AffinityOp,
+  DeclareOpInterfaceMethods<Stream_AffinityOp, [
+    "pinsValueAffinity",
+  ]>,
   Util_ShapeAwareOp,
   Util_SizeAwareOp,
   DeclareOpInterfaceMethods<Util_TiedOpInterface, [

diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/SpecializeEncodings.cpp b/compiler/src/iree/compiler/Dialect/Stream/Transforms/SpecializeEncodings.cpp
@@ -373,7 +373,7 @@ namespace {
 // will be needed in the work.
 class StreamTensorOpUpdater {
 public:
-  explicit StreamTensorOpUpdater(ModuleOp moduleOp) : moduleOp(moduleOp){};
+  explicit StreamTensorOpUpdater(ModuleOp moduleOp) : moduleOp(moduleOp) {}
   ~StreamTensorOpUpdater() {}
 
   // Collects the stream tensor op candidates, and prepares all the needed
@@ -451,7 +451,9 @@ LogicalResult StreamTensorOpUpdater::addQuery(
       }
       SmallVector<IREE::Stream::AffinityAttr> affinityAttrs;
       if (!affinityAnalysis.tryLookupResourceAffinity(operand, affinityAttrs)) {
-        return failure();
+        return dispatchOp.emitError(
+                   "failed to determine resource affinity for operand ")
+               << operand;
       }
       for (auto affinity : affinityAttrs) {
         queries.emplace_back(affinity, affinityOp);
@@ -462,7 +464,7 @@ LogicalResult StreamTensorOpUpdater::addQuery(
   return success();
 }
 
-/// Updates the operand encondings and result encodings for the `dispatchOp`
+/// Updates the operand encodings and result encodings for the `dispatchOp`
 /// with resolved layouts.
 static LogicalResult updateTensorDispatchOp(
     RewriterBase &rewriter, ModuleOp moduleOp,

diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/annotate_affinities.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/annotate_affinities.mlir
@@ -96,8 +96,8 @@ util.func private @splat_op() -> tensor<1xi32> {
 // be living at the time the functions are invoked. Imports do not perform
 // transfers so we must use whatever is declared.
 
-// CHECK-LABEL: @imported_tensor
-util.func public @imported_tensor(%buffer_view: !hal.buffer_view, %fence: !hal.fence) -> tensor<1xi32> {
+// CHECK-LABEL: @imported_hal_tensor
+util.func public @imported_hal_tensor(%buffer_view: !hal.buffer_view, %fence: !hal.fence) -> tensor<1xi32> {
   // CHECK: hal.tensor.import
   // CHECK-SAME{LITERAL}: stream.affinities.results = [[#hal.device.promise<@dev_a>]]
   %tensor = hal.tensor.import on(#hal.device.promise<@dev_a>) wait(%fence) => %buffer_view "input" : !hal.buffer_view -> tensor<1xi32>
@@ -108,13 +108,26 @@ util.func public @imported_tensor(%buffer_view: !hal.buffer_view, %fence: !hal.f
 
 // -----
 
+// CHECK-LABEL: @imported_stream_tensor
+util.func public @imported_stream_tensor(%buffer_view: !hal.buffer_view) -> !stream.resource<external> {
+  %size = stream.tensor.sizeof on(#hal.device.promise<@dev_a>) tensor<1xi32> : index
+  // CHECK: stream.tensor.import
+  // CHECK-SAME{LITERAL}: stream.affinities.results = [[#hal.device.promise<@dev_a>]]
+  %resource = stream.tensor.import on(#hal.device.promise<@dev_a>) %buffer_view : !hal.buffer_view -> tensor<1xi32> in !stream.resource<external>{%size}
+  // CHECK: util.return
+  // CHECK-SAME{LITERAL}: stream.affinities.operands = [[#hal.device.promise<@dev_a>]]
+  util.return %resource : !stream.resource<external>
+}
+
+// -----
+
 // Tests that consumer-placed ops exported to buffers are properly placed.
 // Frontends can use this to explicitly define where exported tensors must live.
 // With consumer-placed ops like constants or splats we place them directly on
 // the export target.
 
-// CHECK-LABEL: @exported_constant
-util.func public @exported_constant(%fence: !hal.fence) -> !hal.buffer_view {
+// CHECK-LABEL: @exported_hal_constant
+util.func public @exported_hal_constant(%fence: !hal.fence) -> !hal.buffer_view {
   // CHECK: flow.tensor.constant
   // CHECK-SAME{LITERAL}: stream.affinities = [#hal.device.promise<@dev_a>]
   // CHECK-SAME{LITERAL}: stream.affinities.results = [[#hal.device.promise<@dev_a>]]
@@ -131,6 +144,21 @@ util.func public @exported_constant(%fence: !hal.fence) -> !hal.buffer_view {
 
 // -----
 
+// CHECK-LABEL: @exported_stream_constant
+util.func public @exported_stream_constant() -> !hal.buffer_view {
+  %size = stream.tensor.sizeof on(#hal.device.promise<@dev_a>) tensor<1x5x64xi32> : index
+  // CHECK: stream.tensor.constant
+  // CHECK-SAME{LITERAL}: stream.affinities.results = [[#hal.device.promise<@dev_a>]]
+  %cst = stream.tensor.constant : tensor<1x5x64xi32> in !stream.resource<external> = dense<0> : tensor<1x5x64xi32>
+  // CHECK: stream.tensor.export
+  // CHECK-SAME{LITERAL}: stream.affinities = [#hal.device.promise<@dev_a>]
+  // CHECK-SAME{LITERAL}: stream.affinities.operands = [[#hal.device.promise<@dev_a>]]
+  %buffer_view = stream.tensor.export on(#hal.device.promise<@dev_a>) %cst : tensor<1x5x64xi32> in !stream.resource<external>{%size} -> !hal.buffer_view
+  util.return %buffer_view : !hal.buffer_view
+}
+
+// -----
+
 // Tests that producer-placed ops exported to buffers get the appropriate
 // affinity on both devices. Frontends can use this to explicitly define where
 // exported tensors must live. Transfers may need to be inserted in order to