From aa42d439b59d14ff58e02de91cb555aea9db3010 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 18:53:56 +0100 Subject: [PATCH 01/26] pass stub Signed-off-by: Ivan Butygin --- water/include/water/Transforms/Passes.td | 21 +++++++ water/lib/Transforms/CMakeLists.txt | 1 + water/lib/Transforms/GPUModuleToBinary.cpp | 57 +++++++++++++++++++ .../test/Transforms/gpu-module-to-binary.mlir | 11 ++++ 4 files changed, 90 insertions(+) create mode 100644 water/lib/Transforms/GPUModuleToBinary.cpp create mode 100644 water/test/Transforms/gpu-module-to-binary.mlir diff --git a/water/include/water/Transforms/Passes.td b/water/include/water/Transforms/Passes.td index c64609557..ba6d27a06 100644 --- a/water/include/water/Transforms/Passes.td +++ b/water/include/water/Transforms/Passes.td @@ -113,4 +113,25 @@ def WaterGPUToGPURuntime : Pass<"water-gpu-to-gpu-runtime", "::mlir::ModuleOp"> let dependentDialects = ["::mlir::LLVM::LLVMDialect"]; } +def WaterGPUModuleToBinary : Pass<"water-gpu-module-to-binary", ""> { + let summary = "Transforms GPU modules into binaries."; + let description = [{ + This pass searches for all nested GPU modules with target attributes + and serializes them to binary format, producing a GPU binary operation. + + This is a simplified version of the upstream gpu-module-to-binary pass, + tailored for the Water project. Currently supports ROCDL targets only. 
+ }]; + let options = [ + Option<"toolkitPath", "toolkit", "std::string", [{""}], + "Toolkit path.">, + ListOption<"linkFiles", "l", "std::string", + "Extra bitcode files to link to.">, + Option<"cmdOptions", "opts", "std::string", [{""}], + "Command line options to pass to the compilation tools.">, + Option<"compilationTarget", "format", "std::string", [{"fatbin"}], + "The target representation (offloading, assembly, binary, fatbin).">, + ]; +} + #endif // WATER_PASSES diff --git a/water/lib/Transforms/CMakeLists.txt b/water/lib/Transforms/CMakeLists.txt index c441b192b..4ad7befe4 100644 --- a/water/lib/Transforms/CMakeLists.txt +++ b/water/lib/Transforms/CMakeLists.txt @@ -1,6 +1,7 @@ add_mlir_dialect_library(MLIRWaterTransforms AccessCheckers.cpp CheckStaticAssertions.cpp + GPUModuleToBinary.cpp GPUToGPURuntime.cpp SLPVectorizer.cpp diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp new file mode 100644 index 000000000..698da233e --- /dev/null +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -0,0 +1,57 @@ +// Copyright 2025 The Wave Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "water/Transforms/Passes.h" + +#include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/IR/Builders.h" +#include "mlir/Pass/Pass.h" + +#include "llvm/ADT/StringSwitch.h" + +using namespace mlir; +using namespace mlir::gpu; + +namespace mlir::water { +#define GEN_PASS_DEF_WATERGPUMODULETOBINARY +#include "water/Transforms/Passes.h.inc" +} // namespace mlir::water + +namespace { +class WaterGPUModuleToBinaryPass + : public water::impl::WaterGPUModuleToBinaryBase< + WaterGPUModuleToBinaryPass> { +public: + using Base::Base; + void runOnOperation() final; +}; +} // namespace + +void WaterGPUModuleToBinaryPass::runOnOperation() { + // Parse compilation target format + auto targetFormat = + llvm::StringSwitch>(compilationTarget) + .Cases({"offloading", "llvm"}, CompilationTarget::Offload) + .Cases({"assembly", "isa"}, CompilationTarget::Assembly) + .Cases({"binary", "bin"}, CompilationTarget::Binary) + .Cases({"fatbinary", "fatbin"}, CompilationTarget::Fatbin) + .Default(std::nullopt); + + if (!targetFormat) { + getOperation()->emitError() + << "Invalid format specified: " << compilationTarget; + return signalPassFailure(); + } + + // TODO: Implement the actual serialization logic + // This is a stub that will be filled in with: + // 1. Walk all GPUModuleOp instances + // 2. For each module, serialize using target attributes + // 3. Create gpu.binary ops with the serialized objects + // 4. 
Erase the original gpu.module ops + + getOperation()->emitRemark() << "WaterGPUModuleToBinary pass stub executed"; +} diff --git a/water/test/Transforms/gpu-module-to-binary.mlir b/water/test/Transforms/gpu-module-to-binary.mlir new file mode 100644 index 000000000..4093b2018 --- /dev/null +++ b/water/test/Transforms/gpu-module-to-binary.mlir @@ -0,0 +1,11 @@ +// RUN: water-opt %s --water-gpu-module-to-binary | FileCheck %s + +// CHECK-LABEL: module +module attributes {gpu.container_module} { + // Simple test to verify the pass stub runs without errors + // TODO: Add actual gpu.module operations once serialization is implemented + + func.func @dummy() { + return + } +} From 0d9db3ec398ae9474cbd7adee8a0186ae26defa3 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 19:27:38 +0100 Subject: [PATCH 02/26] WIP Signed-off-by: Ivan Butygin --- water/include/water/Transforms/Passes.td | 2 - water/lib/Transforms/GPUModuleToBinary.cpp | 110 ++++++++++++++++----- 2 files changed, 87 insertions(+), 25 deletions(-) diff --git a/water/include/water/Transforms/Passes.td b/water/include/water/Transforms/Passes.td index ba6d27a06..e4ea3b881 100644 --- a/water/include/water/Transforms/Passes.td +++ b/water/include/water/Transforms/Passes.td @@ -129,8 +129,6 @@ def WaterGPUModuleToBinary : Pass<"water-gpu-module-to-binary", ""> { "Extra bitcode files to link to.">, Option<"cmdOptions", "opts", "std::string", [{""}], "Command line options to pass to the compilation tools.">, - Option<"compilationTarget", "format", "std::string", [{"fatbin"}], - "The target representation (offloading, assembly, binary, fatbin).">, ]; } diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index 698da233e..5d7b163d5 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -8,10 +8,9 @@ #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/IR/Builders.h" +#include "mlir/IR/SymbolTable.h" 
#include "mlir/Pass/Pass.h" -#include "llvm/ADT/StringSwitch.h" - using namespace mlir; using namespace mlir::gpu; @@ -27,31 +26,96 @@ class WaterGPUModuleToBinaryPass public: using Base::Base; void runOnOperation() final; + +private: + LogicalResult serializeModule(GPUModuleOp module); }; } // namespace -void WaterGPUModuleToBinaryPass::runOnOperation() { - // Parse compilation target format - auto targetFormat = - llvm::StringSwitch>(compilationTarget) - .Cases({"offloading", "llvm"}, CompilationTarget::Offload) - .Cases({"assembly", "isa"}, CompilationTarget::Assembly) - .Cases({"binary", "bin"}, CompilationTarget::Binary) - .Cases({"fatbinary", "fatbin"}, CompilationTarget::Fatbin) - .Default(std::nullopt); - - if (!targetFormat) { - getOperation()->emitError() - << "Invalid format specified: " << compilationTarget; - return signalPassFailure(); +LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { + OpBuilder builder(module->getContext()); + + // Check if module has target attributes + if (!module.getTargetsAttr() || module.getTargetsAttr().empty()) { + return module.emitError("GPU module has no target attributes"); } - // TODO: Implement the actual serialization logic - // This is a stub that will be filled in with: - // 1. Walk all GPUModuleOp instances - // 2. For each module, serialize using target attributes - // 3. Create gpu.binary ops with the serialized objects - // 4. 
Erase the original gpu.module ops + // Collect serialized objects for each target + SmallVector objects; + + for (auto targetAttr : module.getTargetsAttr()) { + if (!targetAttr) { + return module.emitError("Target attribute cannot be null"); + } + + auto target = dyn_cast(targetAttr); + if (!target) { + return module.emitError( + "Target attribute doesn't implement TargetAttrInterface"); + } + + // Build target options + SmallVector librariesToLink; + for (const std::string &path : linkFiles) { + librariesToLink.push_back(StringAttr::get(&getContext(), path)); + } + + // Create lazy symbol table builder + std::optional parentTable; + auto lazyTableBuilder = [&]() -> SymbolTable * { + if (!parentTable) { + Operation *table = SymbolTable::getNearestSymbolTable(module); + if (!table) + return nullptr; + parentTable = SymbolTable(table); + } + return &parentTable.value(); + }; + + TargetOptions targetOptions(toolkitPath, librariesToLink, cmdOptions, + /*elfSection=*/"", CompilationTarget::Binary, + lazyTableBuilder); + + // Serialize the module to binary + std::optional> serializedModule = + target.serializeToObject(module, targetOptions); - getOperation()->emitRemark() << "WaterGPUModuleToBinary pass stub executed"; + if (!serializedModule) { + return module.emitError("Failed to serialize module to object"); + } + + // Create object attribute + Attribute object = + target.createObject(module, *serializedModule, targetOptions); + if (!object) { + return module.emitError("Failed to create object attribute"); + } + + objects.push_back(object); + } + + // Create gpu.binary op + builder.setInsertionPointAfter(module); + gpu::BinaryOp::create(builder, module.getLoc(), module.getName(), + /*offloadingHandler=*/nullptr, + builder.getArrayAttr(objects)); + + // Erase the original module + module->erase(); + return success(); +} + +void WaterGPUModuleToBinaryPass::runOnOperation() { + // Walk all regions and blocks looking for GPUModuleOp instances + for (Region ®ion : 
getOperation()->getRegions()) { + for (Block &block : region.getBlocks()) { + // Use early_inc_range since we're erasing modules during iteration + for (auto module : + llvm::make_early_inc_range(block.getOps())) { + if (failed(serializeModule(module))) { + return signalPassFailure(); + } + } + } + } } From f8c72ba6ff3b2a236130d073d6dbc14d41a3597d Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 19:31:08 +0100 Subject: [PATCH 03/26] wip Signed-off-by: Ivan Butygin --- water/lib/Transforms/GPUModuleToBinary.cpp | 81 +++++++++------------- 1 file changed, 34 insertions(+), 47 deletions(-) diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index 5d7b163d5..f3b75fea6 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -7,9 +7,14 @@ #include "water/Transforms/Passes.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/IR/Builders.h" #include "mlir/IR/SymbolTable.h" #include "mlir/Pass/Pass.h" +#include "mlir/Target/LLVMIR/Export.h" + +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" using namespace mlir; using namespace mlir::gpu; @@ -40,65 +45,47 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { return module.emitError("GPU module has no target attributes"); } - // Collect serialized objects for each target - SmallVector objects; - - for (auto targetAttr : module.getTargetsAttr()) { - if (!targetAttr) { - return module.emitError("Target attribute cannot be null"); - } - - auto target = dyn_cast(targetAttr); - if (!target) { - return module.emitError( - "Target attribute doesn't implement TargetAttrInterface"); - } + // For now, we only support ROCDL targets + auto rocdlTarget = + dyn_cast_or_null(module.getTargetsAttr()[0]); + if (!rocdlTarget) { + return module.emitError("Only ROCDL targets are currently supported"); + } - // Build target options 
- SmallVector librariesToLink; - for (const std::string &path : linkFiles) { - librariesToLink.push_back(StringAttr::get(&getContext(), path)); - } + // Step 1: Translate GPU module to LLVM IR + llvm::LLVMContext llvmContext; + std::unique_ptr llvmModule = + translateModuleToLLVMIR(module, llvmContext); - // Create lazy symbol table builder - std::optional parentTable; - auto lazyTableBuilder = [&]() -> SymbolTable * { - if (!parentTable) { - Operation *table = SymbolTable::getNearestSymbolTable(module); - if (!table) - return nullptr; - parentTable = SymbolTable(table); - } - return &parentTable.value(); - }; + if (!llvmModule) { + return module.emitError("Failed to translate GPU module to LLVM IR"); + } - TargetOptions targetOptions(toolkitPath, librariesToLink, cmdOptions, - /*elfSection=*/"", CompilationTarget::Binary, - lazyTableBuilder); + // TODO: Step 2: Link device libraries + // TODO: Step 3: Optimize LLVM IR + // TODO: Step 4: Compile to ISA + // TODO: Step 5: Assemble to binary - // Serialize the module to binary - std::optional> serializedModule = - target.serializeToObject(module, targetOptions); + // For now, just create a placeholder binary + SmallVector binaryData; - if (!serializedModule) { - return module.emitError("Failed to serialize module to object"); - } + // Create object attribute + Builder attrBuilder(module.getContext()); + StringAttr binaryAttr = attrBuilder.getStringAttr( + StringRef(binaryData.data(), binaryData.size())); - // Create object attribute - Attribute object = - target.createObject(module, *serializedModule, targetOptions); - if (!object) { - return module.emitError("Failed to create object attribute"); - } + DictionaryAttr properties{}; + gpu::KernelTableAttr kernels; - objects.push_back(object); - } + Attribute objectAttr = attrBuilder.getAttr( + rocdlTarget, gpu::CompilationTarget::Binary, binaryAttr, properties, + kernels); // Create gpu.binary op builder.setInsertionPointAfter(module); gpu::BinaryOp::create(builder, 
module.getLoc(), module.getName(), /*offloadingHandler=*/nullptr, - builder.getArrayAttr(objects)); + builder.getArrayAttr({objectAttr})); // Erase the original module module->erase(); From 2c68a851c465255c124b6decf70712a1e7504b4d Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 19:36:23 +0100 Subject: [PATCH 04/26] style Signed-off-by: Ivan Butygin --- water/lib/Transforms/GPUModuleToBinary.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index f3b75fea6..1d66b8fdf 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -41,25 +41,22 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { OpBuilder builder(module->getContext()); // Check if module has target attributes - if (!module.getTargetsAttr() || module.getTargetsAttr().empty()) { + if (!module.getTargetsAttr() || module.getTargetsAttr().empty()) return module.emitError("GPU module has no target attributes"); - } // For now, we only support ROCDL targets auto rocdlTarget = dyn_cast_or_null(module.getTargetsAttr()[0]); - if (!rocdlTarget) { + if (!rocdlTarget) return module.emitError("Only ROCDL targets are currently supported"); - } // Step 1: Translate GPU module to LLVM IR llvm::LLVMContext llvmContext; std::unique_ptr llvmModule = translateModuleToLLVMIR(module, llvmContext); - if (!llvmModule) { + if (!llvmModule) return module.emitError("Failed to translate GPU module to LLVM IR"); - } // TODO: Step 2: Link device libraries // TODO: Step 3: Optimize LLVM IR @@ -99,9 +96,8 @@ void WaterGPUModuleToBinaryPass::runOnOperation() { // Use early_inc_range since we're erasing modules during iteration for (auto module : llvm::make_early_inc_range(block.getOps())) { - if (failed(serializeModule(module))) { + if (failed(serializeModule(module))) return signalPassFailure(); - } } } } From 
794d01a1648466ad847142c55c2596b4e3222110 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 19:42:17 +0100 Subject: [PATCH 05/26] optimization Signed-off-by: Ivan Butygin --- water/lib/Transforms/GPUModuleToBinary.cpp | 124 ++++++++++++++++++++- 1 file changed, 122 insertions(+), 2 deletions(-) diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index 1d66b8fdf..307938da7 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -8,6 +8,7 @@ #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" +#include "mlir/ExecutionEngine/OptUtils.h" #include "mlir/IR/Builders.h" #include "mlir/IR/SymbolTable.h" #include "mlir/Pass/Pass.h" @@ -15,6 +16,12 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Linker/Linker.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/IPO/Internalize.h" using namespace mlir; using namespace mlir::gpu; @@ -34,6 +41,18 @@ class WaterGPUModuleToBinaryPass private: LogicalResult serializeModule(GPUModuleOp module); + + // Helper methods + std::unique_ptr loadBitcodeFile(llvm::LLVMContext &context, + StringRef path); + LogicalResult + linkBitcodeFiles(llvm::Module &module, + SmallVector> &&libs); + std::optional + createTargetMachine(ROCDL::ROCDLTargetAttr target); + LogicalResult optimizeModule(llvm::Module &module, + llvm::TargetMachine *targetMachine, + int optLevel); }; } // namespace @@ -58,8 +77,26 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { if (!llvmModule) return module.emitError("Failed to translate GPU module to LLVM IR"); - // TODO: Step 2: Link device libraries - // TODO: Step 3: Optimize LLVM IR + // Step 2: Load and link device libraries + SmallVector> bitcodeLibs; + for (const std::string 
&path : linkFiles) { + auto lib = loadBitcodeFile(llvmContext, path); + if (!lib) + return module.emitError("Failed to load bitcode file: " + path); + bitcodeLibs.push_back(std::move(lib)); + } + + if (failed(linkBitcodeFiles(*llvmModule, std::move(bitcodeLibs)))) + return module.emitError("Failed to link bitcode libraries"); + + // Step 3: Optimize LLVM IR + auto targetMachine = createTargetMachine(rocdlTarget); + if (!targetMachine) + return module.emitError("Failed to create target machine"); + + if (failed(optimizeModule(*llvmModule, *targetMachine, rocdlTarget.getO()))) + return module.emitError("Failed to optimize LLVM IR"); + // TODO: Step 4: Compile to ISA // TODO: Step 5: Assemble to binary @@ -89,6 +126,89 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { return success(); } +std::unique_ptr +WaterGPUModuleToBinaryPass::loadBitcodeFile(llvm::LLVMContext &context, + StringRef path) { + llvm::SMDiagnostic error; + std::unique_ptr library = + llvm::getLazyIRFileModule(path, error, context); + if (!library) { + getOperation()->emitError() << "Failed loading bitcode file from " << path + << ", error: " << error.getMessage(); + return nullptr; + } + return library; +} + +LogicalResult WaterGPUModuleToBinaryPass::linkBitcodeFiles( + llvm::Module &module, SmallVector> &&libs) { + if (libs.empty()) + return success(); + + llvm::Linker linker(module); + for (std::unique_ptr &libModule : libs) { + // Link the library, importing only needed symbols + bool err = linker.linkInModule( + std::move(libModule), llvm::Linker::Flags::LinkOnlyNeeded, + [](llvm::Module &m, const StringSet<> &gvs) { + llvm::internalizeModule(m, [&gvs](const llvm::GlobalValue &gv) { + return !gv.hasName() || (gvs.count(gv.getName()) == 0); + }); + }); + + if (err) { + getOperation()->emitError("Failed during bitcode linking"); + return failure(); + } + } + return success(); +} + +std::optional 
+WaterGPUModuleToBinaryPass::createTargetMachine(ROCDL::ROCDLTargetAttr target) { + std::string error; + llvm::Triple triple(llvm::Triple::normalize(target.getTriple())); + const llvm::Target *llvmTarget = + llvm::TargetRegistry::lookupTarget(triple, error); + + if (!llvmTarget) { + getOperation()->emitError() << "Failed to lookup target for triple '" + << target.getTriple() << "': " << error; + return std::nullopt; + } + + std::unique_ptr targetMachine( + llvmTarget->createTargetMachine(triple, target.getChip(), + target.getFeatures(), {}, {})); + if (!targetMachine) + return std::nullopt; + + return targetMachine.release(); +} + +LogicalResult WaterGPUModuleToBinaryPass::optimizeModule( + llvm::Module &module, llvm::TargetMachine *targetMachine, int optLevel) { + if (optLevel < 0 || optLevel > 3) { + getOperation()->emitError() << "Invalid optimization level: " << optLevel; + return failure(); + } + + targetMachine->setOptLevel(static_cast(optLevel)); + + auto transformer = + makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, targetMachine); + auto error = transformer(&module); + if (error) { + InFlightDiagnostic mlirError = getOperation()->emitError(); + llvm::handleAllErrors( + std::move(error), [&mlirError](const llvm::ErrorInfoBase &ei) { + mlirError << "Failed to optimize LLVM IR: " << ei.message(); + }); + return failure(); + } + return success(); +} + void WaterGPUModuleToBinaryPass::runOnOperation() { // Walk all regions and blocks looking for GPUModuleOp instances for (Region ®ion : getOperation()->getRegions()) { From 84bdcbc9b55924a39eaa0573ada8ceb661aed331 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 19:52:36 +0100 Subject: [PATCH 06/26] opt level Signed-off-by: Ivan Butygin --- water/lib/Transforms/GPUModuleToBinary.cpp | 59 +++++++++++++--------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index 307938da7..0d74feb5d 
100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -49,10 +49,9 @@ class WaterGPUModuleToBinaryPass linkBitcodeFiles(llvm::Module &module, SmallVector> &&libs); std::optional - createTargetMachine(ROCDL::ROCDLTargetAttr target); + createTargetMachine(Attribute targetAttr); LogicalResult optimizeModule(llvm::Module &module, - llvm::TargetMachine *targetMachine, - int optLevel); + llvm::TargetMachine *targetMachine); }; } // namespace @@ -63,11 +62,15 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { if (!module.getTargetsAttr() || module.getTargetsAttr().empty()) return module.emitError("GPU module has no target attributes"); - // For now, we only support ROCDL targets - auto rocdlTarget = - dyn_cast_or_null(module.getTargetsAttr()[0]); - if (!rocdlTarget) - return module.emitError("Only ROCDL targets are currently supported"); + // Check that there is exactly one target + if (module.getTargetsAttr().size() != 1) + return module.emitError( + "GPU module must have exactly one target attribute"); + + // Get the target attribute + Attribute targetAttr = module.getTargetsAttr()[0]; + if (!targetAttr) + return module.emitError("Target attribute cannot be null"); // Step 1: Translate GPU module to LLVM IR llvm::LLVMContext llvmContext; @@ -90,11 +93,11 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { return module.emitError("Failed to link bitcode libraries"); // Step 3: Optimize LLVM IR - auto targetMachine = createTargetMachine(rocdlTarget); + auto targetMachine = createTargetMachine(targetAttr); if (!targetMachine) return module.emitError("Failed to create target machine"); - if (failed(optimizeModule(*llvmModule, *targetMachine, rocdlTarget.getO()))) + if (failed(optimizeModule(*llvmModule, *targetMachine))) return module.emitError("Failed to optimize LLVM IR"); // TODO: Step 4: Compile to ISA @@ -112,7 +115,7 @@ LogicalResult 
WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { gpu::KernelTableAttr kernels; Attribute objectAttr = attrBuilder.getAttr( - rocdlTarget, gpu::CompilationTarget::Binary, binaryAttr, properties, + targetAttr, gpu::CompilationTarget::Binary, binaryAttr, properties, kernels); // Create gpu.binary op @@ -165,35 +168,43 @@ LogicalResult WaterGPUModuleToBinaryPass::linkBitcodeFiles( } std::optional -WaterGPUModuleToBinaryPass::createTargetMachine(ROCDL::ROCDLTargetAttr target) { +WaterGPUModuleToBinaryPass::createTargetMachine(Attribute targetAttr) { + // Check if this is a ROCDL target + auto rocdlTarget = dyn_cast(targetAttr); + if (!rocdlTarget) { + getOperation()->emitError() << "Only ROCDL targets are currently supported"; + return std::nullopt; + } + std::string error; - llvm::Triple triple(llvm::Triple::normalize(target.getTriple())); + llvm::Triple triple(llvm::Triple::normalize(rocdlTarget.getTriple())); const llvm::Target *llvmTarget = llvm::TargetRegistry::lookupTarget(triple, error); if (!llvmTarget) { getOperation()->emitError() << "Failed to lookup target for triple '" - << target.getTriple() << "': " << error; + << rocdlTarget.getTriple() << "': " << error; return std::nullopt; } std::unique_ptr targetMachine( - llvmTarget->createTargetMachine(triple, target.getChip(), - target.getFeatures(), {}, {})); + llvmTarget->createTargetMachine(triple, rocdlTarget.getChip(), + rocdlTarget.getFeatures(), {}, {})); if (!targetMachine) return std::nullopt; + // Set optimization level from target attribute + targetMachine->setOptLevel( + static_cast(rocdlTarget.getO())); + return targetMachine.release(); } -LogicalResult WaterGPUModuleToBinaryPass::optimizeModule( - llvm::Module &module, llvm::TargetMachine *targetMachine, int optLevel) { - if (optLevel < 0 || optLevel > 3) { - getOperation()->emitError() << "Invalid optimization level: " << optLevel; - return failure(); - } - - targetMachine->setOptLevel(static_cast(optLevel)); +LogicalResult 
+WaterGPUModuleToBinaryPass::optimizeModule(llvm::Module &module, + llvm::TargetMachine *targetMachine) { + // Get optimization level from target machine + int optLevel = static_cast(targetMachine->getOptLevel()); auto transformer = makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, targetMachine); From f8b4250804c795e137040de77f27e5e7de18872c Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 19:55:11 +0100 Subject: [PATCH 07/26] rename Signed-off-by: Ivan Butygin --- water/lib/Transforms/GPUModuleToBinary.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index 0d74feb5d..6dd5a0439 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -46,11 +46,11 @@ class WaterGPUModuleToBinaryPass std::unique_ptr loadBitcodeFile(llvm::LLVMContext &context, StringRef path); LogicalResult - linkBitcodeFiles(llvm::Module &module, + linkBitcodeFiles(llvm::Module &mod, SmallVector> &&libs); std::optional createTargetMachine(Attribute targetAttr); - LogicalResult optimizeModule(llvm::Module &module, + LogicalResult optimizeModule(llvm::Module &mod, llvm::TargetMachine *targetMachine); }; } // namespace @@ -144,11 +144,11 @@ WaterGPUModuleToBinaryPass::loadBitcodeFile(llvm::LLVMContext &context, } LogicalResult WaterGPUModuleToBinaryPass::linkBitcodeFiles( - llvm::Module &module, SmallVector> &&libs) { + llvm::Module &mod, SmallVector> &&libs) { if (libs.empty()) return success(); - llvm::Linker linker(module); + llvm::Linker linker(mod); for (std::unique_ptr &libModule : libs) { // Link the library, importing only needed symbols bool err = linker.linkInModule( @@ -201,14 +201,14 @@ WaterGPUModuleToBinaryPass::createTargetMachine(Attribute targetAttr) { } LogicalResult -WaterGPUModuleToBinaryPass::optimizeModule(llvm::Module &module, +WaterGPUModuleToBinaryPass::optimizeModule(llvm::Module &mod, 
llvm::TargetMachine *targetMachine) { // Get optimization level from target machine int optLevel = static_cast(targetMachine->getOptLevel()); auto transformer = makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, targetMachine); - auto error = transformer(&module); + auto error = transformer(&mod); if (error) { InFlightDiagnostic mlirError = getOperation()->emitError(); llvm::handleAllErrors( From 3589b95997b31ff6fadbc920e3174fddda0198cb Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 19:58:41 +0100 Subject: [PATCH 08/26] ISA Signed-off-by: Ivan Butygin --- water/lib/Transforms/GPUModuleToBinary.cpp | 65 ++++++++++++++++++++-- 1 file changed, 61 insertions(+), 4 deletions(-) diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index 6dd5a0439..1f5fabd21 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -15,11 +15,13 @@ #include "mlir/Target/LLVMIR/Export.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO/Internalize.h" @@ -52,6 +54,10 @@ class WaterGPUModuleToBinaryPass createTargetMachine(Attribute targetAttr); LogicalResult optimizeModule(llvm::Module &mod, llvm::TargetMachine *targetMachine); + std::optional compileToISA(llvm::Module &mod, + llvm::TargetMachine &targetMachine); + std::optional> + assembleToObject(StringRef isa, llvm::TargetMachine &targetMachine); }; } // namespace @@ -100,11 +106,18 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { if (failed(optimizeModule(*llvmModule, *targetMachine))) return module.emitError("Failed to optimize LLVM IR"); - // TODO: Step 4: Compile to ISA - // TODO: Step 5: 
Assemble to binary + // Step 4: Compile to ISA + std::optional isa = compileToISA(*llvmModule, **targetMachine); + if (!isa) + return module.emitError("Failed to compile to ISA"); - // For now, just create a placeholder binary - SmallVector binaryData; + // Step 5: Assemble to binary + std::optional> binary = + assembleToObject(*isa, **targetMachine); + if (!binary) + return module.emitError("Failed to assemble to binary"); + + SmallVector binaryData = std::move(*binary); // Create object attribute Builder attrBuilder(module.getContext()); @@ -220,6 +233,50 @@ WaterGPUModuleToBinaryPass::optimizeModule(llvm::Module &mod, return success(); } +std::optional +WaterGPUModuleToBinaryPass::compileToISA(llvm::Module &mod, + llvm::TargetMachine &targetMachine) { + SmallVector isaBuffer; + llvm::raw_svector_ostream stream(isaBuffer); + + llvm::legacy::PassManager codegen; + if (targetMachine.addPassesToEmitFile(codegen, stream, nullptr, + llvm::CodeGenFileType::AssemblyFile)) { + getOperation()->emitError("Target machine cannot emit assembly"); + return std::nullopt; + } + + codegen.run(mod); + return std::string(isaBuffer.begin(), isaBuffer.end()); +} + +std::optional> +WaterGPUModuleToBinaryPass::assembleToObject( + StringRef isa, llvm::TargetMachine &targetMachine) { + // For now, just compile optimized module to object file + // TODO: Parse ISA and assemble properly, then link with ld.lld to create + // HSACO + + SmallVector objectBuffer; + llvm::raw_svector_ostream stream(objectBuffer); + + llvm::legacy::PassManager codegen; + + // Create a temporary module to compile + llvm::LLVMContext context; + auto mod = std::make_unique("isa_module", context); + + if (targetMachine.addPassesToEmitFile(codegen, stream, nullptr, + llvm::CodeGenFileType::ObjectFile)) { + getOperation()->emitError("Target machine cannot emit object file"); + return std::nullopt; + } + + codegen.run(*mod); + + return objectBuffer; +} + void WaterGPUModuleToBinaryPass::runOnOperation() { // Walk all 
regions and blocks looking for GPUModuleOp instances for (Region ®ion : getOperation()->getRegions()) { From cf6f17aa446aabffe33535079e0c0559c76df4ef Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 20:01:16 +0100 Subject: [PATCH 09/26] FailureOr Signed-off-by: Ivan Butygin --- water/lib/Transforms/GPUModuleToBinary.cpp | 59 ++++++++++------------ 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index 1f5fabd21..2e429c4b0 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -50,13 +50,12 @@ class WaterGPUModuleToBinaryPass LogicalResult linkBitcodeFiles(llvm::Module &mod, SmallVector> &&libs); - std::optional - createTargetMachine(Attribute targetAttr); + FailureOr createTargetMachine(Attribute targetAttr); LogicalResult optimizeModule(llvm::Module &mod, llvm::TargetMachine *targetMachine); - std::optional compileToISA(llvm::Module &mod, - llvm::TargetMachine &targetMachine); - std::optional> + FailureOr compileToISA(llvm::Module &mod, + llvm::TargetMachine &targetMachine); + FailureOr> assembleToObject(StringRef isa, llvm::TargetMachine &targetMachine); }; } // namespace @@ -99,22 +98,23 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { return module.emitError("Failed to link bitcode libraries"); // Step 3: Optimize LLVM IR - auto targetMachine = createTargetMachine(targetAttr); - if (!targetMachine) + FailureOr targetMachine = + createTargetMachine(targetAttr); + if (failed(targetMachine)) return module.emitError("Failed to create target machine"); if (failed(optimizeModule(*llvmModule, *targetMachine))) return module.emitError("Failed to optimize LLVM IR"); // Step 4: Compile to ISA - std::optional isa = compileToISA(*llvmModule, **targetMachine); - if (!isa) + FailureOr isa = compileToISA(*llvmModule, **targetMachine); + if (failed(isa)) return 
module.emitError("Failed to compile to ISA"); // Step 5: Assemble to binary - std::optional> binary = + FailureOr> binary = assembleToObject(*isa, **targetMachine); - if (!binary) + if (failed(binary)) return module.emitError("Failed to assemble to binary"); SmallVector binaryData = std::move(*binary); @@ -180,31 +180,29 @@ LogicalResult WaterGPUModuleToBinaryPass::linkBitcodeFiles( return success(); } -std::optional +FailureOr WaterGPUModuleToBinaryPass::createTargetMachine(Attribute targetAttr) { // Check if this is a ROCDL target auto rocdlTarget = dyn_cast(targetAttr); - if (!rocdlTarget) { - getOperation()->emitError() << "Only ROCDL targets are currently supported"; - return std::nullopt; - } + if (!rocdlTarget) + return getOperation()->emitError( + "Only ROCDL targets are currently supported"); std::string error; llvm::Triple triple(llvm::Triple::normalize(rocdlTarget.getTriple())); const llvm::Target *llvmTarget = llvm::TargetRegistry::lookupTarget(triple, error); - if (!llvmTarget) { - getOperation()->emitError() << "Failed to lookup target for triple '" - << rocdlTarget.getTriple() << "': " << error; - return std::nullopt; - } + if (!llvmTarget) + return getOperation()->emitError() + << "Failed to lookup target for triple '" << rocdlTarget.getTriple() + << "': " << error; std::unique_ptr targetMachine( llvmTarget->createTargetMachine(triple, rocdlTarget.getChip(), rocdlTarget.getFeatures(), {}, {})); if (!targetMachine) - return std::nullopt; + return getOperation()->emitError("Failed to create target machine"); // Set optimization level from target attribute targetMachine->setOptLevel( @@ -233,7 +231,7 @@ WaterGPUModuleToBinaryPass::optimizeModule(llvm::Module &mod, return success(); } -std::optional +FailureOr WaterGPUModuleToBinaryPass::compileToISA(llvm::Module &mod, llvm::TargetMachine &targetMachine) { SmallVector isaBuffer; @@ -241,17 +239,14 @@ WaterGPUModuleToBinaryPass::compileToISA(llvm::Module &mod, llvm::legacy::PassManager codegen; if 
(targetMachine.addPassesToEmitFile(codegen, stream, nullptr, - llvm::CodeGenFileType::AssemblyFile)) { - getOperation()->emitError("Target machine cannot emit assembly"); - return std::nullopt; - } + llvm::CodeGenFileType::AssemblyFile)) + return getOperation()->emitError("Target machine cannot emit assembly"); codegen.run(mod); return std::string(isaBuffer.begin(), isaBuffer.end()); } -std::optional> -WaterGPUModuleToBinaryPass::assembleToObject( +FailureOr> WaterGPUModuleToBinaryPass::assembleToObject( StringRef isa, llvm::TargetMachine &targetMachine) { // For now, just compile optimized module to object file // TODO: Parse ISA and assemble properly, then link with ld.lld to create @@ -267,10 +262,8 @@ WaterGPUModuleToBinaryPass::assembleToObject( auto mod = std::make_unique("isa_module", context); if (targetMachine.addPassesToEmitFile(codegen, stream, nullptr, - llvm::CodeGenFileType::ObjectFile)) { - getOperation()->emitError("Target machine cannot emit object file"); - return std::nullopt; - } + llvm::CodeGenFileType::ObjectFile)) + return getOperation()->emitError("Target machine cannot emit object file"); codegen.run(*mod); From 5e59c437ab133f6172244bf14c9e7d44befe2bb4 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 20:06:54 +0100 Subject: [PATCH 10/26] HSACO Signed-off-by: Ivan Butygin --- water/lib/Transforms/GPUModuleToBinary.cpp | 117 ++++++++++++++++++--- 1 file changed, 102 insertions(+), 15 deletions(-) diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index 2e429c4b0..dd0df545c 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -19,7 +19,24 @@ #include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include 
"llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -248,26 +265,96 @@ WaterGPUModuleToBinaryPass::compileToISA(llvm::Module &mod, FailureOr> WaterGPUModuleToBinaryPass::assembleToObject( StringRef isa, llvm::TargetMachine &targetMachine) { - // For now, just compile optimized module to object file - // TODO: Parse ISA and assemble properly, then link with ld.lld to create - // HSACO + // Step 1: Assemble ISA to object file using MC infrastructure + llvm::Triple triple = targetMachine.getTargetTriple(); + std::string error; + const llvm::Target *target = + llvm::TargetRegistry::lookupTarget(triple, error); + if (!target) + return getOperation()->emitError() << "Failed to lookup target: " << error; + + // Set up MC infrastructure + llvm::SourceMgr srcMgr; + srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), + llvm::SMLoc()); + + const llvm::MCTargetOptions mcOptions; + std::unique_ptr mri(target->createMCRegInfo(triple)); + std::unique_ptr mai( + target->createMCAsmInfo(*mri, triple, mcOptions)); + std::unique_ptr sti( + target->createMCSubtargetInfo(triple, targetMachine.getTargetCPU(), + targetMachine.getTargetFeatureString())); SmallVector objectBuffer; - llvm::raw_svector_ostream stream(objectBuffer); - - llvm::legacy::PassManager codegen; - - // Create a temporary module to compile - llvm::LLVMContext context; - auto mod = std::make_unique("isa_module", context); + llvm::raw_svector_ostream 
os(objectBuffer); + + llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr, + &mcOptions); + std::unique_ptr mofi(target->createMCObjectFileInfo( + ctx, /*PIC=*/false, /*LargeCodeModel=*/false)); + ctx.setObjectFileInfo(mofi.get()); + + std::unique_ptr mcii(target->createMCInstrInfo()); + llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, ctx); + llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions); + std::unique_ptr mcStreamer(target->createMCObjectStreamer( + triple, ctx, std::unique_ptr(mab), + mab->createObjectWriter(os), std::unique_ptr(ce), + *sti)); + + std::unique_ptr parser( + createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai)); + std::unique_ptr tap( + target->createMCAsmParser(*sti, *parser, *mcii, mcOptions)); + + if (!tap) + return getOperation()->emitError("Assembler initialization error"); + + parser->setTargetParser(*tap); + parser->Run(false); + + // Step 2: Link object file to create HSACO + // Write object to temporary file + int tempObjFd = -1; + SmallString<128> tempObjFilename; + if (llvm::sys::fs::createTemporaryFile("kernel%%", "o", tempObjFd, + tempObjFilename)) + return getOperation()->emitError( + "Failed to create temporary file for object"); - if (targetMachine.addPassesToEmitFile(codegen, stream, nullptr, - llvm::CodeGenFileType::ObjectFile)) - return getOperation()->emitError("Target machine cannot emit object file"); + llvm::FileRemover cleanupObj(tempObjFilename); + { + llvm::raw_fd_ostream tempObjOs(tempObjFd, true); + tempObjOs << StringRef(objectBuffer.data(), objectBuffer.size()); + tempObjOs.flush(); + } - codegen.run(*mod); + // Create temporary file for HSACO + SmallString<128> tempHsacoFilename; + if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFilename)) + return getOperation()->emitError( + "Failed to create temporary file for HSACO"); + + llvm::FileRemover cleanupHsaco(tempHsacoFilename); + + // Link using ld.lld + SmallString<128> lldPath(toolkitPath); + 
llvm::sys::path::append(lldPath, "llvm", "bin", "ld.lld"); + int lldResult = llvm::sys::ExecuteAndWait( + lldPath, {"ld.lld", "-shared", tempObjFilename, "-o", tempHsacoFilename}); + if (lldResult != 0) + return getOperation()->emitError("ld.lld invocation failed"); + + // Read HSACO file + auto hsacoFile = + llvm::MemoryBuffer::getFile(tempHsacoFilename, /*IsText=*/false); + if (!hsacoFile) + return getOperation()->emitError( + "Failed to read HSACO from temporary file"); - return objectBuffer; + StringRef buffer = (*hsacoFile)->getBuffer(); + return SmallVector(buffer.begin(), buffer.end()); } void WaterGPUModuleToBinaryPass::runOnOperation() { From 13af7e98204982a7531cfe44c3b655c4e78196a3 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 20:12:31 +0100 Subject: [PATCH 11/26] HSACO 2 Signed-off-by: Ivan Butygin --- water/lib/Transforms/AssembleISA.cpp | 127 +++++++++++++++++++++ water/lib/Transforms/AssembleISA.h | 39 +++++++ water/lib/Transforms/CMakeLists.txt | 1 + water/lib/Transforms/GPUModuleToBinary.cpp | 117 +------------------ 4 files changed, 170 insertions(+), 114 deletions(-) create mode 100644 water/lib/Transforms/AssembleISA.cpp create mode 100644 water/lib/Transforms/AssembleISA.h diff --git a/water/lib/Transforms/AssembleISA.cpp b/water/lib/Transforms/AssembleISA.cpp new file mode 100644 index 000000000..75f896852 --- /dev/null +++ b/water/lib/Transforms/AssembleISA.cpp @@ -0,0 +1,127 @@ +// Copyright 2025 The Wave Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "AssembleISA.h" + +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" + +using namespace mlir; + +namespace mlir::water { + +FailureOr> +assembleISAToHSACO(Operation *op, StringRef isa, + llvm::TargetMachine &targetMachine, StringRef toolkitPath) { + // Step 1: Assemble ISA to object file using MC infrastructure + llvm::Triple triple = targetMachine.getTargetTriple(); + std::string error; + const llvm::Target *target = + llvm::TargetRegistry::lookupTarget(triple, error); + if (!target) + return op->emitError() << "Failed to lookup target: " << error; + + // Set up MC infrastructure + llvm::SourceMgr srcMgr; + srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), + llvm::SMLoc()); + + const llvm::MCTargetOptions mcOptions; + std::unique_ptr mri(target->createMCRegInfo(triple)); + std::unique_ptr mai( + target->createMCAsmInfo(*mri, triple, mcOptions)); + std::unique_ptr sti( + target->createMCSubtargetInfo(triple, targetMachine.getTargetCPU(), + targetMachine.getTargetFeatureString())); + + SmallVector objectBuffer; + llvm::raw_svector_ostream os(objectBuffer); + + llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr, + &mcOptions); + std::unique_ptr 
mofi(target->createMCObjectFileInfo( + ctx, /*PIC=*/false, /*LargeCodeModel=*/false)); + ctx.setObjectFileInfo(mofi.get()); + + std::unique_ptr mcii(target->createMCInstrInfo()); + llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, ctx); + llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions); + std::unique_ptr mcStreamer(target->createMCObjectStreamer( + triple, ctx, std::unique_ptr(mab), + mab->createObjectWriter(os), std::unique_ptr(ce), + *sti)); + + std::unique_ptr parser( + createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai)); + std::unique_ptr tap( + target->createMCAsmParser(*sti, *parser, *mcii, mcOptions)); + + if (!tap) + return op->emitError("Assembler initialization error"); + + parser->setTargetParser(*tap); + parser->Run(false); + + // Step 2: Link object file to create HSACO + // Write object to temporary file + int tempObjFd = -1; + SmallString<128> tempObjFilename; + if (llvm::sys::fs::createTemporaryFile("kernel%%", "o", tempObjFd, + tempObjFilename)) + return op->emitError("Failed to create temporary file for object"); + + llvm::FileRemover cleanupObj(tempObjFilename); + { + llvm::raw_fd_ostream tempObjOs(tempObjFd, true); + tempObjOs << StringRef(objectBuffer.data(), objectBuffer.size()); + tempObjOs.flush(); + } + + // Create temporary file for HSACO + SmallString<128> tempHsacoFilename; + if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFilename)) + return op->emitError("Failed to create temporary file for HSACO"); + + llvm::FileRemover cleanupHsaco(tempHsacoFilename); + + // Link using ld.lld + SmallString<128> lldPath(toolkitPath); + llvm::sys::path::append(lldPath, "llvm", "bin", "ld.lld"); + int lldResult = llvm::sys::ExecuteAndWait( + lldPath, {"ld.lld", "-shared", tempObjFilename, "-o", tempHsacoFilename}); + if (lldResult != 0) + return op->emitError("ld.lld invocation failed"); + + // Read HSACO file + auto hsacoFile = + llvm::MemoryBuffer::getFile(tempHsacoFilename, /*IsText=*/false); + 
if (!hsacoFile) + return op->emitError("Failed to read HSACO from temporary file"); + + StringRef buffer = (*hsacoFile)->getBuffer(); + return SmallVector(buffer.begin(), buffer.end()); +} + +} // namespace mlir::water diff --git a/water/lib/Transforms/AssembleISA.h b/water/lib/Transforms/AssembleISA.h new file mode 100644 index 000000000..3f10ee502 --- /dev/null +++ b/water/lib/Transforms/AssembleISA.h @@ -0,0 +1,39 @@ +// Copyright 2025 The Wave Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef WATER_LIB_TRANSFORMS_ASSEMBLEISA_H +#define WATER_LIB_TRANSFORMS_ASSEMBLEISA_H + +#include "mlir/IR/Operation.h" +#include "mlir/Support/LogicalResult.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" + +namespace llvm { +class TargetMachine; +} // namespace llvm + +namespace mlir::water { + +/// Assembles ISA (assembly code) to HSACO (HSA Code Object) binary. +/// +/// This function: +/// 1. Parses the ISA using LLVM MC infrastructure +/// 2. Assembles it to an ELF object file +/// 3. 
Links the object file using ld.lld to create an HSACO +/// +/// \param op Operation for error reporting +/// \param isa Assembly code to assemble +/// \param targetMachine Target machine for MC infrastructure setup +/// \param toolkitPath Path to toolkit containing ld.lld +/// \return Binary data of the HSACO file, or failure +FailureOr> +assembleISAToHSACO(Operation *op, StringRef isa, + llvm::TargetMachine &targetMachine, StringRef toolkitPath); + +} // namespace mlir::water + +#endif // WATER_LIB_TRANSFORMS_ASSEMBLEISA_H diff --git a/water/lib/Transforms/CMakeLists.txt b/water/lib/Transforms/CMakeLists.txt index 4ad7befe4..d81eec243 100644 --- a/water/lib/Transforms/CMakeLists.txt +++ b/water/lib/Transforms/CMakeLists.txt @@ -1,5 +1,6 @@ add_mlir_dialect_library(MLIRWaterTransforms AccessCheckers.cpp + AssembleISA.cpp CheckStaticAssertions.cpp GPUModuleToBinary.cpp GPUToGPURuntime.cpp diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index dd0df545c..1c33384ae 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -6,6 +6,8 @@ #include "water/Transforms/Passes.h" +#include "AssembleISA.h" + #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/ExecutionEngine/OptUtils.h" @@ -19,24 +21,7 @@ #include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCParser/MCTargetAsmParser.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" -#include "llvm/Support/FileSystem.h" -#include 
"llvm/Support/FileUtilities.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/Program.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -72,8 +57,6 @@ class WaterGPUModuleToBinaryPass llvm::TargetMachine *targetMachine); FailureOr compileToISA(llvm::Module &mod, llvm::TargetMachine &targetMachine); - FailureOr> - assembleToObject(StringRef isa, llvm::TargetMachine &targetMachine); }; } // namespace @@ -130,7 +113,7 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { // Step 5: Assemble to binary FailureOr> binary = - assembleToObject(*isa, **targetMachine); + water::assembleISAToHSACO(module, *isa, **targetMachine, toolkitPath); if (failed(binary)) return module.emitError("Failed to assemble to binary"); @@ -263,100 +246,6 @@ WaterGPUModuleToBinaryPass::compileToISA(llvm::Module &mod, return std::string(isaBuffer.begin(), isaBuffer.end()); } -FailureOr> WaterGPUModuleToBinaryPass::assembleToObject( - StringRef isa, llvm::TargetMachine &targetMachine) { - // Step 1: Assemble ISA to object file using MC infrastructure - llvm::Triple triple = targetMachine.getTargetTriple(); - std::string error; - const llvm::Target *target = - llvm::TargetRegistry::lookupTarget(triple, error); - if (!target) - return getOperation()->emitError() << "Failed to lookup target: " << error; - - // Set up MC infrastructure - llvm::SourceMgr srcMgr; - srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), - llvm::SMLoc()); - - const llvm::MCTargetOptions mcOptions; - std::unique_ptr mri(target->createMCRegInfo(triple)); - std::unique_ptr mai( - target->createMCAsmInfo(*mri, triple, mcOptions)); - std::unique_ptr sti( - target->createMCSubtargetInfo(triple, targetMachine.getTargetCPU(), - targetMachine.getTargetFeatureString())); - - SmallVector objectBuffer; - llvm::raw_svector_ostream os(objectBuffer); - - llvm::MCContext ctx(triple, 
mai.get(), mri.get(), sti.get(), &srcMgr, - &mcOptions); - std::unique_ptr mofi(target->createMCObjectFileInfo( - ctx, /*PIC=*/false, /*LargeCodeModel=*/false)); - ctx.setObjectFileInfo(mofi.get()); - - std::unique_ptr mcii(target->createMCInstrInfo()); - llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, ctx); - llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions); - std::unique_ptr mcStreamer(target->createMCObjectStreamer( - triple, ctx, std::unique_ptr(mab), - mab->createObjectWriter(os), std::unique_ptr(ce), - *sti)); - - std::unique_ptr parser( - createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai)); - std::unique_ptr tap( - target->createMCAsmParser(*sti, *parser, *mcii, mcOptions)); - - if (!tap) - return getOperation()->emitError("Assembler initialization error"); - - parser->setTargetParser(*tap); - parser->Run(false); - - // Step 2: Link object file to create HSACO - // Write object to temporary file - int tempObjFd = -1; - SmallString<128> tempObjFilename; - if (llvm::sys::fs::createTemporaryFile("kernel%%", "o", tempObjFd, - tempObjFilename)) - return getOperation()->emitError( - "Failed to create temporary file for object"); - - llvm::FileRemover cleanupObj(tempObjFilename); - { - llvm::raw_fd_ostream tempObjOs(tempObjFd, true); - tempObjOs << StringRef(objectBuffer.data(), objectBuffer.size()); - tempObjOs.flush(); - } - - // Create temporary file for HSACO - SmallString<128> tempHsacoFilename; - if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFilename)) - return getOperation()->emitError( - "Failed to create temporary file for HSACO"); - - llvm::FileRemover cleanupHsaco(tempHsacoFilename); - - // Link using ld.lld - SmallString<128> lldPath(toolkitPath); - llvm::sys::path::append(lldPath, "llvm", "bin", "ld.lld"); - int lldResult = llvm::sys::ExecuteAndWait( - lldPath, {"ld.lld", "-shared", tempObjFilename, "-o", tempHsacoFilename}); - if (lldResult != 0) - return getOperation()->emitError("ld.lld 
invocation failed"); - - // Read HSACO file - auto hsacoFile = - llvm::MemoryBuffer::getFile(tempHsacoFilename, /*IsText=*/false); - if (!hsacoFile) - return getOperation()->emitError( - "Failed to read HSACO from temporary file"); - - StringRef buffer = (*hsacoFile)->getBuffer(); - return SmallVector(buffer.begin(), buffer.end()); -} - void WaterGPUModuleToBinaryPass::runOnOperation() { // Walk all regions and blocks looking for GPUModuleOp instances for (Region &region : getOperation()->getRegions()) { From 9a402ddcb0a5ce426902043d063a996b9cd613c6 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 20:31:26 +0100 Subject: [PATCH 12/26] init target Signed-off-by: Ivan Butygin --- water/lib/Transforms/AssembleISA.cpp | 15 ++++++++++++ water/lib/Transforms/AssembleISA.h | 3 +++ water/lib/Transforms/GPUModuleToBinary.cpp | 24 +++++++++++++++---- .../test/Transforms/gpu-module-to-binary.mlir | 20 +++++++++++----- 4 files changed, 51 insertions(+), 11 deletions(-) diff --git a/water/lib/Transforms/AssembleISA.cpp b/water/lib/Transforms/AssembleISA.cpp index 75f896852..d18361fe7 100644 --- a/water/lib/Transforms/AssembleISA.cpp +++ b/water/lib/Transforms/AssembleISA.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -32,9 +33,23 @@ using namespace mlir; namespace mlir::water { +void initializeAMDGPUTarget() { + static bool initialized = []() { + LLVMInitializeAMDGPUTarget(); + LLVMInitializeAMDGPUTargetInfo(); + LLVMInitializeAMDGPUTargetMC(); + LLVMInitializeAMDGPUAsmParser(); + LLVMInitializeAMDGPUAsmPrinter(); + return true; + }(); + (void)initialized; +} + FailureOr> assembleISAToHSACO(Operation *op, StringRef isa, llvm::TargetMachine &targetMachine, StringRef toolkitPath) { + initializeAMDGPUTarget(); + // Step 1: Assemble ISA to object file using MC infrastructure 
llvm::Triple triple = targetMachine.getTargetTriple(); std::string error; diff --git a/water/lib/Transforms/AssembleISA.h b/water/lib/Transforms/AssembleISA.h index 3f10ee502..657065ed6 100644 --- a/water/lib/Transforms/AssembleISA.h +++ b/water/lib/Transforms/AssembleISA.h @@ -18,6 +18,9 @@ class TargetMachine; namespace mlir::water { +/// Initializes the LLVM AMDGPU target. Safe to call multiple times. +void initializeAMDGPUTarget(); + /// Assembles ISA (assembly code) to HSACO (HSA Code Object) binary. /// /// This function: diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index 1c33384ae..7d1fd50f4 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -14,6 +14,7 @@ #include "mlir/IR/Builders.h" #include "mlir/IR/SymbolTable.h" #include "mlir/Pass/Pass.h" +#include "mlir/Target/LLVM/ROCDL/Utils.h" #include "mlir/Target/LLVMIR/Export.h" #include "llvm/IR/LLVMContext.h" @@ -23,6 +24,7 @@ #include "llvm/Linker/Linker.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO/Internalize.h" @@ -97,23 +99,33 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { if (failed(linkBitcodeFiles(*llvmModule, std::move(bitcodeLibs)))) return module.emitError("Failed to link bitcode libraries"); - // Step 3: Optimize LLVM IR + // Step 3: Create target machine and set data layout FailureOr targetMachine = createTargetMachine(targetAttr); if (failed(targetMachine)) return module.emitError("Failed to create target machine"); + // Set the data layout and target triple to match the target machine + llvmModule->setDataLayout((*targetMachine)->createDataLayout()); + llvmModule->setTargetTriple((*targetMachine)->getTargetTriple()); + + // Step 4: Optimize LLVM IR if 
(failed(optimizeModule(*llvmModule, *targetMachine))) return module.emitError("Failed to optimize LLVM IR"); - // Step 4: Compile to ISA + // Step 5: Compile to ISA FailureOr isa = compileToISA(*llvmModule, **targetMachine); if (failed(isa)) return module.emitError("Failed to compile to ISA"); - // Step 5: Assemble to binary - FailureOr> binary = - water::assembleISAToHSACO(module, *isa, **targetMachine, toolkitPath); + // Step 6: Assemble to binary + // Use ROCM_PATH environment variable if toolkitPath is not provided + std::string actualToolkitPath = toolkitPath; + if (actualToolkitPath.empty()) + actualToolkitPath = ROCDL::getROCMPath().str(); + + FailureOr> binary = water::assembleISAToHSACO( + module, *isa, **targetMachine, actualToolkitPath); if (failed(binary)) return module.emitError("Failed to assemble to binary"); @@ -182,6 +194,8 @@ LogicalResult WaterGPUModuleToBinaryPass::linkBitcodeFiles( FailureOr WaterGPUModuleToBinaryPass::createTargetMachine(Attribute targetAttr) { + water::initializeAMDGPUTarget(); + // Check if this is a ROCDL target auto rocdlTarget = dyn_cast(targetAttr); if (!rocdlTarget) diff --git a/water/test/Transforms/gpu-module-to-binary.mlir b/water/test/Transforms/gpu-module-to-binary.mlir index 4093b2018..bbcf0acab 100644 --- a/water/test/Transforms/gpu-module-to-binary.mlir +++ b/water/test/Transforms/gpu-module-to-binary.mlir @@ -1,11 +1,19 @@ // RUN: water-opt %s --water-gpu-module-to-binary | FileCheck %s -// CHECK-LABEL: module -module attributes {gpu.container_module} { - // Simple test to verify the pass stub runs without errors - // TODO: Add actual gpu.module operations once serialization is implemented +// Test that the pass converts a gpu.module with ROCDL target to a gpu.binary +// The gpu.module contains already-lowered LLVM IR +// +// This test requires ROCm to be installed. It uses mlir::ROCDL::getROCMPath() +// which checks ROCM_PATH, ROCM_ROOT, ROCM_HOME environment variables or uses +// the CMake-detected path. 
- func.func @dummy() { - return +// CHECK-LABEL: module attributes {gpu.container_module} +module attributes {gpu.container_module} { + // CHECK-NOT: gpu.module + // CHECK: gpu.binary @kernel_module [#gpu.object<#rocdl.target, bin = + gpu.module @kernel_module [#rocdl.target] { + llvm.func @simple_kernel(%arg0: f32) attributes {gpu.kernel} { + llvm.return + } } } From 287493c12ad5a9c7cba4622c5187a8adcdb9dd68 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 20:40:07 +0100 Subject: [PATCH 13/26] clenaup Signed-off-by: Ivan Butygin --- water/lib/Transforms/GPUModuleToBinary.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index 7d1fd50f4..9d0eb485e 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -120,9 +120,9 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { // Step 6: Assemble to binary // Use ROCM_PATH environment variable if toolkitPath is not provided - std::string actualToolkitPath = toolkitPath; + StringRef actualToolkitPath = toolkitPath; if (actualToolkitPath.empty()) - actualToolkitPath = ROCDL::getROCMPath().str(); + actualToolkitPath = ROCDL::getROCMPath(); FailureOr> binary = water::assembleISAToHSACO( module, *isa, **targetMachine, actualToolkitPath); From ea43fbad01bb969213cf94a02a90cb8f4defa1b7 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 20:52:57 +0100 Subject: [PATCH 14/26] dump-intermediates Signed-off-by: Ivan Butygin --- water/include/water/Transforms/Passes.td | 2 + water/lib/Transforms/GPUModuleToBinary.cpp | 67 +++++++++++++++++++ .../Transforms/gpu-module-to-binary-dump.mlir | 19 ++++++ 3 files changed, 88 insertions(+) create mode 100644 water/test/Transforms/gpu-module-to-binary-dump.mlir diff --git a/water/include/water/Transforms/Passes.td b/water/include/water/Transforms/Passes.td index 
e4ea3b881..dd3818319 100644 --- a/water/include/water/Transforms/Passes.td +++ b/water/include/water/Transforms/Passes.td @@ -129,6 +129,8 @@ def WaterGPUModuleToBinary : Pass<"water-gpu-module-to-binary", ""> { "Extra bitcode files to link to.">, Option<"cmdOptions", "opts", "std::string", [{""}], "Command line options to pass to the compilation tools.">, + Option<"dumpIntermediates", "dump-intermediates", "std::string", [{""}], + "Directory to dump intermediate compilation files (LLVM IR, ISA).">, ]; } diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index 9d0eb485e..41b8a56cc 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -23,8 +23,11 @@ #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetSelect.h" +#include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO/Internalize.h" @@ -59,6 +62,12 @@ class WaterGPUModuleToBinaryPass llvm::TargetMachine *targetMachine); FailureOr compileToISA(llvm::Module &mod, llvm::TargetMachine &targetMachine); + + // Dump helpers + LogicalResult dumpLLVMModule(llvm::Module &mod, StringRef moduleName, + StringRef suffix); + LogicalResult dumpText(StringRef text, StringRef moduleName, + StringRef suffix); }; } // namespace @@ -87,6 +96,10 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { if (!llvmModule) return module.emitError("Failed to translate GPU module to LLVM IR"); + // Dump original LLVM IR + if (failed(dumpLLVMModule(*llvmModule, module.getName(), "_original"))) + return failure(); + // Step 2: Load and link device libraries SmallVector> bitcodeLibs; for (const std::string &path : linkFiles) { @@ -99,6 +112,10 @@ 
LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { if (failed(linkBitcodeFiles(*llvmModule, std::move(bitcodeLibs)))) return module.emitError("Failed to link bitcode libraries"); + // Dump linked LLVM IR + if (failed(dumpLLVMModule(*llvmModule, module.getName(), "_linked"))) + return failure(); + // Step 3: Create target machine and set data layout FailureOr targetMachine = createTargetMachine(targetAttr); @@ -113,11 +130,19 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { if (failed(optimizeModule(*llvmModule, *targetMachine))) return module.emitError("Failed to optimize LLVM IR"); + // Dump optimized LLVM IR + if (failed(dumpLLVMModule(*llvmModule, module.getName(), "_optimized"))) + return failure(); + // Step 5: Compile to ISA FailureOr isa = compileToISA(*llvmModule, **targetMachine); if (failed(isa)) return module.emitError("Failed to compile to ISA"); + // Dump ISA + if (failed(dumpText(*isa, module.getName(), ".s"))) + return failure(); + // Step 6: Assemble to binary // Use ROCM_PATH environment variable if toolkitPath is not provided StringRef actualToolkitPath = toolkitPath; @@ -260,6 +285,48 @@ WaterGPUModuleToBinaryPass::compileToISA(llvm::Module &mod, return std::string(isaBuffer.begin(), isaBuffer.end()); } +LogicalResult WaterGPUModuleToBinaryPass::dumpLLVMModule(llvm::Module &mod, + StringRef moduleName, + StringRef suffix) { + if (dumpIntermediates.empty()) + return success(); + + SmallString<128> path(dumpIntermediates); + llvm::sys::path::append(path, moduleName + suffix + ".ll"); + + std::error_code ec; + llvm::ToolOutputFile outputFile(path, ec, llvm::sys::fs::OF_None); + if (ec) + return getOperation()->emitError() + << "Failed to open file for dumping: " << path << ": " + << ec.message(); + + mod.print(outputFile.os(), nullptr); + outputFile.keep(); + return success(); +} + +LogicalResult WaterGPUModuleToBinaryPass::dumpText(StringRef text, + StringRef moduleName, + StringRef 
suffix) { + if (dumpIntermediates.empty()) + return success(); + + SmallString<128> path(dumpIntermediates); + llvm::sys::path::append(path, moduleName + suffix); + + std::error_code ec; + llvm::ToolOutputFile outputFile(path, ec, llvm::sys::fs::OF_None); + if (ec) + return getOperation()->emitError() + << "Failed to open file for dumping: " << path << ": " + << ec.message(); + + outputFile.os() << text; + outputFile.keep(); + return success(); +} + void WaterGPUModuleToBinaryPass::runOnOperation() { // Walk all regions and blocks looking for GPUModuleOp instances for (Region &region : getOperation()->getRegions()) { diff --git a/water/test/Transforms/gpu-module-to-binary-dump.mlir b/water/test/Transforms/gpu-module-to-binary-dump.mlir new file mode 100644 index 000000000..49bbb056f --- /dev/null +++ b/water/test/Transforms/gpu-module-to-binary-dump.mlir @@ -0,0 +1,19 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: water-opt %s --water-gpu-module-to-binary="dump-intermediates=%t" | FileCheck %s +// RUN: test -f %t/kernel_module_original.ll +// RUN: test -f %t/kernel_module_linked.ll +// RUN: test -f %t/kernel_module_optimized.ll +// RUN: test -f %t/kernel_module.s + +// Test that the pass dumps intermediate compilation files when dump-intermediates is specified + +// CHECK-LABEL: module attributes {gpu.container_module} +module attributes {gpu.container_module} { + // CHECK-NOT: gpu.module + // CHECK: gpu.binary @kernel_module [#gpu.object<#rocdl.target, bin = + gpu.module @kernel_module [#rocdl.target] { + llvm.func @simple_kernel(%arg0: f32) attributes {gpu.kernel} { + llvm.return + } + } +} From aaa4eaa893395fc7ea8609f7f72fa267899d9825 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 20:58:57 +0100 Subject: [PATCH 15/26] dump hsaco Signed-off-by: Ivan Butygin --- water/lib/Transforms/GPUModuleToBinary.cpp | 27 +++++++++++++++++++ .../Transforms/gpu-module-to-binary-dump.mlir | 1 + 2 files changed, 28 insertions(+) diff --git
a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index 41b8a56cc..0fcc80aa1 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -68,6 +68,8 @@ class WaterGPUModuleToBinaryPass StringRef suffix); LogicalResult dumpText(StringRef text, StringRef moduleName, StringRef suffix); + LogicalResult dumpBinary(ArrayRef data, StringRef moduleName, + StringRef suffix); }; } // namespace @@ -156,6 +158,10 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { SmallVector binaryData = std::move(*binary); + // Dump HSACO binary + if (failed(dumpBinary(binaryData, module.getName(), ".hsaco"))) + return failure(); + // Create object attribute Builder attrBuilder(module.getContext()); StringAttr binaryAttr = attrBuilder.getStringAttr( @@ -327,6 +333,27 @@ LogicalResult WaterGPUModuleToBinaryPass::dumpText(StringRef text, return success(); } +LogicalResult WaterGPUModuleToBinaryPass::dumpBinary(ArrayRef data, + StringRef moduleName, + StringRef suffix) { + if (dumpIntermediates.empty()) + return success(); + + SmallString<128> path(dumpIntermediates); + llvm::sys::path::append(path, moduleName + suffix); + + std::error_code ec; + llvm::ToolOutputFile outputFile(path, ec, llvm::sys::fs::OF_None); + if (ec) + return getOperation()->emitError() + << "Failed to open file for dumping: " << path << ": " + << ec.message(); + + outputFile.os().write(data.data(), data.size()); + outputFile.keep(); + return success(); +} + void WaterGPUModuleToBinaryPass::runOnOperation() { // Walk all regions and blocks looking for GPUModuleOp instances for (Region &region : getOperation()->getRegions()) { diff --git a/water/test/Transforms/gpu-module-to-binary-dump.mlir b/water/test/Transforms/gpu-module-to-binary-dump.mlir index 49bbb056f..e36553739 100644 --- a/water/test/Transforms/gpu-module-to-binary-dump.mlir +++ b/water/test/Transforms/gpu-module-to-binary-dump.mlir @@ -4,6 +4,7 @@
// RUN: test -f %t/kernel_module_linked.ll // RUN: test -f %t/kernel_module_optimized.ll // RUN: test -f %t/kernel_module.s +// RUN: test -f %t/kernel_module.hsaco // Test that the pass dumps intermediate compilation files when dump-intermediates is specified From 4154ee9c30993f6827a4257fe9b2c6fb89ba78b2 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 21:15:25 +0100 Subject: [PATCH 16/26] renamings Signed-off-by: Ivan Butygin --- water/lib/Transforms/GPUModuleToBinary.cpp | 76 +++++++++++----------- 1 file changed, 37 insertions(+), 39 deletions(-) diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index 0fcc80aa1..1ac2fc1b8 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -49,7 +49,7 @@ class WaterGPUModuleToBinaryPass void runOnOperation() final; private: - LogicalResult serializeModule(GPUModuleOp module); + LogicalResult serializeModule(GPUModuleOp mod); // Helper methods std::unique_ptr loadBitcodeFile(llvm::LLVMContext &context, @@ -64,42 +64,40 @@ class WaterGPUModuleToBinaryPass llvm::TargetMachine &targetMachine); // Dump helpers - LogicalResult dumpLLVMModule(llvm::Module &mod, StringRef moduleName, + LogicalResult dumpLLVMModule(llvm::Module &mod, StringRef modName, StringRef suffix); - LogicalResult dumpText(StringRef text, StringRef moduleName, - StringRef suffix); - LogicalResult dumpBinary(ArrayRef data, StringRef moduleName, + LogicalResult dumpText(StringRef text, StringRef modName, StringRef suffix); + LogicalResult dumpBinary(ArrayRef data, StringRef modName, StringRef suffix); }; } // namespace -LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { - OpBuilder builder(module->getContext()); +LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp mod) { + OpBuilder builder(mod->getContext()); // Check if module has target attributes - if (!module.getTargetsAttr() || 
module.getTargetsAttr().empty()) - return module.emitError("GPU module has no target attributes"); + if (!mod.getTargetsAttr() || mod.getTargetsAttr().empty()) + return mod.emitError("GPU module has no target attributes"); // Check that there is exactly one target - if (module.getTargetsAttr().size() != 1) - return module.emitError( - "GPU module must have exactly one target attribute"); + if (mod.getTargetsAttr().size() != 1) + return mod.emitError("GPU module must have exactly one target attribute"); // Get the target attribute - Attribute targetAttr = module.getTargetsAttr()[0]; + Attribute targetAttr = mod.getTargetsAttr()[0]; if (!targetAttr) - return module.emitError("Target attribute cannot be null"); + return mod.emitError("Target attribute cannot be null"); // Step 1: Translate GPU module to LLVM IR llvm::LLVMContext llvmContext; std::unique_ptr llvmModule = - translateModuleToLLVMIR(module, llvmContext); + translateModuleToLLVMIR(mod, llvmContext); if (!llvmModule) - return module.emitError("Failed to translate GPU module to LLVM IR"); + return mod.emitError("Failed to translate GPU module to LLVM IR"); // Dump original LLVM IR - if (failed(dumpLLVMModule(*llvmModule, module.getName(), "_original"))) + if (failed(dumpLLVMModule(*llvmModule, mod.getName(), "_original"))) return failure(); // Step 2: Load and link device libraries @@ -107,22 +105,22 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { for (const std::string &path : linkFiles) { auto lib = loadBitcodeFile(llvmContext, path); if (!lib) - return module.emitError("Failed to load bitcode file: " + path); + return mod.emitError("Failed to load bitcode file: " + path); bitcodeLibs.push_back(std::move(lib)); } if (failed(linkBitcodeFiles(*llvmModule, std::move(bitcodeLibs)))) - return module.emitError("Failed to link bitcode libraries"); + return mod.emitError("Failed to link bitcode libraries"); // Dump linked LLVM IR - if (failed(dumpLLVMModule(*llvmModule, 
module.getName(), "_linked"))) + if (failed(dumpLLVMModule(*llvmModule, mod.getName(), "_linked"))) return failure(); // Step 3: Create target machine and set data layout FailureOr targetMachine = createTargetMachine(targetAttr); if (failed(targetMachine)) - return module.emitError("Failed to create target machine"); + return mod.emitError("Failed to create target machine"); // Set the data layout and target triple to match the target machine llvmModule->setDataLayout((*targetMachine)->createDataLayout()); @@ -130,19 +128,19 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { // Step 4: Optimize LLVM IR if (failed(optimizeModule(*llvmModule, *targetMachine))) - return module.emitError("Failed to optimize LLVM IR"); + return mod.emitError("Failed to optimize LLVM IR"); // Dump optimized LLVM IR - if (failed(dumpLLVMModule(*llvmModule, module.getName(), "_optimized"))) + if (failed(dumpLLVMModule(*llvmModule, mod.getName(), "_optimized"))) return failure(); // Step 5: Compile to ISA FailureOr isa = compileToISA(*llvmModule, **targetMachine); if (failed(isa)) - return module.emitError("Failed to compile to ISA"); + return mod.emitError("Failed to compile to ISA"); // Dump ISA - if (failed(dumpText(*isa, module.getName(), ".s"))) + if (failed(dumpText(*isa, mod.getName(), ".s"))) return failure(); // Step 6: Assemble to binary @@ -151,19 +149,19 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { if (actualToolkitPath.empty()) actualToolkitPath = ROCDL::getROCMPath(); - FailureOr> binary = water::assembleISAToHSACO( - module, *isa, **targetMachine, actualToolkitPath); + FailureOr> binary = + water::assembleISAToHSACO(mod, *isa, **targetMachine, actualToolkitPath); if (failed(binary)) - return module.emitError("Failed to assemble to binary"); + return mod.emitError("Failed to assemble to binary"); SmallVector binaryData = std::move(*binary); // Dump HSACO binary - if (failed(dumpBinary(binaryData, 
module.getName(), ".hsaco"))) + if (failed(dumpBinary(binaryData, mod.getName(), ".hsaco"))) return failure(); // Create object attribute - Builder attrBuilder(module.getContext()); + Builder attrBuilder(mod.getContext()); StringAttr binaryAttr = attrBuilder.getStringAttr( StringRef(binaryData.data(), binaryData.size())); @@ -175,13 +173,13 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp module) { kernels); // Create gpu.binary op - builder.setInsertionPointAfter(module); - gpu::BinaryOp::create(builder, module.getLoc(), module.getName(), + builder.setInsertionPointAfter(mod); + gpu::BinaryOp::create(builder, mod.getLoc(), mod.getName(), /*offloadingHandler=*/nullptr, builder.getArrayAttr({objectAttr})); // Erase the original module - module->erase(); + mod->erase(); return success(); } @@ -292,13 +290,13 @@ WaterGPUModuleToBinaryPass::compileToISA(llvm::Module &mod, } LogicalResult WaterGPUModuleToBinaryPass::dumpLLVMModule(llvm::Module &mod, - StringRef moduleName, + StringRef modName, StringRef suffix) { if (dumpIntermediates.empty()) return success(); SmallString<128> path(dumpIntermediates); - llvm::sys::path::append(path, moduleName + suffix + ".ll"); + llvm::sys::path::append(path, modName + suffix + ".ll"); std::error_code ec; llvm::ToolOutputFile outputFile(path, ec, llvm::sys::fs::OF_None); @@ -313,13 +311,13 @@ LogicalResult WaterGPUModuleToBinaryPass::dumpLLVMModule(llvm::Module &mod, } LogicalResult WaterGPUModuleToBinaryPass::dumpText(StringRef text, - StringRef moduleName, + StringRef modName, StringRef suffix) { if (dumpIntermediates.empty()) return success(); SmallString<128> path(dumpIntermediates); - llvm::sys::path::append(path, moduleName + suffix); + llvm::sys::path::append(path, modName + suffix); std::error_code ec; llvm::ToolOutputFile outputFile(path, ec, llvm::sys::fs::OF_None); @@ -334,13 +332,13 @@ LogicalResult WaterGPUModuleToBinaryPass::dumpText(StringRef text, } LogicalResult 
WaterGPUModuleToBinaryPass::dumpBinary(ArrayRef data, - StringRef moduleName, + StringRef modName, StringRef suffix) { if (dumpIntermediates.empty()) return success(); SmallString<128> path(dumpIntermediates); - llvm::sys::path::append(path, moduleName + suffix); + llvm::sys::path::append(path, modName + suffix); std::error_code ec; llvm::ToolOutputFile outputFile(path, ec, llvm::sys::fs::OF_None); From 65a2256f459a689b61aefddbb27f1b4a7f39e160 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 23:19:50 +0100 Subject: [PATCH 17/26] override Signed-off-by: Ivan Butygin --- water/include/water/Transforms/Passes.td | 2 + water/lib/Transforms/GPUModuleToBinary.cpp | 96 ++++++++++++++++++++-- 2 files changed, 91 insertions(+), 7 deletions(-) diff --git a/water/include/water/Transforms/Passes.td b/water/include/water/Transforms/Passes.td index dd3818319..40fbd2393 100644 --- a/water/include/water/Transforms/Passes.td +++ b/water/include/water/Transforms/Passes.td @@ -131,6 +131,8 @@ def WaterGPUModuleToBinary : Pass<"water-gpu-module-to-binary", ""> { "Command line options to pass to the compilation tools.">, Option<"dumpIntermediates", "dump-intermediates", "std::string", [{""}], "Directory to dump intermediate compilation files (LLVM IR, ISA).">, + Option<"overrideIntermediates", "override-intermediates", "std::string", [{""}], + "Directory containing intermediate files to use instead of generating them.">, ]; } diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index 1ac2fc1b8..3d22a05aa 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -24,6 +24,7 @@ #include "llvm/Linker/Linker.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetSelect.h" @@ -69,6 +70,13 @@ class WaterGPUModuleToBinaryPass 
LogicalResult dumpText(StringRef text, StringRef modName, StringRef suffix); LogicalResult dumpBinary(ArrayRef data, StringRef modName, StringRef suffix); + + // Override helpers + FailureOr> + tryLoadOverrideLLVM(llvm::LLVMContext &context, StringRef modName, + StringRef suffix); + FailureOr> tryLoadOverrideText(StringRef modName, + StringRef suffix); }; } // namespace @@ -96,8 +104,22 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp mod) { if (!llvmModule) return mod.emitError("Failed to translate GPU module to LLVM IR"); - // Dump original LLVM IR - if (failed(dumpLLVMModule(*llvmModule, mod.getName(), "_original"))) + auto dumpAndOverrideLLVM = [&](StringRef suffix) -> LogicalResult { + StringRef modName = mod.getName(); + if (failed(dumpLLVMModule(*llvmModule, modName, suffix))) + return failure(); + + auto overrideLLVM = tryLoadOverrideLLVM(llvmContext, modName, suffix); + if (failed(overrideLLVM)) + return failure(); + + if (*overrideLLVM) + llvmModule = std::move(*overrideLLVM); + + return success(); + }; + // Dump/override original LLVM IR + if (failed(dumpAndOverrideLLVM("_original"))) return failure(); // Step 2: Load and link device libraries @@ -112,8 +134,8 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp mod) { if (failed(linkBitcodeFiles(*llvmModule, std::move(bitcodeLibs)))) return mod.emitError("Failed to link bitcode libraries"); - // Dump linked LLVM IR - if (failed(dumpLLVMModule(*llvmModule, mod.getName(), "_linked"))) + // Dump/override linked LLVM IR + if (failed(dumpAndOverrideLLVM("_linked"))) return failure(); // Step 3: Create target machine and set data layout @@ -131,7 +153,7 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp mod) { return mod.emitError("Failed to optimize LLVM IR"); // Dump optimized LLVM IR - if (failed(dumpLLVMModule(*llvmModule, mod.getName(), "_optimized"))) + if (failed(dumpAndOverrideLLVM("_optimized"))) return failure(); // Step 5: Compile to 
ISA @@ -139,8 +161,23 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp mod) { if (failed(isa)) return mod.emitError("Failed to compile to ISA"); - // Dump ISA - if (failed(dumpText(*isa, mod.getName(), ".s"))) + auto dumpAndOverrideISA = [&](StringRef suffix) -> LogicalResult { + StringRef modName = mod.getName(); + if (failed(dumpText(*isa, modName, suffix))) + return failure(); + + auto overrideISA = tryLoadOverrideText(modName, suffix); + if (failed(overrideISA)) + return failure(); + + if (*overrideISA) + isa = std::move(**overrideISA); + + return success(); + }; + + // Dump/override ISA + if (failed(dumpAndOverrideISA(".s"))) return failure(); // Step 6: Assemble to binary @@ -352,6 +389,51 @@ LogicalResult WaterGPUModuleToBinaryPass::dumpBinary(ArrayRef data, return success(); } +FailureOr> +WaterGPUModuleToBinaryPass::tryLoadOverrideLLVM(llvm::LLVMContext &context, + StringRef modName, + StringRef suffix) { + if (overrideIntermediates.empty()) + return std::unique_ptr(nullptr); + + SmallString<128> path(overrideIntermediates); + llvm::sys::path::append(path, modName + suffix + ".ll"); + + if (!llvm::sys::fs::exists(path)) + return std::unique_ptr(nullptr); + + llvm::SMDiagnostic error; + std::unique_ptr module = + llvm::parseIRFile(path, error, context); + if (!module) + return getOperation()->emitError() + << "Failed to load override LLVM IR from " << path << ": " + << error.getMessage(); + + return module; +} + +FailureOr> +WaterGPUModuleToBinaryPass::tryLoadOverrideText(StringRef modName, + StringRef suffix) { + if (overrideIntermediates.empty()) + return std::optional(std::nullopt); + + SmallString<128> path(overrideIntermediates); + llvm::sys::path::append(path, modName + suffix); + + if (!llvm::sys::fs::exists(path)) + return std::optional(std::nullopt); + + auto bufferOrError = llvm::MemoryBuffer::getFile(path); + if (!bufferOrError) + return getOperation()->emitError() + << "Failed to load override file from " << path << ": " 
+ << bufferOrError.getError().message(); + + return std::optional(bufferOrError.get()->getBuffer().str()); +} + void WaterGPUModuleToBinaryPass::runOnOperation() { // Walk all regions and blocks looking for GPUModuleOp instances for (Region &region : getOperation()->getRegions()) { From d2e9e662eb1da9b0024a170a911774206e7e419f Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 23:31:14 +0100 Subject: [PATCH 18/26] test Signed-off-by: Ivan Butygin --- .../gpu-module-to-binary-override.mlir | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 water/test/Transforms/gpu-module-to-binary-override.mlir diff --git a/water/test/Transforms/gpu-module-to-binary-override.mlir b/water/test/Transforms/gpu-module-to-binary-override.mlir new file mode 100644 index 000000000..f34e03ec4 --- /dev/null +++ b/water/test/Transforms/gpu-module-to-binary-override.mlir @@ -0,0 +1,23 @@ +// RUN: rm -rf %t && mkdir -p %t/dump1 %t/dump2 %t/override +// RUN: water-opt %s --water-gpu-module-to-binary="dump-intermediates=%t/dump1" | FileCheck %s +// RUN: cp %t/dump1/kernel_module_linked.ll %t/override/kernel_module_linked.ll +// RUN: sed -i 's/i32/i64/g' %t/override/kernel_module_linked.ll +// RUN: water-opt %s --water-gpu-module-to-binary="dump-intermediates=%t/dump2 override-intermediates=%t/override" | FileCheck %s +// RUN: grep "define.*i64" %t/dump2/kernel_module_optimized.ll + +// Test that override-intermediates works by: +// 1. First run dumps all intermediates to dump1 +// 2. Copy linked LLVM IR to override directory and modify it (i32 -> i64) +// 3. Second run uses the modified linked IR from override directory +// 4.
Verify the modification appears in the optimized IR (next stage after linked) + +// CHECK-LABEL: module attributes {gpu.container_module} +module attributes {gpu.container_module} { + // CHECK-NOT: gpu.module + // CHECK: gpu.binary @kernel_module [#gpu.object<#rocdl.target, bin = + gpu.module @kernel_module [#rocdl.target] { + llvm.func @simple_kernel(%arg0: i32) attributes {gpu.kernel} { + llvm.return + } + } +} From 7bc13384055e7bb814d9cdb4be1ccd91c6a8635d Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 23:48:00 +0100 Subject: [PATCH 19/26] create dump dir Signed-off-by: Ivan Butygin --- water/lib/Transforms/GPUModuleToBinary.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index 3d22a05aa..d9087fab4 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -104,6 +104,15 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp mod) { if (!llvmModule) return mod.emitError("Failed to translate GPU module to LLVM IR"); + // Create dump directory if specified + if (!dumpIntermediates.empty()) { + std::error_code ec = llvm::sys::fs::create_directories(dumpIntermediates); + if (ec) + return mod.emitError() + << "Failed to create dump directory: " << dumpIntermediates << ": " + << ec.message(); + } + auto dumpAndOverrideLLVM = [&](StringRef suffix) -> LogicalResult { StringRef modName = mod.getName(); if (failed(dumpLLVMModule(*llvmModule, modName, suffix))) From df0f21c583cd5379b0fce09b58002edd8d2835f3 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 23:53:49 +0100 Subject: [PATCH 20/26] cleanup Signed-off-by: Ivan Butygin --- water/lib/Transforms/AssembleISA.cpp | 14 +++--- water/lib/Transforms/GPUModuleToBinary.cpp | 54 +++++++++++----------- 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/water/lib/Transforms/AssembleISA.cpp 
b/water/lib/Transforms/AssembleISA.cpp index d18361fe7..0d6db2c7e 100644 --- a/water/lib/Transforms/AssembleISA.cpp +++ b/water/lib/Transforms/AssembleISA.cpp @@ -50,7 +50,7 @@ assembleISAToHSACO(Operation *op, StringRef isa, llvm::TargetMachine &targetMachine, StringRef toolkitPath) { initializeAMDGPUTarget(); - // Step 1: Assemble ISA to object file using MC infrastructure + // Step 1: Assemble ISA to object file using MC infrastructure. llvm::Triple triple = targetMachine.getTargetTriple(); std::string error; const llvm::Target *target = @@ -58,7 +58,7 @@ assembleISAToHSACO(Operation *op, StringRef isa, if (!target) return op->emitError() << "Failed to lookup target: " << error; - // Set up MC infrastructure + // Set up MC infrastructure. llvm::SourceMgr srcMgr; srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), llvm::SMLoc()); @@ -99,8 +99,8 @@ assembleISAToHSACO(Operation *op, StringRef isa, parser->setTargetParser(*tap); parser->Run(false); - // Step 2: Link object file to create HSACO - // Write object to temporary file + // Step 2: Link object file to create HSACO. + // Write object to temporary file. int tempObjFd = -1; SmallString<128> tempObjFilename; if (llvm::sys::fs::createTemporaryFile("kernel%%", "o", tempObjFd, @@ -114,14 +114,14 @@ assembleISAToHSACO(Operation *op, StringRef isa, tempObjOs.flush(); } - // Create temporary file for HSACO + // Create temporary file for HSACO. SmallString<128> tempHsacoFilename; if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFilename)) return op->emitError("Failed to create temporary file for HSACO"); llvm::FileRemover cleanupHsaco(tempHsacoFilename); - // Link using ld.lld + // Link using ld.lld. 
SmallString<128> lldPath(toolkitPath); llvm::sys::path::append(lldPath, "llvm", "bin", "ld.lld"); int lldResult = llvm::sys::ExecuteAndWait( @@ -129,7 +129,7 @@ assembleISAToHSACO(Operation *op, StringRef isa, if (lldResult != 0) return op->emitError("ld.lld invocation failed"); - // Read HSACO file + // Read HSACO file. auto hsacoFile = llvm::MemoryBuffer::getFile(tempHsacoFilename, /*IsText=*/false); if (!hsacoFile) diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index d9087fab4..c2a8fdb1b 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -81,22 +81,20 @@ class WaterGPUModuleToBinaryPass } // namespace LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp mod) { - OpBuilder builder(mod->getContext()); - - // Check if module has target attributes + // Check if module has target attributes. if (!mod.getTargetsAttr() || mod.getTargetsAttr().empty()) return mod.emitError("GPU module has no target attributes"); - // Check that there is exactly one target + // Check that there is exactly one target. if (mod.getTargetsAttr().size() != 1) return mod.emitError("GPU module must have exactly one target attribute"); - // Get the target attribute + // Get the target attribute. Attribute targetAttr = mod.getTargetsAttr()[0]; if (!targetAttr) return mod.emitError("Target attribute cannot be null"); - // Step 1: Translate GPU module to LLVM IR + // Step 1: Translate GPU module to LLVM IR. llvm::LLVMContext llvmContext; std::unique_ptr llvmModule = translateModuleToLLVMIR(mod, llvmContext); @@ -104,7 +102,7 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp mod) { if (!llvmModule) return mod.emitError("Failed to translate GPU module to LLVM IR"); - // Create dump directory if specified + // Create dump directory if specified. 
if (!dumpIntermediates.empty()) { std::error_code ec = llvm::sys::fs::create_directories(dumpIntermediates); if (ec) @@ -127,11 +125,11 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp mod) { return success(); }; - // Dump/override original LLVM IR + // Dump/override original LLVM IR. if (failed(dumpAndOverrideLLVM("_original"))) return failure(); - // Step 2: Load and link device libraries + // Step 2: Load and link device libraries. SmallVector> bitcodeLibs; for (const std::string &path : linkFiles) { auto lib = loadBitcodeFile(llvmContext, path); @@ -143,29 +141,29 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp mod) { if (failed(linkBitcodeFiles(*llvmModule, std::move(bitcodeLibs)))) return mod.emitError("Failed to link bitcode libraries"); - // Dump/override linked LLVM IR + // Dump/override linked LLVM IR. if (failed(dumpAndOverrideLLVM("_linked"))) return failure(); - // Step 3: Create target machine and set data layout + // Step 3: Create target machine and set data layout. FailureOr targetMachine = createTargetMachine(targetAttr); if (failed(targetMachine)) return mod.emitError("Failed to create target machine"); - // Set the data layout and target triple to match the target machine + // Set the data layout and target triple to match the target machine. llvmModule->setDataLayout((*targetMachine)->createDataLayout()); llvmModule->setTargetTriple((*targetMachine)->getTargetTriple()); - // Step 4: Optimize LLVM IR + // Step 4: Optimize LLVM IR. if (failed(optimizeModule(*llvmModule, *targetMachine))) return mod.emitError("Failed to optimize LLVM IR"); - // Dump optimized LLVM IR + // Dump optimized LLVM IR. if (failed(dumpAndOverrideLLVM("_optimized"))) return failure(); - // Step 5: Compile to ISA + // Step 5: Compile to ISA. 
FailureOr isa = compileToISA(*llvmModule, **targetMachine); if (failed(isa)) return mod.emitError("Failed to compile to ISA"); @@ -185,12 +183,12 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp mod) { return success(); }; - // Dump/override ISA + // Dump/override ISA. if (failed(dumpAndOverrideISA(".s"))) return failure(); - // Step 6: Assemble to binary - // Use ROCM_PATH environment variable if toolkitPath is not provided + // Step 6: Assemble to binary. + // Use ROCM_PATH environment variable if toolkitPath is not provided. StringRef actualToolkitPath = toolkitPath; if (actualToolkitPath.empty()) actualToolkitPath = ROCDL::getROCMPath(); @@ -202,11 +200,11 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp mod) { SmallVector binaryData = std::move(*binary); - // Dump HSACO binary + // Dump HSACO binary. if (failed(dumpBinary(binaryData, mod.getName(), ".hsaco"))) return failure(); - // Create object attribute + // Create object attribute. Builder attrBuilder(mod.getContext()); StringAttr binaryAttr = attrBuilder.getStringAttr( StringRef(binaryData.data(), binaryData.size())); @@ -218,13 +216,14 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp mod) { targetAttr, gpu::CompilationTarget::Binary, binaryAttr, properties, kernels); - // Create gpu.binary op + // Create gpu.binary op. + OpBuilder builder(mod.getContext()); builder.setInsertionPointAfter(mod); gpu::BinaryOp::create(builder, mod.getLoc(), mod.getName(), /*offloadingHandler=*/nullptr, builder.getArrayAttr({objectAttr})); - // Erase the original module + // Erase the original module. mod->erase(); return success(); } @@ -250,7 +249,7 @@ LogicalResult WaterGPUModuleToBinaryPass::linkBitcodeFiles( llvm::Linker linker(mod); for (std::unique_ptr &libModule : libs) { - // Link the library, importing only needed symbols + // Link the library, importing only needed symbols. 
bool err = linker.linkInModule( std::move(libModule), llvm::Linker::Flags::LinkOnlyNeeded, [](llvm::Module &m, const StringSet<> &gvs) { @@ -271,7 +270,6 @@ FailureOr WaterGPUModuleToBinaryPass::createTargetMachine(Attribute targetAttr) { water::initializeAMDGPUTarget(); - // Check if this is a ROCDL target auto rocdlTarget = dyn_cast(targetAttr); if (!rocdlTarget) return getOperation()->emitError( @@ -293,7 +291,7 @@ WaterGPUModuleToBinaryPass::createTargetMachine(Attribute targetAttr) { if (!targetMachine) return getOperation()->emitError("Failed to create target machine"); - // Set optimization level from target attribute + // Set optimization level from target attribute. targetMachine->setOptLevel( static_cast(rocdlTarget.getO())); @@ -303,7 +301,7 @@ WaterGPUModuleToBinaryPass::createTargetMachine(Attribute targetAttr) { LogicalResult WaterGPUModuleToBinaryPass::optimizeModule(llvm::Module &mod, llvm::TargetMachine *targetMachine) { - // Get optimization level from target machine + // Get optimization level from target machine. int optLevel = static_cast(targetMachine->getOptLevel()); auto transformer = @@ -444,10 +442,10 @@ WaterGPUModuleToBinaryPass::tryLoadOverrideText(StringRef modName, } void WaterGPUModuleToBinaryPass::runOnOperation() { - // Walk all regions and blocks looking for GPUModuleOp instances + // Walk all regions and blocks looking for GPUModuleOp instances. for (Region &region : getOperation()->getRegions()) { for (Block &block : region.getBlocks()) { - // Use early_inc_range since we're erasing modules during iteration + // Use early_inc_range since we're erasing modules during iteration.
for (auto module : llvm::make_early_inc_range(block.getOps())) { if (failed(serializeModule(module))) From 421a360afd895c84f09d919671ecb4debede3d18 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Wed, 3 Dec 2025 23:58:06 +0100 Subject: [PATCH 21/26] cleanup Signed-off-by: Ivan Butygin --- water/include/water/Transforms/Passes.td | 2 +- water/lib/Transforms/GPUModuleToBinary.cpp | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/water/include/water/Transforms/Passes.td b/water/include/water/Transforms/Passes.td index 40fbd2393..fa27c61d4 100644 --- a/water/include/water/Transforms/Passes.td +++ b/water/include/water/Transforms/Passes.td @@ -120,7 +120,7 @@ def WaterGPUModuleToBinary : Pass<"water-gpu-module-to-binary", ""> { and serializes them to binary format, producing a GPU binary operation. This is a simplified version of the upstream gpu-module-to-binary pass, - tailored for the Water project. Currently supports ROCDL targets only. + tailored for the Wave project. Currently supports ROCDL targets only. }]; let options = [ Option<"toolkitPath", "toolkit", "std::string", [{""}], diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index c2a8fdb1b..b423d55bd 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -81,12 +81,8 @@ class WaterGPUModuleToBinaryPass } // namespace LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp mod) { - // Check if module has target attributes. - if (!mod.getTargetsAttr() || mod.getTargetsAttr().empty()) - return mod.emitError("GPU module has no target attributes"); - // Check that there is exactly one target. - if (mod.getTargetsAttr().size() != 1) + if (!mod.getTargetsAttr() || mod.getTargetsAttr().size() != 1) return mod.emitError("GPU module must have exactly one target attribute"); // Get the target attribute. 
From 91f8555197280b7b1d6fca8a9d046d69170be1f2 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Thu, 4 Dec 2025 00:14:29 +0100 Subject: [PATCH 22/26] override hsaco Signed-off-by: Ivan Butygin --- water/lib/Transforms/GPUModuleToBinary.cpp | 44 +++++++++++++++++++++- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index b423d55bd..bfa08e24e 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -77,6 +77,8 @@ class WaterGPUModuleToBinaryPass StringRef suffix); FailureOr> tryLoadOverrideText(StringRef modName, StringRef suffix); + FailureOr>> + tryLoadOverrideBinary(StringRef modName, StringRef suffix); }; } // namespace @@ -196,8 +198,23 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp mod) { SmallVector binaryData = std::move(*binary); - // Dump HSACO binary. - if (failed(dumpBinary(binaryData, mod.getName(), ".hsaco"))) + auto dumpAndOverrideBinary = [&](StringRef suffix) -> LogicalResult { + StringRef modName = mod.getName(); + if (failed(dumpBinary(binaryData, modName, suffix))) + return failure(); + + auto overrideBinary = tryLoadOverrideBinary(modName, suffix); + if (failed(overrideBinary)) + return failure(); + + if (*overrideBinary) + binaryData = std::move(**overrideBinary); + + return success(); + }; + + // Dump/override HSACO binary. + if (failed(dumpAndOverrideBinary(".hsaco"))) return failure(); // Create object attribute. 
@@ -437,6 +454,29 @@ WaterGPUModuleToBinaryPass::tryLoadOverrideText(StringRef modName, return std::optional(bufferOrError.get()->getBuffer().str()); } +FailureOr>> +WaterGPUModuleToBinaryPass::tryLoadOverrideBinary(StringRef modName, + StringRef suffix) { + if (overrideIntermediates.empty()) + return std::optional>(std::nullopt); + + SmallString<128> path(overrideIntermediates); + llvm::sys::path::append(path, modName + suffix); + + if (!llvm::sys::fs::exists(path)) + return std::optional>(std::nullopt); + + auto bufferOrError = llvm::MemoryBuffer::getFile(path); + if (!bufferOrError) + return getOperation()->emitError() + << "Failed to load override binary from " << path << ": " + << bufferOrError.getError().message(); + + StringRef data = bufferOrError.get()->getBuffer(); + SmallVector result(data.begin(), data.end()); + return std::optional>(std::move(result)); +} + void WaterGPUModuleToBinaryPass::runOnOperation() { // Walk all regions and blocks looking for GPUModuleOp instances. for (Region &region : getOperation()->getRegions()) { From e00427ed76f2c70749cd5239acd1929095dcef25 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Thu, 4 Dec 2025 00:20:47 +0100 Subject: [PATCH 23/26] update test Signed-off-by: Ivan Butygin --- water/test/Transforms/gpu-module-to-binary-dump.mlir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/water/test/Transforms/gpu-module-to-binary-dump.mlir b/water/test/Transforms/gpu-module-to-binary-dump.mlir index e36553739..b21329299 100644 --- a/water/test/Transforms/gpu-module-to-binary-dump.mlir +++ b/water/test/Transforms/gpu-module-to-binary-dump.mlir @@ -1,4 +1,4 @@ -// RUN: rm -rf %t && mkdir -p %t +// RUN: rm -rf %t // RUN: water-opt %s --water-gpu-module-to-binary="dump-intermediates=%t" | FileCheck %s // RUN: test -f %t/kernel_module_original.ll // RUN: test -f %t/kernel_module_linked.ll From 9b27a3f6d17bf1812e49acb16127725e0b345efe Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Thu, 4 Dec 2025 00:41:14 +0100 
Subject: [PATCH 24/26] add lib Signed-off-by: Ivan Butygin --- water/lib/Transforms/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/water/lib/Transforms/CMakeLists.txt b/water/lib/Transforms/CMakeLists.txt index d81eec243..7ad8c3f99 100644 --- a/water/lib/Transforms/CMakeLists.txt +++ b/water/lib/Transforms/CMakeLists.txt @@ -22,6 +22,7 @@ add_mlir_dialect_library(MLIRWaterTransforms MLIRLLVMDialect MLIRMemRefDialect MLIRPass + MLIRROCDLTarget MLIRRewrite MLIRTransformUtils MLIRVectorDialect From 78160aec0613e93146cb807289516cfe46931b28 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Thu, 4 Dec 2025 00:43:44 +0100 Subject: [PATCH 25/26] cleanup Signed-off-by: Ivan Butygin --- water/include/water/Transforms/Passes.td | 2 -- 1 file changed, 2 deletions(-) diff --git a/water/include/water/Transforms/Passes.td b/water/include/water/Transforms/Passes.td index fa27c61d4..3263951fd 100644 --- a/water/include/water/Transforms/Passes.td +++ b/water/include/water/Transforms/Passes.td @@ -127,8 +127,6 @@ def WaterGPUModuleToBinary : Pass<"water-gpu-module-to-binary", ""> { "Toolkit path.">, ListOption<"linkFiles", "l", "std::string", "Extra bitcode files to link to.">, - Option<"cmdOptions", "opts", "std::string", [{""}], - "Command line options to pass to the compilation tools.">, Option<"dumpIntermediates", "dump-intermediates", "std::string", [{""}], "Directory to dump intermediate compilation files (LLVM IR, ISA).">, Option<"overrideIntermediates", "override-intermediates", "std::string", [{""}], From 7f134f684b2b221cf00743ecb76573e178973f57 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Thu, 4 Dec 2025 01:16:42 +0100 Subject: [PATCH 26/26] fixes Signed-off-by: Ivan Butygin --- water/lib/Transforms/AssembleISA.cpp | 3 ++- water/lib/Transforms/GPUModuleToBinary.cpp | 20 +++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/water/lib/Transforms/AssembleISA.cpp b/water/lib/Transforms/AssembleISA.cpp index 
0d6db2c7e..6a931e7ff 100644 --- a/water/lib/Transforms/AssembleISA.cpp +++ b/water/lib/Transforms/AssembleISA.cpp @@ -97,7 +97,8 @@ assembleISAToHSACO(Operation *op, StringRef isa, return op->emitError("Assembler initialization error"); parser->setTargetParser(*tap); - parser->Run(false); + if (parser->Run(false)) + return op->emitError("Assembly parsing failed"); // Step 2: Link object file to create HSACO. // Write object to temporary file. diff --git a/water/lib/Transforms/GPUModuleToBinary.cpp b/water/lib/Transforms/GPUModuleToBinary.cpp index bfa08e24e..f2fd8f970 100644 --- a/water/lib/Transforms/GPUModuleToBinary.cpp +++ b/water/lib/Transforms/GPUModuleToBinary.cpp @@ -17,6 +17,7 @@ #include "mlir/Target/LLVM/ROCDL/Utils.h" #include "mlir/Target/LLVMIR/Export.h" +#include "llvm/ADT/StringSet.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" @@ -58,9 +59,10 @@ class WaterGPUModuleToBinaryPass LogicalResult linkBitcodeFiles(llvm::Module &mod, SmallVector> &&libs); - FailureOr createTargetMachine(Attribute targetAttr); + FailureOr> + createTargetMachine(Attribute targetAttr); LogicalResult optimizeModule(llvm::Module &mod, - llvm::TargetMachine *targetMachine); + llvm::TargetMachine &targetMachine); FailureOr compileToISA(llvm::Module &mod, llvm::TargetMachine &targetMachine); @@ -144,7 +146,7 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp mod) { return failure(); // Step 3: Create target machine and set data layout. - FailureOr targetMachine = + FailureOr> targetMachine = createTargetMachine(targetAttr); if (failed(targetMachine)) return mod.emitError("Failed to create target machine"); @@ -154,7 +156,7 @@ LogicalResult WaterGPUModuleToBinaryPass::serializeModule(GPUModuleOp mod) { llvmModule->setTargetTriple((*targetMachine)->getTargetTriple()); // Step 4: Optimize LLVM IR. 
- if (failed(optimizeModule(*llvmModule, *targetMachine))) + if (failed(optimizeModule(*llvmModule, **targetMachine))) return mod.emitError("Failed to optimize LLVM IR"); // Dump optimized LLVM IR. @@ -279,7 +281,7 @@ LogicalResult WaterGPUModuleToBinaryPass::linkBitcodeFiles( return success(); } -FailureOr +FailureOr> WaterGPUModuleToBinaryPass::createTargetMachine(Attribute targetAttr) { water::initializeAMDGPUTarget(); @@ -308,17 +310,17 @@ WaterGPUModuleToBinaryPass::createTargetMachine(Attribute targetAttr) { targetMachine->setOptLevel( static_cast(rocdlTarget.getO())); - return targetMachine.release(); + return targetMachine; } LogicalResult WaterGPUModuleToBinaryPass::optimizeModule(llvm::Module &mod, - llvm::TargetMachine *targetMachine) { + llvm::TargetMachine &targetMachine) { // Get optimization level from target machine. - int optLevel = static_cast(targetMachine->getOptLevel()); + int optLevel = static_cast(targetMachine.getOptLevel()); auto transformer = - makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, targetMachine); + makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine); auto error = transformer(&mod); if (error) { InFlightDiagnostic mlirError = getOperation()->emitError();