Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,3 @@
[submodule "submodules/aiter"]
path = submodules/aiter
url = https://github.com/ROCm/aiter.git
[submodule "submodules/quack"]
path = submodules/quack
url = https://github.com/Dao-AILab/quack.git
1 change: 0 additions & 1 deletion submodules/quack
Submodule quack deleted from a42fef
20 changes: 19 additions & 1 deletion tools/quack/install.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,32 @@
import os
import shutil
import subprocess

from pathlib import Path


# Directory three levels above this file's absolute path (for a script at
# tools/quack/install.py this is the repository checkout root).
REPO_PATH = Path(os.path.abspath(__file__)).parent.parent.parent
# Directory that contains this script.
CURRENT_DIR = Path(os.path.abspath(__file__)).parent
# Location of quack when it is provided as a git submodule under REPO_PATH.
QUACK_PATH = REPO_PATH.joinpath("submodules", "quack")

# Upstream repository URL and the exact commit hash that install_quack()
# clones and checks out.
QUACK_REPO = "https://github.com/Dao-AILab/quack.git"
QUACK_SHA = "bceb632dbac9bb0b55d48a7ed3ad204bd952fcb2"

# Parent directory under REPO_PATH that receives the quack clone.
QUACK_INSTALL_PATH = REPO_PATH.joinpath(".install")


def install_quack():
    """Install quack in editable mode from the directory at QUACK_PATH.

    Runs ``pip install -e .`` with QUACK_PATH as the working directory.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    # NOTE(review): another `install_quack` definition appears later in this
    # view; if both live in the same module the later one takes effect.
    subprocess.check_call(["pip", "install", "-e", "."], cwd=QUACK_PATH)


def install_quack():
    """Clone quack, pin it to QUACK_SHA, and install it in editable mode.

    The clone is placed in ``QUACK_INSTALL_PATH / "quack"``. Any directory
    already present at that location is deleted first, so each call starts
    from a fresh clone.

    Raises:
        subprocess.CalledProcessError: if git or pip exits with a non-zero
            status.
    """
    QUACK_INSTALL_PATH.mkdir(parents=True, exist_ok=True)
    checkout_dir = QUACK_INSTALL_PATH / "quack"
    # Remove a leftover clone; `git clone` refuses to write into a
    # non-empty existing directory.
    if checkout_dir.exists():
        shutil.rmtree(checkout_dir)

    # (command, working directory) pairs, executed strictly in order:
    # clone into the install dir, then pin and install from inside the clone.
    steps = (
        (["git", "clone", QUACK_REPO], QUACK_INSTALL_PATH),
        (["git", "checkout", QUACK_SHA], checkout_dir),
        (["pip", "install", "-e", ".[dev]"], checkout_dir),
    )
    for command, workdir in steps:
        subprocess.check_call(command, cwd=workdir)
18 changes: 18 additions & 0 deletions tritonbench/operators/launch_latency/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,24 @@ def nop_cutedsl(self, *args):
cute_args = cute_args[:-5]
return lambda: kernel(*cute_args)

@register_benchmark(enabled=HAS_CUTEDSL)
def nop_cutedsl_tvm_ffi(self, *args):
    """Build a zero-argument callable that launches a compiled CuTe DSL no-op kernel.

    With no positional arguments, ``cutedsl_nop_kernel`` is compiled and the
    returned callable launches it with no arguments. Otherwise every
    ``torch.Tensor`` argument is converted with
    ``cute.runtime.from_dlpack(..., enable_tvm_ffi=True)``, other arguments
    are passed through unchanged, and ``cutedsl_nop_with_args_kernel`` is
    compiled with ``options="--enable-tvm-ffi"``.

    Args:
        *args: Benchmark inputs; may be empty.

    Returns:
        A callable taking no parameters that invokes the compiled kernel.
    """
    if not args:
        compiled = cute.compile(cutedsl_nop_kernel)
        return lambda: compiled()

    converted = [
        cute.runtime.from_dlpack(value, enable_tvm_ffi=True)
        if isinstance(value, torch.Tensor)
        else value
        for value in args
    ]
    compiled = cute.compile(
        cutedsl_nop_with_args_kernel, *converted, options="--enable-tvm-ffi"
    )
    # remove constexpr args: the last five entries are passed to compile()
    # above but are not forwarded when the compiled kernel is launched.
    launch_args = converted[:-5]
    return lambda: compiled(*launch_args)

@register_benchmark(baseline=True)
def nop_python_function(self, *args):
def nop():
Expand Down
3 changes: 3 additions & 0 deletions tritonbench/utils/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,4 +416,7 @@ def get_parser(args=None):
)
if args.isolate:
parser.error("A/B testing is not compatible with --isolate mode")

if args.metrics and "walltime_kineto_trace" in args.metrics and args.repcnt is None:
parser.error("Walltime Kineto trace requires --repcnt to be specified")
return parser
Loading