[Target] Add target to all TVM callbacks (apache#14939)

junrushao · web-flow · commit e11913be06b3 · 2023-05-25T09:10:41.000+03:00
* [Target] Add target to all TVM callbacks

This PR adds an extra `target` parameter to every `tvm_callback_*` function so
that each callback can decide its own behavior by querying the target it is
being compiled for.

* fix lint

* fix lint
diff --git a/apps/ios_rpc/tests/ios_rpc_mobilenet.py b/apps/ios_rpc/tests/ios_rpc_mobilenet.py
@@ -15,24 +15,24 @@
 # specific language governing permissions and limitations
 # under the License.
 
-import tvm
-from tvm import rpc, relay
-from tvm.contrib.download import download_testdata
-from tvm.relay.expr_functor import ExprMutator
-from tvm.relay import transform
-from tvm.relay.op.annotation import compiler_begin, compiler_end
-from tvm.relay.quantize.quantize import prerequisite_optimize
-from tvm.contrib import utils, xcode, graph_executor, coreml_runtime
-from tvm.contrib.target import coreml as _coreml
-
+import argparse
 import os
 import re
 import sys
+
+import coremltools
 import numpy as np
+import tvm
 from mxnet import gluon
 from PIL import Image
-import coremltools
-import argparse
+from tvm import relay, rpc
+from tvm.contrib import coreml_runtime, graph_executor, utils, xcode
+from tvm.contrib.download import download_testdata
+from tvm.contrib.target import coreml as _coreml
+from tvm.relay import transform
+from tvm.relay.expr_functor import ExprMutator
+from tvm.relay.op.annotation import compiler_begin, compiler_end
+from tvm.relay.quantize.quantize import prerequisite_optimize
 
 # Change target configuration, this is setting for iphone6s
 # arch = "x86_64"
@@ -43,9 +43,10 @@
 
 MODES = {"proxy": rpc.connect, "tracker": rpc.connect_tracker, "standalone": rpc.connect}
 
+
 # override metal compiler to compile to iphone
 @tvm.register_func("tvm_callback_metal_compile")
-def compile_metal(src):
+def compile_metal(src, target):
     return xcode.compile_metal(src, sdk=sdk)
 
 
diff --git a/apps/ios_rpc/tests/ios_rpc_test.py b/apps/ios_rpc/tests/ios_rpc_test.py
@@ -20,15 +20,15 @@
 And configure the proxy host field as commented.
 """
 
-import tvm
-from tvm import te
+import argparse
 import os
 import re
 import sys
-from tvm import rpc
-from tvm.contrib import utils, xcode
+
 import numpy as np
-import argparse
+import tvm
+from tvm import rpc, te
+from tvm.contrib import utils, xcode
 
 # Change target configuration, this is setting for iphone6s
 arch = "arm64"
@@ -37,9 +37,10 @@
 
 MODES = {"proxy": rpc.connect, "tracker": rpc.connect_tracker, "standalone": rpc.connect}
 
+
 # override metal compiler to compile to iphone
 @tvm.register_func("tvm_callback_metal_compile")
-def compile_metal(src):
+def compile_metal(src, target):
     return xcode.compile_metal(src, sdk=sdk)
 
 
diff --git a/apps/topi_recipe/broadcast/test_broadcast_map.py b/apps/topi_recipe/broadcast/test_broadcast_map.py
@@ -15,20 +15,18 @@
 # specific language governing permissions and limitations
 # under the License.
 import os
+
+import numpy as np
 import tvm
-from tvm import te
+from tvm import te, topi
 from tvm.contrib import nvcc
-import numpy as np
-
-from tvm import topi
-
 
 TASK = "reduce_map"
 USE_MANUAL_CODE = False
 
 
 @tvm.register_func("tvm_callback_cuda_compile", override=True)
-def tvm_callback_cuda_compile(code):
+def tvm_callback_cuda_compile(code, target):
     ptx = nvcc.compile_cuda(code, target_format="ptx")
     return ptx
 
@@ -39,7 +37,7 @@ def write_code(code, fname):
 
 
 @tvm.register_func
-def tvm_callback_cuda_postproc(code):
+def tvm_callback_cuda_postproc(code, target):
     if not os.path.exists("perf"):
         os.mkdir("perf")
     write_code(code, "perf/%s_generated.cu" % TASK)
diff --git a/apps/topi_recipe/conv/depthwise_conv2d_test.py b/apps/topi_recipe/conv/depthwise_conv2d_test.py
@@ -15,25 +15,24 @@
 # specific language governing permissions and limitations
 # under the License.
 import os
-import tvm
-from tvm import te
+
 import numpy as np
+import tvm
 from scipy import signal
+from tvm import te, topi
 from tvm.contrib import nvcc
-
-from tvm import topi
-from tvm.topi.utils import get_const_tuple
 from tvm.topi.cuda.depthwise_conv2d import (
     schedule_depthwise_conv2d_nchw,
     schedule_depthwise_conv2d_nhwc,
 )
+from tvm.topi.utils import get_const_tuple
 
 TASK = "depthwise_conv2d"
 USE_MANUAL_CODE = False
 
 
 @tvm.register_func("tvm_callback_cuda_compile", override=True)
-def tvm_callback_cuda_compile(code):
+def tvm_callback_cuda_compile(code, target):
     ptx = nvcc.compile_cuda(code, target_format="ptx")
     return ptx
 
@@ -44,7 +43,7 @@ def write_code(code, fname):
 
 
 @tvm.register_func
-def tvm_callback_cuda_postproc(code):
+def tvm_callback_cuda_postproc(code, target):
     if not os.path.exists("perf"):
         os.mkdir("perf")
     write_code(code, "perf/%s_generated.cu" % TASK)
diff --git a/apps/topi_recipe/conv/test_conv2d_hwcn_map.py b/apps/topi_recipe/conv/test_conv2d_hwcn_map.py
@@ -16,20 +16,19 @@
 # under the License.
 """Example code to do convolution."""
 import os
+
 import numpy as np
-import scipy.signal
 import tvm
-from tvm import te
+from tvm import te, topi
 from tvm.contrib import nvcc
-from tvm import topi
 from tvm.topi.utils import get_const_tuple
 
 TASK = "conv2d_hwcn_map"
 USE_MANUAL_CODE = False
 
 
 @tvm.register_func("tvm_callback_cuda_compile", override=True)
-def tvm_callback_cuda_compile(code):
+def tvm_callback_cuda_compile(code, target):
     ptx = nvcc.compile_cuda(code, target_format="ptx")
     return ptx
 
@@ -40,7 +39,7 @@ def write_code(code, fname):
 
 
 @tvm.register_func
-def tvm_callback_cuda_postproc(code):
+def tvm_callback_cuda_postproc(code, target):
     if not os.path.exists("perf"):
         os.mkdir("perf")
     write_code(code, "perf/%s_generated.cu" % TASK)
diff --git a/apps/topi_recipe/reduce/test_reduce_map.py b/apps/topi_recipe/reduce/test_reduce_map.py
@@ -15,13 +15,11 @@
 # specific language governing permissions and limitations
 # under the License.
 import os
+
+import numpy as np
 import tvm
-from tvm import te
+from tvm import te, topi
 from tvm.contrib import nvcc
-import numpy as np
-
-from tvm import topi
-
 
 TASK = "reduce_map"
 USE_MANUAL_CODE = False
@@ -33,7 +31,7 @@ def write_code(code, fname):
 
 
 @tvm.register_func
-def tvm_callback_cuda_postproc(code):
+def tvm_callback_cuda_postproc(code, target):
     if not os.path.exists("perf"):
         os.mkdir("perf")
     write_code(code, "perf/%s_generated.cu" % TASK)
diff --git a/apps/topi_recipe/rnn/lstm.py b/apps/topi_recipe/rnn/lstm.py
@@ -15,11 +15,12 @@
 # specific language governing permissions and limitations
 # under the License.
 """LSTM Example, still work in progress.."""
+import os
+
+import numpy as np
 import tvm
 from tvm import te
-import os
 from tvm.contrib import nvcc
-import numpy as np
 
 # Quick knobs
 TASK = "lstm"
@@ -31,7 +32,7 @@
 
 
 @tvm.register_func("tvm_callback_cuda_compile", override=True)
-def tvm_callback_cuda_compile(code):
+def tvm_callback_cuda_compile(code, target):
     """Use nvcc compiler for better perf."""
     ptx = nvcc.compile_cuda(code, target_format="ptx")
     return ptx
@@ -43,7 +44,7 @@ def write_code(code, fname):
 
 
 @tvm.register_func
-def tvm_callback_cuda_postproc(code):
+def tvm_callback_cuda_postproc(code, target):
     if not os.path.exists("perf"):
         os.mkdir("perf")
     write_code(code, "perf/%s_generated.cu" % TASK)
diff --git a/apps/topi_recipe/rnn/matexp.py b/apps/topi_recipe/rnn/matexp.py
@@ -23,13 +23,14 @@
 X[t] = dot(X[t-1], W)
 ```
 """
+import argparse
+import os
+import time
+
+import numpy as np
 import tvm
 from tvm import te
-import time
-import os
-import argparse
 from tvm.contrib import nvcc
-import numpy as np
 
 # Quick knobs
 TASK = "matexp"
@@ -40,7 +41,7 @@
 
 
 @tvm.register_func("tvm_callback_cuda_compile", override=True)
-def tvm_callback_cuda_compile(code):
+def tvm_callback_cuda_compile(code, target):
     """Use nvcc compiler for better perf."""
     ptx = nvcc.compile_cuda(code, target_format="ptx")
     return ptx
@@ -52,7 +53,7 @@ def write_code(code, fname):
 
 
 @tvm.register_func
-def tvm_callback_cuda_postproc(code):
+def tvm_callback_cuda_postproc(code, target):
     if not os.path.exists("perf"):
         os.mkdir("perf")
     write_code(code, "perf/%s_generated.cu" % TASK)
diff --git a/jvm/core/src/test/scripts/test_add_gpu.py b/jvm/core/src/test/scripts/test_add_gpu.py
@@ -18,11 +18,11 @@
 
 import tvm
 from tvm import te
-from tvm.contrib import cc, utils, nvcc
+from tvm.contrib import cc, nvcc, utils
 
 
 @tvm.register_func("tvm_callback_cuda_compile", override=True)
-def tvm_callback_cuda_compile(code):
+def tvm_callback_cuda_compile(code, target):
     ptx = nvcc.compile_cuda(code, target_format="ptx")
     return ptx
 
diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py
@@ -18,15 +18,15 @@
 """Utility to invoke nvcc compiler in the system"""
 from __future__ import absolute_import as _abs
 
-import subprocess
 import os
+import subprocess
 import warnings
 
 import tvm._ffi
 from tvm.target import Target
 
-from . import utils
 from .._ffi.base import py_str
+from . import utils
 
 
 def compile_cuda(code, target_format="ptx", arch=None, options=None, path_target=None):
@@ -184,7 +184,7 @@ def get_cuda_version(cuda_path=None):
 
 
 @tvm._ffi.register_func
-def tvm_callback_cuda_compile(code):
+def tvm_callback_cuda_compile(code, target):  # pylint: disable=unused-argument
     """use nvcc to generate fatbin code for better optimization"""
     ptx = compile_cuda(code, target_format="fatbin")
     return ptx
diff --git a/python/tvm/contrib/sdaccel.py b/python/tvm/contrib/sdaccel.py
@@ -15,15 +15,16 @@
 # specific language governing permissions and limitations
 # under the License.
 """Utility for Interacting with SDAccel Tools"""
-import subprocess
 import os
+import subprocess
 
 import tvm._ffi
+
 from . import utils
 
 
 @tvm._ffi.register_func("tvm_callback_sdaccel_compile")
-def compile_vhls(kernel_info, device_name):
+def compile_vhls(kernel_info, target):
     """Compile Vivado HLS code for SDAccel.
 
     Parameters
@@ -32,14 +33,15 @@ def compile_vhls(kernel_info, device_name):
         List of kernel information.  The kernel information is a tuple of
         function name and source code.
 
-    device_name : str
-        The name of the target device
+    target : tvm.target.Target
+        The compilation target
 
     Return
     ------
     xclbin : bytearray
         The bytearray of the xclbin
     """
+    device_name = target.attrs.get("device", "")
     tmp_dir = utils.tempdir()
 
     sdk = os.environ.get("XILINX_SDX", None)
diff --git a/src/target/opt/build_cuda_on.cc b/src/target/opt/build_cuda_on.cc
@@ -143,14 +143,14 @@ runtime::Module BuildCUDA(IRModule mod, Target target) {
   std::string code = cg.Finish();
 
   if (const auto* f = Registry::Get("tvm_callback_cuda_postproc")) {
-    code = (*f)(code).operator std::string();
+    code = (*f)(code, target).operator std::string();
   }
   std::string fmt = "ptx";
   std::string ptx;
   const auto* f_enter = Registry::Get("target.TargetEnterScope");
   (*f_enter)(target);
   if (const auto* f = Registry::Get("tvm_callback_cuda_compile")) {
-    ptx = (*f)(code).operator std::string();
+    ptx = (*f)(code, target).operator std::string();
     // Dirty matching to check PTX vs cubin.
     // TODO(tqchen) more reliable checks
     if (ptx[0] != '/') fmt = "cubin";
diff --git a/src/target/source/codegen_aocl.cc b/src/target/source/codegen_aocl.cc
@@ -51,7 +51,7 @@ runtime::Module BuildAOCL(IRModule mod, Target target, bool emulation) {
 
   std::string code = cg.Finish();
   if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) {
-    code = (*f)(code).operator std::string();
+    code = (*f)(code, target).operator std::string();
   }
 
   // Write a .cl file.
diff --git a/src/target/source/codegen_metal.cc b/src/target/source/codegen_metal.cc
@@ -365,7 +365,7 @@ runtime::Module BuildMetal(IRModule mod, Target target) {
     std::string fsource = cg.Finish();
     source_maker << fsource << "\n";
     if (fmetal_compile) {
-      fsource = (*fmetal_compile)(fsource).operator std::string();
+      fsource = (*fmetal_compile)(fsource, target).operator std::string();
     }
     smap[func_name] = fsource;
   }
diff --git a/src/target/source/codegen_opencl.cc b/src/target/source/codegen_opencl.cc
diff --git a/src/target/source/codegen_vhls.cc b/src/target/source/codegen_vhls.cc
diff --git a/src/target/spirv/spirv_utils.cc b/src/target/spirv/spirv_utils.cc
diff --git a/tests/python/integration/test_ewise.py b/tests/python/integration/test_ewise.py
diff --git a/tests/python/integration/test_ewise_fpga.py b/tests/python/integration/test_ewise_fpga.py
diff --git a/tests/python/unittest/test_tir_transform_inject_ptx_async_copy.py b/tests/python/unittest/test_tir_transform_inject_ptx_async_copy.py

Original file line number	Diff line number	Diff line change
`@@ -51,7 +51,7 @@ runtime::Module BuildAOCL(IRModule mod, Target target, bool emulation) {`
`51`	`51`
`52`	`52`	`std::string code = cg.Finish();`
`53`	`53`	`if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) {`
`54`		`- code = (*f)(code).operator std::string();`
	`54`	`+ code = (*f)(code, target).operator std::string();`
`55`	`55`	`}`
`56`	`56`
`57`	`57`	`// Write a .cl file.`
Original file line number	Diff line number	Diff line change
`@@ -365,7 +365,7 @@ runtime::Module BuildMetal(IRModule mod, Target target) {`
`365`	`365`	`std::string fsource = cg.Finish();`
`366`	`366`	`source_maker << fsource << "\n";`
`367`	`367`	`if (fmetal_compile) {`
`368`		`- fsource = (*fmetal_compile)(fsource).operator std::string();`
	`368`	`+ fsource = (*fmetal_compile)(fsource, target).operator std::string();`
`369`	`369`	`}`
`370`	`370`	`smap[func_name] = fsource;`
`371`	`371`	`}`