diff --git a/src/gt4py/next/metrics.py b/src/gt4py/next/metrics.py index f6a48ca7f6..b6a4beaeef 100644 --- a/src/gt4py/next/metrics.py +++ b/src/gt4py/next/metrics.py @@ -38,6 +38,7 @@ # Metric collection levels DISABLED: Final[int] = 0 MINIMAL: Final[int] = 1 +GPU_TX_MARKERS: Final[int] = 2 PERFORMANCE: Final[int] = 10 INFO: Final[int] = 30 VERBOSE: Final[int] = 50 diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/common.py b/src/gt4py/next/program_processors/runners/dace/workflow/common.py index fa6ef492fa..5eb1b65af6 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/common.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/common.py @@ -114,7 +114,7 @@ def set_dace_config( dace.Config.set("compiler.cuda.backend", value="cuda") # Instrumentation of SDFG timers - dace.Config.set("instrumentation", "report_each_invocation", value=True) + dace.Config.set("instrumentation", "report_each_invocation", value=False) # we are not interested in storing the history of SDFG transformations. dace.Config.set("store_history", value=False) diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/translation.py b/src/gt4py/next/program_processors/runners/dace/workflow/translation.py index e5a556eb4f..41684ee5c2 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/translation.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/translation.py @@ -265,6 +265,18 @@ def add_instrumentation(sdfg: dace.SDFG, gpu: bool) -> None: None, dace.Memlet(f"{output}[0]"), ) + + if (config.COLLECT_METRICS_LEVEL == metrics.GPU_TX_MARKERS) and gpu: + sdfg.instrument = dace.dtypes.InstrumentationType.GPU_TX_MARKERS + for node, _ in sdfg.all_nodes_recursive(): + if isinstance( + node, dace.nodes.MapEntry + ): # Add ranges to scopes and maps that are NOT scheduled to the GPU + node.instrument = dace.dtypes.InstrumentationType.GPU_TX_MARKERS + elif isinstance(node, dace.sdfg.state.SDFGState): + node.instrument = dace.dtypes.InstrumentationType.GPU_TX_MARKERS + + # Check SDFG validity after applying the above changes. # Normally, we do not call `SDFGState.add_tasklet()` directly, instead we call # the wrapper provided by `DataflowBuilder`, that modifies the tasklet connectors # to avoid name conflicts with program symbols. However, this method is not