From ed529bcdfd9b5f995f00ea5489733621b204aaf1 Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Fri, 24 Oct 2025 18:45:20 +0200 Subject: [PATCH 1/3] Add NVTX marker instrumentation --- src/gt4py/next/metrics.py | 1 + .../runners/dace/workflow/translation.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/src/gt4py/next/metrics.py b/src/gt4py/next/metrics.py index f6a48ca7f6..b6a4beaeef 100644 --- a/src/gt4py/next/metrics.py +++ b/src/gt4py/next/metrics.py @@ -38,6 +38,7 @@ # Metric collection levels DISABLED: Final[int] = 0 MINIMAL: Final[int] = 1 +GPU_TX_MARKERS: Final[int] = 2 PERFORMANCE: Final[int] = 10 INFO: Final[int] = 30 VERBOSE: Final[int] = 50 diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/translation.py b/src/gt4py/next/program_processors/runners/dace/workflow/translation.py index 996c0f06e5..a7d583cc02 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/translation.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/translation.py @@ -267,6 +267,14 @@ def add_instrumentation(sdfg: dace.SDFG, gpu: bool) -> None: dace.Memlet(f"{output}[0]"), ) + if (config.COLLECT_METRICS_LEVEL == metrics.GPU_TX_MARKERS) and gpu: + sdfg.instrument = dace.dtypes.InstrumentationType.GPU_TX_MARKERS + for node, _ in sdfg.all_nodes_recursive(): + if isinstance(node, dace.nodes.MapEntry): + node.instrument = dace.dtypes.InstrumentationType.GPU_TX_MARKERS + elif isinstance(node, dace.sdfg.state.SDFGState): + node.instrument = dace.dtypes.InstrumentationType.GPU_TX_MARKERS + # Check SDFG validity after applying the above changes. sdfg.validate() From 494570b2c9b9cf3ed6e0a61fa82dc3545a429fd3 Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Mon, 27 Oct 2025 14:25:13 +0100 Subject: [PATCH 2/3] Make sure that instrumentation reports from dace are not written in every function call --- .../next/program_processors/runners/dace/workflow/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/common.py b/src/gt4py/next/program_processors/runners/dace/workflow/common.py index fa6ef492fa..5eb1b65af6 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/common.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/common.py @@ -114,7 +114,7 @@ def set_dace_config( dace.Config.set("compiler.cuda.backend", value="cuda") # Instrumentation of SDFG timers - dace.Config.set("instrumentation", "report_each_invocation", value=True) + dace.Config.set("instrumentation", "report_each_invocation", value=False) # we are not interested in storing the history of SDFG transformations. dace.Config.set("store_history", value=False) From 9fc176494d8d629cc04f117546ac7a26d26b317f Mon Sep 17 00:00:00 2001 From: Ioannis Magkanaris Date: Mon, 27 Oct 2025 14:58:59 +0100 Subject: [PATCH 3/3] Added comment for map entries --- .../program_processors/runners/dace/workflow/translation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/translation.py b/src/gt4py/next/program_processors/runners/dace/workflow/translation.py index a7d583cc02..1ed7baf77f 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/translation.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/translation.py @@ -270,7 +270,9 @@ def add_instrumentation(sdfg: dace.SDFG, gpu: bool) -> None: if (config.COLLECT_METRICS_LEVEL == metrics.GPU_TX_MARKERS) and gpu: sdfg.instrument = dace.dtypes.InstrumentationType.GPU_TX_MARKERS for node, _ in sdfg.all_nodes_recursive(): - if isinstance(node, dace.nodes.MapEntry): + if isinstance( + node, dace.nodes.MapEntry + ): # Add ranges to scopes and maps that are NOT scheduled to the GPU node.instrument = dace.dtypes.InstrumentationType.GPU_TX_MARKERS elif isinstance(node, dace.sdfg.state.SDFGState): node.instrument = dace.dtypes.InstrumentationType.GPU_TX_MARKERS