4 changes: 2 additions & 2 deletions projects/rocprofiler-compute/.pre-commit-config.yaml
@@ -2,7 +2,7 @@ default_stages: [pre-commit]
fail_fast: true
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
rev: v6.0.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
@@ -12,7 +12,7 @@ repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version. Check https://github.com/astral-sh/ruff-pre-commit#version-compatibility
# for the latest ruff version supported by the hook.
rev: v0.12.12
rev: v0.14.11
hooks:
- id: ruff-check
args: [--fix]
70 changes: 40 additions & 30 deletions projects/rocprofiler-compute/docs/how-to/analyze/cli.rst
@@ -15,7 +15,7 @@ This section provides an overview of ROCm Compute Profiler's CLI analysis features
* :ref:`Metric customization <cli-analysis-options>`: Isolate a subset of built-in metrics or build your own profiling configuration.

* :ref:`Filtering <cli-analysis-options>`: Home in on a particular kernel, GPU ID, or dispatch ID via post-process filtering.

* :ref:`Per-kernel roofline analysis <per-kernel-roofline>`: Detailed arithmetic intensity and performance analysis for individual kernels.

Run ``rocprof-compute analyze -h`` for more details.
@@ -534,36 +534,46 @@ Analysis database example

.. code-block:: shell-session

$ rocprof-compute analyze --verbose --db test -p workloads/vmem/MI300X_A1 -p workloads/vmem1/MI300X_A1
$ rocprof-compute analyze --verbose --output-name test --output-format db -p workloads/nbody/MI300X_A1 -p workloads/nbody1/MI300X_A1
DEBUG Execution mode = analyze

__ _
_ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___
| '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \
| | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/
|_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___|
|_| |_|

INFO Analysis mode = db
DEBUG [omnisoc init]
DEBUG [omnisoc init]
DEBUG [analysis] prepping to do some analysis
INFO [analysis] deriving rocprofiler-compute metrics...
WARNING Roofline ceilings not found for /app/projects/rocprofiler-compute/workloads/vmem/MI300X_A1.
WARNING Roofline ceilings not found for /app/projects/rocprofiler-compute/workloads/vmem1/MI300X_A1.
WARNING PC sampling data not found for /app/projects/rocprofiler-compute/workloads/vmem/MI300X_A1.
WARNING PC sampling data not found for /app/projects/rocprofiler-compute/workloads/vmem1/MI300X_A1.
DEBUG Collected dispatch data
DEBUG Applied analysis mode filters
DEBUG Calculated dispatch data
DEBUG Collected metrics data
WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg(
__ _
_ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___
| '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \
| | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/
|_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___|
|_| |_|

INFO Analysis mode = db
INFO ed45b0b189
DEBUG [omnisoc init]
INFO ed45b0b189
DEBUG [omnisoc init]
DEBUG [analysis] prepping to do some analysis
INFO [analysis] deriving rocprofiler-compute metrics...
DEBUG Collected roofline ceilings
WARNING PC sampling data not found for /app/projects/rocprofiler-compute/workloads/nbody/MI300X_A1.
WARNING PC sampling data not found for /app/projects/rocprofiler-compute/workloads/nbody1/MI300X_A1.
DEBUG Collected dispatch data
DEBUG Applied analysis mode filters
DEBUG Calculated dispatch data
DEBUG Collected metrics data
WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg(
(pmc_df.get("pmc_perf_ACCUM") / pmc_df.get("SQC_ICACHE_REQ")).where((pmc_df.get("SQC_ICACHE_REQ") != 0), None)) * 100), 0) - unsupported operand type(s) for /: 'NoneType' and 'float'
WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg(
WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg(
(pmc_df.get("pmc_perf_ACCUM") / pmc_df.get("SQC_ICACHE_REQ")).where((pmc_df.get("SQC_ICACHE_REQ") != 0), None)) * 100), 0) - unsupported operand type(s) for /: 'NoneType' and 'float'
DEBUG Calculated metric values
DEBUG Calculated roofline data points
DEBUG [analysis] generating analysis
DEBUG SQLite database initialized with name: test.db
DEBUG Initialized database: test.db
DEBUG Completed writing database
DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host33static_block_size_config_providerILj256EEEjLb0ELNS3_11target_archE942EZNS3_25xorwow_generator_templateINS0_13device_systemENS3_23default_config_providerIL16rocrand_rng_type401EEEE4initEvEUlT_DpT0_E_JPN14rocrand_device13xorwow_engineEjjyyEEEvT3_DpT4_
DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host23default_config_providerIL16rocrand_rng_type401EEEfLb0ELNS3_11target_archE942EZZNS3_25xorwow_generator_templateINS0_13device_systemES6_E8generateIfNS3_20uniform_distributionIfjEEEE14rocrand_statusPT_mT0_ENKUlSF_E_clISt17integral_constantIbLb0EEEEDaSF_EUlSF_DpT0_E_JPN14rocrand_device13xorwow_engineEjPfmSD_EEEvT3_DpT4_
DEBUG Calculating expressions for kernel: void bodyForce_block<256>(HIP_vector_type<float, 4u> const*, HIP_vector_type<float, 4u>*, float, int)
DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host33static_block_size_config_providerILj256EEEjLb0ELNS3_11target_archE942EZNS3_25xorwow_generator_templateINS0_13device_systemENS3_23default_config_providerIL16rocrand_rng_type401EEEE4initEvEUlT_DpT0_E_JPN14rocrand_device13xorwow_engineEjjyyEEEvT3_DpT4_
DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host23default_config_providerIL16rocrand_rng_type401EEEfLb0ELNS3_11target_archE942EZZNS3_25xorwow_generator_templateINS0_13device_systemES6_E8generateIfNS3_20uniform_distributionIfjEEEE14rocrand_statusPT_mT0_ENKUlSF_E_clISt17integral_constantIbLb0EEEEDaSF_EUlSF_DpT0_E_JPN14rocrand_device13xorwow_engineEjPfmSD_EEEvT3_DpT4_
DEBUG Calculating expressions for kernel: void bodyForce_block<256>(HIP_vector_type<float, 4u> const*, HIP_vector_type<float, 4u>*, float, int)
DEBUG Calculated metric values
DEBUG Calculated roofline data points
DEBUG [analysis] generating analysis
DEBUG SQLite database initialized with name: test.db
DEBUG Initialized database: test.db
INFO ed45b0b189
INFO ed45b0b189
DEBUG Completed writing database
WARNING Created file: test.db
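
For readers who want to verify what ended up in ``test.db``, the following is a minimal inspection sketch (not part of this change), assuming only that the analysis run produced the SQLite file named in the example above. It discovers table names at runtime instead of hard-coding the ORM's table names and prints a row count per table.

.. code-block:: python

   import sqlite3

   # Inspect the database written by:
   #   rocprof-compute analyze --output-name test --output-format db ...
   conn = sqlite3.connect("test.db")
   try:
       tables = [
           row[0]
           for row in conn.execute(
               "SELECT name FROM sqlite_master WHERE type = 'table' ORDER BY name"
           )
       ]
       for table in tables:
           # Quick overview: how many rows each table received.
           (count,) = conn.execute(f'SELECT COUNT(*) FROM "{table}"').fetchone()
           print(f"{table}: {count} rows")
   finally:
       conn.close()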
@@ -101,7 +101,9 @@ def run_analysis(self) -> None:
Database.init(db_name)
console_debug(f"Initialized database: {db_name}")

# Iterate over all workloads
for workload_path in self._runs.keys():
# Add workload
workload_obj = orm.Workload(
name=workload_path.split("/")[-2],
sub_name=workload_path.split("/")[-1],
@@ -113,98 +115,126 @@ def run_analysis(self) -> None:
)
Database.get_session().add(workload_obj)

for pc_sample in self._pc_sampling_data_per_workload.get(
# Add kernel
kernel_objs: dict[str, orm.Kernel] = {}

for dispatch in self._dispatch_data_per_workload.get(
workload_path, pd.DataFrame()
).itertuples():
Database.get_session().add(
orm.PCsampling(
source=pc_sample.source_line,
instruction=pc_sample.instruction,
count=pc_sample.count,
kernel_name=pc_sample.kernel_name,
offset=pc_sample.offset,
count_issue=pc_sample.count_issued,
count_stall=pc_sample.count_stalled,
stall_reason=pc_sample.stall_reason,
# Add kernel object and map it, if not already added
if dispatch.kernel_name not in kernel_objs:
kernel_objs[dispatch.kernel_name] = orm.Kernel(
kernel_name=dispatch.kernel_name,
workload=workload_obj,
)
Database.get_session().add(kernel_objs[dispatch.kernel_name])

# Add dispatch object and link with kernel object
Database.get_session().add(
orm.Dispatch(
dispatch_id=dispatch.dispatch_id,
gpu_id=dispatch.gpu_id,
start_timestamp=dispatch.start_timestamp,
end_timestamp=dispatch.end_timestamp,
kernel=kernel_objs[dispatch.kernel_name],
)
)

# Add roofline data points
for roofline_data in self._roofline_data_per_workload.get(
workload_path, pd.DataFrame()
).itertuples():
if roofline_data.kernel_name not in kernel_objs:
console_warning(
f"Kernel {roofline_data.kernel_name} from roofline data "
"not found in dispatch data. Skipping roofline entry."
)
continue
Database.get_session().add(
orm.RooflineData(
kernel_name=roofline_data.kernel_name,
total_flops=roofline_data.total_flops,
l1_cache_data=roofline_data.l1_cache_data,
l2_cache_data=roofline_data.l2_cache_data,
hbm_cache_data=roofline_data.hbm_cache_data,
workload=workload_obj,
kernel=kernel_objs[roofline_data.kernel_name],
)
)

kernel_objs: dict[str, orm.Kernel] = {}
for dispatch in self._dispatch_data_per_workload.get(
# Add pc sampling data
for pc_sample in self._pc_sampling_data_per_workload.get(
workload_path, pd.DataFrame()
).itertuples():
# Add kernel object and map it, if not already added
if dispatch.kernel_name not in kernel_objs:
kernel_objs[dispatch.kernel_name] = orm.Kernel(
kernel_name=dispatch.kernel_name,
workload=workload_obj,
if pc_sample.kernel_name not in kernel_objs:
console_warning(
f"Kernel {pc_sample.kernel_name} from PC sampling data "
"not found in dispatch data. Skipping PC sampling entry."
)
Database.get_session().add(kernel_objs[dispatch.kernel_name])

# Add dispatch object and link with kernel object
continue
Database.get_session().add(
orm.Dispatch(
dispatch_id=dispatch.dispatch_id,
gpu_id=dispatch.gpu_id,
start_timestamp=dispatch.start_timestamp,
end_timestamp=dispatch.end_timestamp,
kernel=kernel_objs[dispatch.kernel_name],
orm.PCsampling(
source=pc_sample.source_line,
instruction=pc_sample.instruction,
count=pc_sample.count,
offset=pc_sample.offset,
count_issue=pc_sample.count_issued,
count_stall=pc_sample.count_stalled,
stall_reason=pc_sample.stall_reason,
kernel=kernel_objs[pc_sample.kernel_name],
)
)

# Optimize: Pre-group values by (metric_id, kernel_name) for O(1) lookups
values_df = self._values_data_per_workload.get(
workload_path, pd.DataFrame()
)
values_grouped = {}
if not values_df.empty:
for value in values_df.itertuples():
key = (value.metric_id, value.kernel_name)
if key not in values_grouped:
values_grouped[key] = []
values_grouped[key].append(value)

for metric in self._metrics_info_data_per_workload.get(
# Add metrics and values - iterate on values, create metrics as needed
metrics_info_dict = {
row.metric_id: row
for row in self._metrics_info_data_per_workload.get(
workload_path, pd.DataFrame()
).itertuples()
}
metric_objs: dict[str, orm.MetricDefinition] = {}

for value in self._values_data_per_workload.get(
workload_path, pd.DataFrame()
).itertuples():
for kernel_name in kernel_objs.keys():
metric_obj = orm.Metric(
name=metric.name,
metric_id=metric.metric_id,
description=metric.description,
unit=metric.unit,
table_name=metric.table_name,
sub_table_name=metric.sub_table_name,
kernel=kernel_objs[kernel_name],
# Check if kernel exists
if value.kernel_name not in kernel_objs:
console_warning(
f"Kernel {value.kernel_name} from values data "
"not found in dispatch data. Skipping metric value."
)
Database.get_session().add(metric_obj)

# Direct lookup instead of iterating through all values
key = (metric.metric_id, kernel_name)
for value in values_grouped.get(key, []):
Database.get_session().add(
orm.Value(
metric=metric_obj,
value_name=value.value_name,
value=value.value,
)
continue

# Create or reuse metric object
if value.metric_id not in metric_objs:
# Fetch metric info
if value.metric_id not in metrics_info_dict:
console_warning(
f"Metric {value.metric_id} from values data "
"not found in metrics info. Skipping metric value."
)
continue
metric_info = metrics_info_dict[value.metric_id]
metric_objs[value.metric_id] = orm.MetricDefinition(
name=metric_info.name,
metric_id=metric_info.metric_id,
description=metric_info.description,
unit=metric_info.unit,
table_name=metric_info.table_name,
sub_table_name=metric_info.sub_table_name,
workload=workload_obj,
)
Database.get_session().add(metric_objs[value.metric_id])

# Add value
Database.get_session().add(
orm.MetricValue(
metric=metric_objs[value.metric_id],
kernel=kernel_objs[value.kernel_name],
value_name=value.value_name,
value=value.value,
)
)

# Add metadata
version = get_version(rocprof_compute_home)
Database.get_session().add(
orm.Metadata(
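
To make the refactored database-writing flow easier to follow on its own, here is a minimal, self-contained sketch of the create-or-reuse caching pattern the new code applies to kernels and metric definitions: each object is cached in a plain dict keyed by its name or ID, created the first time it is seen, and reused for every subsequent row. The ``Kernel`` and ``Dispatch`` classes and the ``rows`` data below are hypothetical stand-ins; the actual change uses the project's ORM models and pandas ``itertuples()``.

from dataclasses import dataclass, field


@dataclass
class Kernel:
    # Hypothetical stand-in for orm.Kernel.
    kernel_name: str
    dispatches: list = field(default_factory=list)


@dataclass
class Dispatch:
    # Hypothetical stand-in for orm.Dispatch.
    dispatch_id: int
    kernel: Kernel


def stage_objects(rows):
    """Create each Kernel once, then attach every Dispatch to its cached Kernel."""
    kernel_objs: dict[str, Kernel] = {}
    session: list = []  # stand-in for Database.get_session()

    for dispatch_id, kernel_name in rows:
        # Create the kernel object only the first time its name is seen.
        if kernel_name not in kernel_objs:
            kernel_objs[kernel_name] = Kernel(kernel_name=kernel_name)
            session.append(kernel_objs[kernel_name])

        # Every dispatch links back to the single cached kernel object.
        dispatch = Dispatch(dispatch_id=dispatch_id, kernel=kernel_objs[kernel_name])
        kernel_objs[kernel_name].dispatches.append(dispatch)
        session.append(dispatch)

    return session


if __name__ == "__main__":
    rows = [(0, "bodyForce_block"), (1, "bodyForce_block"), (2, "trampoline_kernel")]
    print(f"{len(stage_objects(rows))} objects staged for the session")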