4 changes: 2 additions & 2 deletions projects/rocprofiler-compute/.pre-commit-config.yaml
@@ -2,7 +2,7 @@ default_stages: [pre-commit]
fail_fast: true
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
rev: v6.0.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
@@ -12,7 +12,7 @@ repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version. Check https://github.com/astral-sh/ruff-pre-commit#version-compatibility
# for the latest ruff version supported by the hook.
rev: v0.12.12
rev: v0.14.11
hooks:
- id: ruff-check
args: [--fix]
70 changes: 40 additions & 30 deletions projects/rocprofiler-compute/docs/how-to/analyze/cli.rst
@@ -15,7 +15,7 @@ This section provides an overview of ROCm Compute Profiler's CLI analysis features
* :ref:`Metric customization <cli-analysis-options>`: Isolate a subset of built-in metrics or build your own profiling configuration.

* :ref:`Filtering <cli-analysis-options>`: Home in on a particular kernel, GPU ID, or dispatch ID via post-process filtering.

* :ref:`Per-kernel roofline analysis <per-kernel-roofline>`: Detailed arithmetic intensity and performance analysis for individual kernels.

Run ``rocprof-compute analyze -h`` for more details.
@@ -534,36 +534,46 @@ Analysis database example

.. code-block:: shell-session

$ rocprof-compute analyze --verbose --db test -p workloads/vmem/MI300X_A1 -p workloads/vmem1/MI300X_A1
$ rocprof-compute analyze --verbose --output-name test --output-format db -p workloads/nbody/MI300X_A1 -p workloads/nbody1/MI300X_A1
DEBUG Execution mode = analyze

__ _
_ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___
| '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \
| | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/
|_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___|
|_| |_|

INFO Analysis mode = db
DEBUG [omnisoc init]
DEBUG [omnisoc init]
DEBUG [analysis] prepping to do some analysis
INFO [analysis] deriving rocprofiler-compute metrics...
WARNING Roofline ceilings not found for /app/projects/rocprofiler-compute/workloads/vmem/MI300X_A1.
WARNING Roofline ceilings not found for /app/projects/rocprofiler-compute/workloads/vmem1/MI300X_A1.
WARNING PC sampling data not found for /app/projects/rocprofiler-compute/workloads/vmem/MI300X_A1.
WARNING PC sampling data not found for /app/projects/rocprofiler-compute/workloads/vmem1/MI300X_A1.
DEBUG Collected dispatch data
DEBUG Applied analysis mode filters
DEBUG Calculated dispatch data
DEBUG Collected metrics data
WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg(
__ _
_ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___
| '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \
| | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/
|_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___|
|_| |_|

INFO Analysis mode = db
INFO ed45b0b189
DEBUG [omnisoc init]
INFO ed45b0b189
DEBUG [omnisoc init]
DEBUG [analysis] prepping to do some analysis
INFO [analysis] deriving rocprofiler-compute metrics...
DEBUG Collected roofline ceilings
WARNING PC sampling data not found for /app/projects/rocprofiler-compute/workloads/nbody/MI300X_A1.
WARNING PC sampling data not found for /app/projects/rocprofiler-compute/workloads/nbody1/MI300X_A1.
DEBUG Collected dispatch data
DEBUG Applied analysis mode filters
DEBUG Calculated dispatch data
DEBUG Collected metrics data
WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg(
(pmc_df.get("pmc_perf_ACCUM") / pmc_df.get("SQC_ICACHE_REQ")).where((pmc_df.get("SQC_ICACHE_REQ") != 0), None)) * 100), 0) - unsupported operand type(s) for /: 'NoneType' and 'float'
WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg(
WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg(
(pmc_df.get("pmc_perf_ACCUM") / pmc_df.get("SQC_ICACHE_REQ")).where((pmc_df.get("SQC_ICACHE_REQ") != 0), None)) * 100), 0) - unsupported operand type(s) for /: 'NoneType' and 'float'
DEBUG Calculated metric values
DEBUG Calculated roofline data points
DEBUG [analysis] generating analysis
DEBUG SQLite database initialized with name: test.db
DEBUG Initialized database: test.db
DEBUG Completed writing database
DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host33static_block_size_config_providerILj256EEEjLb0ELNS3_11target_archE942EZNS3_25xorwow_generator_templateINS0_13device_systemENS3_23default_config_providerIL16rocrand_rng_type401EEEE4initEvEUlT_DpT0_E_JPN14rocrand_device13xorwow_engineEjjyyEEEvT3_DpT4_
DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host23default_config_providerIL16rocrand_rng_type401EEEfLb0ELNS3_11target_archE942EZZNS3_25xorwow_generator_templateINS0_13device_systemES6_E8generateIfNS3_20uniform_distributionIfjEEEE14rocrand_statusPT_mT0_ENKUlSF_E_clISt17integral_constantIbLb0EEEEDaSF_EUlSF_DpT0_E_JPN14rocrand_device13xorwow_engineEjPfmSD_EEEvT3_DpT4_
DEBUG Calculating expressions for kernel: void bodyForce_block<256>(HIP_vector_type<float, 4u> const*, HIP_vector_type<float, 4u>*, float, int)
DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host33static_block_size_config_providerILj256EEEjLb0ELNS3_11target_archE942EZNS3_25xorwow_generator_templateINS0_13device_systemENS3_23default_config_providerIL16rocrand_rng_type401EEEE4initEvEUlT_DpT0_E_JPN14rocrand_device13xorwow_engineEjjyyEEEvT3_DpT4_
DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host23default_config_providerIL16rocrand_rng_type401EEEfLb0ELNS3_11target_archE942EZZNS3_25xorwow_generator_templateINS0_13device_systemES6_E8generateIfNS3_20uniform_distributionIfjEEEE14rocrand_statusPT_mT0_ENKUlSF_E_clISt17integral_constantIbLb0EEEEDaSF_EUlSF_DpT0_E_JPN14rocrand_device13xorwow_engineEjPfmSD_EEEvT3_DpT4_
DEBUG Calculating expressions for kernel: void bodyForce_block<256>(HIP_vector_type<float, 4u> const*, HIP_vector_type<float, 4u>*, float, int)
DEBUG Calculated metric values
DEBUG Calculated roofline data points
DEBUG [analysis] generating analysis
DEBUG SQLite database initialized with name: test.db
DEBUG Initialized database: test.db
INFO ed45b0b189
INFO ed45b0b189
DEBUG Completed writing database
WARNING Created file: test.db
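
For readers who want to verify what ended up in ``test.db``, the following is a minimal inspection sketch (not part of this change), assuming only that the analysis run produced the SQLite file named in the example above. It discovers table names at runtime instead of hard-coding the ORM's table names and prints a row count per table.

.. code-block:: python

   import sqlite3

   # Inspect the database written by:
   #   rocprof-compute analyze --output-name test --output-format db ...
   conn = sqlite3.connect("test.db")
   try:
       tables = [
           row[0]
           for row in conn.execute(
               "SELECT name FROM sqlite_master WHERE type = 'table' ORDER BY name"
           )
       ]
       for table in tables:
           # Quick overview: how many rows each table received.
           (count,) = conn.execute(f'SELECT COUNT(*) FROM "{table}"').fetchone()
           print(f"{table}: {count} rows")
   finally:
       conn.close()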
@@ -101,7 +101,9 @@ def run_analysis(self) -> None:
Database.init(db_name)
console_debug(f"Initialized database: {db_name}")

# Iterate over all workloads
for workload_path in self._runs.keys():
# Add workload
workload_obj = orm.Workload(
name=workload_path.split("/")[-2],
sub_name=workload_path.split("/")[-1],
@@ -113,98 +115,126 @@ def run_analysis(self) -> None:
)
Database.get_session().add(workload_obj)

for pc_sample in self._pc_sampling_data_per_workload.get(
# Add kernel
kernel_objs: dict[str, orm.Kernel] = {}

for dispatch in self._dispatch_data_per_workload.get(
workload_path, pd.DataFrame()
).itertuples():
Database.get_session().add(
orm.PCsampling(
source=pc_sample.source_line,
instruction=pc_sample.instruction,
count=pc_sample.count,
kernel_name=pc_sample.kernel_name,
offset=pc_sample.offset,
count_issue=pc_sample.count_issued,
count_stall=pc_sample.count_stalled,
stall_reason=pc_sample.stall_reason,
# Add kernel object and map it, if not already added
if dispatch.kernel_name not in kernel_objs:
kernel_objs[dispatch.kernel_name] = orm.Kernel(
kernel_name=dispatch.kernel_name,
workload=workload_obj,
)
Database.get_session().add(kernel_objs[dispatch.kernel_name])

# Add dispatch object and link with kernel object
Database.get_session().add(
orm.Dispatch(
dispatch_id=dispatch.dispatch_id,
gpu_id=dispatch.gpu_id,
start_timestamp=dispatch.start_timestamp,
end_timestamp=dispatch.end_timestamp,
kernel=kernel_objs[dispatch.kernel_name],
)
)

# Add roofline data points
for roofline_data in self._roofline_data_per_workload.get(
workload_path, pd.DataFrame()
).itertuples():
if roofline_data.kernel_name not in kernel_objs:
console_warning(
f"Kernel {roofline_data.kernel_name} from roofline data "
"not found in dispatch data. Skipping roofline entry."
)
continue
Database.get_session().add(
orm.RooflineData(
kernel_name=roofline_data.kernel_name,
total_flops=roofline_data.total_flops,
l1_cache_data=roofline_data.l1_cache_data,
l2_cache_data=roofline_data.l2_cache_data,
hbm_cache_data=roofline_data.hbm_cache_data,
workload=workload_obj,
kernel=kernel_objs[roofline_data.kernel_name],
)
)

kernel_objs: dict[str, orm.Kernel] = {}
for dispatch in self._dispatch_data_per_workload.get(
# Add pc sampling data
for pc_sample in self._pc_sampling_data_per_workload.get(
workload_path, pd.DataFrame()
).itertuples():
# Add kernel object and map it, if not already added
if dispatch.kernel_name not in kernel_objs:
kernel_objs[dispatch.kernel_name] = orm.Kernel(
kernel_name=dispatch.kernel_name,
workload=workload_obj,
if pc_sample.kernel_name not in kernel_objs:
console_warning(
f"Kernel {pc_sample.kernel_name} from PC sampling data "
"not found in dispatch data. Skipping PC sampling entry."
)
Database.get_session().add(kernel_objs[dispatch.kernel_name])

# Add dispatch object and link with kernel object
continue
Database.get_session().add(
orm.Dispatch(
dispatch_id=dispatch.dispatch_id,
gpu_id=dispatch.gpu_id,
start_timestamp=dispatch.start_timestamp,
end_timestamp=dispatch.end_timestamp,
kernel=kernel_objs[dispatch.kernel_name],
orm.PCsampling(
source=pc_sample.source_line,
instruction=pc_sample.instruction,
count=pc_sample.count,
offset=pc_sample.offset,
count_issue=pc_sample.count_issued,
count_stall=pc_sample.count_stalled,
stall_reason=pc_sample.stall_reason,
kernel=kernel_objs[pc_sample.kernel_name],
)
)

# Optimize: Pre-group values by (metric_id, kernel_name) for O(1) lookups
values_df = self._values_data_per_workload.get(
workload_path, pd.DataFrame()
)
values_grouped = {}
if not values_df.empty:
for value in values_df.itertuples():
key = (value.metric_id, value.kernel_name)
if key not in values_grouped:
values_grouped[key] = []
values_grouped[key].append(value)

for metric in self._metrics_info_data_per_workload.get(
# Add metrics and values - iterate on values, create metrics as needed
metrics_info_dict = {
row.metric_id: row
for row in self._metrics_info_data_per_workload.get(
workload_path, pd.DataFrame()
).itertuples()
}
metric_objs: dict[str, orm.MetricDefinition] = {}

for value in self._values_data_per_workload.get(
workload_path, pd.DataFrame()
).itertuples():
for kernel_name in kernel_objs.keys():
metric_obj = orm.Metric(
name=metric.name,
metric_id=metric.metric_id,
description=metric.description,
unit=metric.unit,
table_name=metric.table_name,
sub_table_name=metric.sub_table_name,
kernel=kernel_objs[kernel_name],
# Check if kernel exists
if value.kernel_name not in kernel_objs:
console_warning(
f"Kernel {value.kernel_name} from values data "
"not found in dispatch data. Skipping metric value."
)
Database.get_session().add(metric_obj)

# Direct lookup instead of iterating through all values
key = (metric.metric_id, kernel_name)
for value in values_grouped.get(key, []):
Database.get_session().add(
orm.Value(
metric=metric_obj,
value_name=value.value_name,
value=value.value,
)
continue

# Create or reuse metric object
if value.metric_id not in metric_objs:
# Fetch metric info
if value.metric_id not in metrics_info_dict:
console_warning(
f"Metric {value.metric_id} from values data "
"not found in metrics info. Skipping metric value."
)
continue
metric_info = metrics_info_dict[value.metric_id]
metric_objs[value.metric_id] = orm.MetricDefinition(
name=metric_info.name,
metric_id=metric_info.metric_id,
description=metric_info.description,
unit=metric_info.unit,
table_name=metric_info.table_name,
sub_table_name=metric_info.sub_table_name,
workload=workload_obj,
)
Database.get_session().add(metric_objs[value.metric_id])

# Add value
Database.get_session().add(
orm.MetricValue(
metric=metric_objs[value.metric_id],
kernel=kernel_objs[value.kernel_name],
value_name=value.value_name,
value=value.value,
)
)

# Add metadata
version = get_version(rocprof_compute_home)
Database.get_session().add(
orm.Metadata(
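
To make the refactored database-writing flow easier to follow on its own, here is a minimal, self-contained sketch of the create-or-reuse caching pattern the new code applies to kernels and metric definitions: each object is cached in a plain dict keyed by its name or ID, created the first time it is seen, and reused for every subsequent row. The ``Kernel`` and ``Dispatch`` classes and the ``rows`` data below are hypothetical stand-ins; the actual change uses the project's ORM models and pandas ``itertuples()``.

from dataclasses import dataclass, field


@dataclass
class Kernel:
    # Hypothetical stand-in for orm.Kernel.
    kernel_name: str
    dispatches: list = field(default_factory=list)


@dataclass
class Dispatch:
    # Hypothetical stand-in for orm.Dispatch.
    dispatch_id: int
    kernel: Kernel


def stage_objects(rows):
    """Create each Kernel once, then attach every Dispatch to its cached Kernel."""
    kernel_objs: dict[str, Kernel] = {}
    session: list = []  # stand-in for Database.get_session()

    for dispatch_id, kernel_name in rows:
        # Create the kernel object only the first time its name is seen.
        if kernel_name not in kernel_objs:
            kernel_objs[kernel_name] = Kernel(kernel_name=kernel_name)
            session.append(kernel_objs[kernel_name])

        # Every dispatch links back to the single cached kernel object.
        dispatch = Dispatch(dispatch_id=dispatch_id, kernel=kernel_objs[kernel_name])
        kernel_objs[kernel_name].dispatches.append(dispatch)
        session.append(dispatch)

    return session


if __name__ == "__main__":
    rows = [(0, "bodyForce_block"), (1, "bodyForce_block"), (2, "trampoline_kernel")]
    print(f"{len(stage_objects(rows))} objects staged for the session")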