diff --git a/projects/rocprofiler-compute/.pre-commit-config.yaml b/projects/rocprofiler-compute/.pre-commit-config.yaml index 10c643321fe..e47ccf4aa00 100644 --- a/projects/rocprofiler-compute/.pre-commit-config.yaml +++ b/projects/rocprofiler-compute/.pre-commit-config.yaml @@ -2,7 +2,7 @@ default_stages: [pre-commit] fail_fast: true repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: check-yaml - id: end-of-file-fixer @@ -12,7 +12,7 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. Check https://github.com/astral-sh/ruff-pre-commit#version-compatibility # for the latest ruff version supported by the hook. - rev: v0.12.12 + rev: v0.14.11 hooks: - id: ruff-check args: [--fix] diff --git a/projects/rocprofiler-compute/docs/data/analyze/analysis_data_dump_schema.png b/projects/rocprofiler-compute/docs/data/analyze/analysis_data_dump_schema.png index 274b6f0020c..24004d38257 100644 Binary files a/projects/rocprofiler-compute/docs/data/analyze/analysis_data_dump_schema.png and b/projects/rocprofiler-compute/docs/data/analyze/analysis_data_dump_schema.png differ diff --git a/projects/rocprofiler-compute/docs/data/analyze/analysis_data_dump_views.png b/projects/rocprofiler-compute/docs/data/analyze/analysis_data_dump_views.png index 954ee506dd4..7942dd96a79 100644 Binary files a/projects/rocprofiler-compute/docs/data/analyze/analysis_data_dump_views.png and b/projects/rocprofiler-compute/docs/data/analyze/analysis_data_dump_views.png differ diff --git a/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst b/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst index 984d226aaad..26238c154ce 100644 --- a/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst +++ b/projects/rocprofiler-compute/docs/how-to/analyze/cli.rst @@ -15,7 +15,7 @@ This section provides an overview of ROCm Compute Profiler's CLI analysis featur * :ref:`Metric customization `: Isolate a subset of built-in metrics or build your own profiling configuration. * :ref:`Filtering `: Hone in on a particular kernel, GPU ID, or dispatch ID via post-process filtering. - + * :ref:`Per-kernel roofline analysis `: Detailed arithmetic intensity and performance analysis for individual kernels. Run ``rocprof-compute analyze -h`` for more details. @@ -534,36 +534,46 @@ Analysis database example .. code-block:: shell-session - $ rocprof-compute analyze --verbose --db test -p workloads/vmem/MI300X_A1 -p workloads/vmem1/MI300X_A1 + $ rocprof-compute analyze --verbose --output-name test --output-format db -p workloads/nbody/MI300X_A1 -p workloads/nbody1/MI300X_A1 DEBUG Execution mode = analyze - __ _ - _ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___ - | '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \ - | | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/ - |_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___| - |_| |_| - - INFO Analysis mode = db - DEBUG [omnisoc init] - DEBUG [omnisoc init] - DEBUG [analysis] prepping to do some analysis - INFO [analysis] deriving rocprofiler-compute metrics... - WARNING Roofline ceilings not found for /app/projects/rocprofiler-compute/workloads/vmem/MI300X_A1. - WARNING Roofline ceilings not found for /app/projects/rocprofiler-compute/workloads/vmem1/MI300X_A1. - WARNING PC sampling data not found for /app/projects/rocprofiler-compute/workloads/vmem/MI300X_A1. - WARNING PC sampling data not found for /app/projects/rocprofiler-compute/workloads/vmem1/MI300X_A1. - DEBUG Collected dispatch data - DEBUG Applied analysis mode filters - DEBUG Calculated dispatch data - DEBUG Collected metrics data - WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg( + __ _ + _ __ ___ ___ _ __ _ __ ___ / _| ___ ___ _ __ ___ _ __ _ _| |_ ___ +| '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \ +| | | (_) | (__| |_) | | | (_) | _|_____| (_| (_) | | | | | | |_) | |_| | || __/ +|_| \___/ \___| .__/|_| \___/|_| \___\___/|_| |_| |_| .__/ \__,_|\__\___| + |_| |_| + + INFO Analysis mode = db + INFO ed45b0b189 + DEBUG [omnisoc init] + INFO ed45b0b189 + DEBUG [omnisoc init] + DEBUG [analysis] prepping to do some analysis + INFO [analysis] deriving rocprofiler-compute metrics... + DEBUG Collected roofline ceilings +WARNING PC sampling data not found for /app/projects/rocprofiler-compute/workloads/nbody/MI300X_A1. +WARNING PC sampling data not found for /app/projects/rocprofiler-compute/workloads/nbody1/MI300X_A1. + DEBUG Collected dispatch data + DEBUG Applied analysis mode filters + DEBUG Calculated dispatch data + DEBUG Collected metrics data +WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg( (pmc_df.get("pmc_perf_ACCUM") / pmc_df.get("SQC_ICACHE_REQ")).where((pmc_df.get("SQC_ICACHE_REQ") != 0), None)) * 100), 0) - unsupported operand type(s) for /: 'NoneType' and 'float' - WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg( +WARNING Failed to evaluate expression for 3.1.39 - Value: to_round((to_avg( (pmc_df.get("pmc_perf_ACCUM") / pmc_df.get("SQC_ICACHE_REQ")).where((pmc_df.get("SQC_ICACHE_REQ") != 0), None)) * 100), 0) - unsupported operand type(s) for /: 'NoneType' and 'float' - DEBUG Calculated metric values - DEBUG Calculated roofline data points - DEBUG [analysis] generating analysis - DEBUG SQLite database initialized with name: test.db - DEBUG Initialized database: test.db - DEBUG Completed writing database \ No newline at end of file + DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host33static_block_size_config_providerILj256EEEjLb0ELNS3_11target_archE942EZNS3_25xorwow_generator_templateINS0_13device_systemENS3_23default_config_providerIL16rocrand_rng_type401EEEE4initEvEUlT_DpT0_E_JPN14rocrand_device13xorwow_engineEjjyyEEEvT3_DpT4_ + DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host23default_config_providerIL16rocrand_rng_type401EEEfLb0ELNS3_11target_archE942EZZNS3_25xorwow_generator_templateINS0_13device_systemES6_E8generateIfNS3_20uniform_distributionIfjEEEE14rocrand_statusPT_mT0_ENKUlSF_E_clISt17integral_constantIbLb0EEEEDaSF_EUlSF_DpT0_E_JPN14rocrand_device13xorwow_engineEjPfmSD_EEEvT3_DpT4_ + DEBUG Calculating expressions for kernel: void bodyForce_block<256>(HIP_vector_type const*, HIP_vector_type*, float, int) + DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host33static_block_size_config_providerILj256EEEjLb0ELNS3_11target_archE942EZNS3_25xorwow_generator_templateINS0_13device_systemENS3_23default_config_providerIL16rocrand_rng_type401EEEE4initEvEUlT_DpT0_E_JPN14rocrand_device13xorwow_engineEjjyyEEEvT3_DpT4_ + DEBUG Calculating expressions for kernel: _ZN12rocrand_impl6system6detail17trampoline_kernelINS_4host23default_config_providerIL16rocrand_rng_type401EEEfLb0ELNS3_11target_archE942EZZNS3_25xorwow_generator_templateINS0_13device_systemES6_E8generateIfNS3_20uniform_distributionIfjEEEE14rocrand_statusPT_mT0_ENKUlSF_E_clISt17integral_constantIbLb0EEEEDaSF_EUlSF_DpT0_E_JPN14rocrand_device13xorwow_engineEjPfmSD_EEEvT3_DpT4_ + DEBUG Calculating expressions for kernel: void bodyForce_block<256>(HIP_vector_type const*, HIP_vector_type*, float, int) + DEBUG Calculated metric values + DEBUG Calculated roofline data points + DEBUG [analysis] generating analysis + DEBUG SQLite database initialized with name: test.db + DEBUG Initialized database: test.db + INFO ed45b0b189 + INFO ed45b0b189 + DEBUG Completed writing database +WARNING Created file: test.db diff --git a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_db.py b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_db.py index 4a121499f72..1a01b23a55e 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_db.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_analyze/analysis_db.py @@ -101,7 +101,9 @@ def run_analysis(self) -> None: Database.init(db_name) console_debug(f"Initialized database: {db_name}") + # Iterate over all workloads for workload_path in self._runs.keys(): + # Add workload workload_obj = orm.Workload( name=workload_path.split("/")[-2], sub_name=workload_path.split("/")[-1], @@ -113,98 +115,126 @@ def run_analysis(self) -> None: ) Database.get_session().add(workload_obj) - for pc_sample in self._pc_sampling_data_per_workload.get( + # Add kernel + kernel_objs: dict[str, orm.Kernel] = {} + + for dispatch in self._dispatch_data_per_workload.get( workload_path, pd.DataFrame() ).itertuples(): - Database.get_session().add( - orm.PCsampling( - source=pc_sample.source_line, - instruction=pc_sample.instruction, - count=pc_sample.count, - kernel_name=pc_sample.kernel_name, - offset=pc_sample.offset, - count_issue=pc_sample.count_issued, - count_stall=pc_sample.count_stalled, - stall_reason=pc_sample.stall_reason, + # Add kernel object and map it, if not already added + if dispatch.kernel_name not in kernel_objs: + kernel_objs[dispatch.kernel_name] = orm.Kernel( + kernel_name=dispatch.kernel_name, workload=workload_obj, ) + Database.get_session().add(kernel_objs[dispatch.kernel_name]) + + # Add dispatch object and link with kernel object + Database.get_session().add( + orm.Dispatch( + dispatch_id=dispatch.dispatch_id, + gpu_id=dispatch.gpu_id, + start_timestamp=dispatch.start_timestamp, + end_timestamp=dispatch.end_timestamp, + kernel=kernel_objs[dispatch.kernel_name], + ) ) + # Add roofline data points for roofline_data in self._roofline_data_per_workload.get( workload_path, pd.DataFrame() ).itertuples(): + if roofline_data.kernel_name not in kernel_objs: + console_warning( + f"Kernel {roofline_data.kernel_name} from roofline data " + "not found in dispatch data. Skipping roofline entry." + ) + continue Database.get_session().add( orm.RooflineData( - kernel_name=roofline_data.kernel_name, total_flops=roofline_data.total_flops, l1_cache_data=roofline_data.l1_cache_data, l2_cache_data=roofline_data.l2_cache_data, hbm_cache_data=roofline_data.hbm_cache_data, - workload=workload_obj, + kernel=kernel_objs[roofline_data.kernel_name], ) ) - kernel_objs: dict[str, orm.Kernel] = {} - for dispatch in self._dispatch_data_per_workload.get( + # Add pc sampling data + for pc_sample in self._pc_sampling_data_per_workload.get( workload_path, pd.DataFrame() ).itertuples(): - # Add kernel object and map it, if not already added - if dispatch.kernel_name not in kernel_objs: - kernel_objs[dispatch.kernel_name] = orm.Kernel( - kernel_name=dispatch.kernel_name, - workload=workload_obj, + if pc_sample.kernel_name not in kernel_objs: + console_warning( + f"Kernel {pc_sample.kernel_name} from PC sampling data " + "not found in dispatch data. Skipping PC sampling entry." ) - Database.get_session().add(kernel_objs[dispatch.kernel_name]) - - # Add dispatch object and link with kernel object + continue Database.get_session().add( - orm.Dispatch( - dispatch_id=dispatch.dispatch_id, - gpu_id=dispatch.gpu_id, - start_timestamp=dispatch.start_timestamp, - end_timestamp=dispatch.end_timestamp, - kernel=kernel_objs[dispatch.kernel_name], + orm.PCsampling( + source=pc_sample.source_line, + instruction=pc_sample.instruction, + count=pc_sample.count, + offset=pc_sample.offset, + count_issue=pc_sample.count_issued, + count_stall=pc_sample.count_stalled, + stall_reason=pc_sample.stall_reason, + kernel=kernel_objs[pc_sample.kernel_name], ) ) - # Optimize: Pre-group values by (metric_id, kernel_name) for O(1) lookups - values_df = self._values_data_per_workload.get( - workload_path, pd.DataFrame() - ) - values_grouped = {} - if not values_df.empty: - for value in values_df.itertuples(): - key = (value.metric_id, value.kernel_name) - if key not in values_grouped: - values_grouped[key] = [] - values_grouped[key].append(value) - - for metric in self._metrics_info_data_per_workload.get( + # Add metrics and values - iterate on values, create metrics as needed + metrics_info_dict = { + row.metric_id: row + for row in self._metrics_info_data_per_workload.get( + workload_path, pd.DataFrame() + ).itertuples() + } + metric_objs: dict[str, orm.MetricDefinition] = {} + + for value in self._values_data_per_workload.get( workload_path, pd.DataFrame() ).itertuples(): - for kernel_name in kernel_objs.keys(): - metric_obj = orm.Metric( - name=metric.name, - metric_id=metric.metric_id, - description=metric.description, - unit=metric.unit, - table_name=metric.table_name, - sub_table_name=metric.sub_table_name, - kernel=kernel_objs[kernel_name], + # Check if kernel exists + if value.kernel_name not in kernel_objs: + console_warning( + f"Kernel {value.kernel_name} from values data " + "not found in dispatch data. Skipping metric value." ) - Database.get_session().add(metric_obj) - - # Direct lookup instead of iterating through all values - key = (metric.metric_id, kernel_name) - for value in values_grouped.get(key, []): - Database.get_session().add( - orm.Value( - metric=metric_obj, - value_name=value.value_name, - value=value.value, - ) + continue + + # Create or reuse metric object + if value.metric_id not in metric_objs: + # Fetch metric info + if value.metric_id not in metrics_info_dict: + console_warning( + f"Metric {value.metric_id} from values data " + "not found in metrics info. Skipping metric value." ) + continue + metric_info = metrics_info_dict[value.metric_id] + metric_objs[value.metric_id] = orm.MetricDefinition( + name=metric_info.name, + metric_id=metric_info.metric_id, + description=metric_info.description, + unit=metric_info.unit, + table_name=metric_info.table_name, + sub_table_name=metric_info.sub_table_name, + workload=workload_obj, + ) + Database.get_session().add(metric_objs[value.metric_id]) + + # Add value + Database.get_session().add( + orm.MetricValue( + metric=metric_objs[value.metric_id], + kernel=kernel_objs[value.kernel_name], + value_name=value.value_name, + value=value.value, + ) + ) + # Add metadata version = get_version(rocprof_compute_home) Database.get_session().add( orm.Metadata( diff --git a/projects/rocprofiler-compute/src/utils/analysis_orm.py b/projects/rocprofiler-compute/src/utils/analysis_orm.py index f2315fc768d..76a9ff3c08b 100644 --- a/projects/rocprofiler-compute/src/utils/analysis_orm.py +++ b/projects/rocprofiler-compute/src/utils/analysis_orm.py @@ -45,7 +45,7 @@ from utils.logger import console_debug, console_error PREFIX = "compute_" -SCHEMA_VERSION = "1.1.0" +SCHEMA_VERSION = "1.2.0" Base = declarative_base() @@ -63,18 +63,16 @@ class Workload(Base): # Workload can have multiple kernels kernels = relationship("Kernel", back_populates="workload") - # Workload can have multiple roofline data points - roofline_data_points = relationship("RooflineData", back_populates="workload") - # Workload can have multiple pc_sampling values - pc_sampling_values = relationship("PCsampling", back_populates="workload") + # Workload can have multiple metric definitions + metric_definitions = relationship("MetricDefinition", back_populates="workload") -class Metric(Base): - __tablename__ = f"{PREFIX}metric" +class MetricDefinition(Base): + __tablename__ = f"{PREFIX}metric_definition" metric_uuid = Column(Integer, primary_key=True) - kernel_uuid = Column( - Integer, ForeignKey(f"{PREFIX}kernel.kernel_uuid"), nullable=False + workload_id = Column( + Integer, ForeignKey(f"{PREFIX}workload.workload_id"), nullable=False ) name = Column(String) # e.g. Wavefronts Num metric_id = Column(String) # e.g. 4.1.3 @@ -83,27 +81,26 @@ class Metric(Base): sub_table_name = Column(String) # e.g. Wavefront stats unit = Column(String) # e.g. Gbps - # Metric can have one kernel - kernel = relationship("Kernel", back_populates="metrics") - # Metric can have multiple values - values = relationship("Value", back_populates="metric") + # Metric can have one workload + workload = relationship("Workload", back_populates="metric_definitions") + # Metric can have multiple metric values + metric_values = relationship("MetricValue", back_populates="metric") class RooflineData(Base): __tablename__ = f"{PREFIX}roofline_data" roofline_uuid = Column(Integer, primary_key=True) - workload_id = Column( - Integer, ForeignKey(f"{PREFIX}workload.workload_id"), nullable=False + kernel_uuid = Column( + Integer, ForeignKey(f"{PREFIX}kernel.kernel_uuid"), nullable=False ) - kernel_name = Column(String) total_flops = Column(Float) l1_cache_data = Column(Float) l2_cache_data = Column(Float) hbm_cache_data = Column(Float) - # Roofline data point can have one workload - workload = relationship("Workload", back_populates="roofline_data_points") + # Roofline data point can have one kernel + kernel = relationship("Kernel", back_populates="roofline_data_points") class Dispatch(Base): @@ -135,42 +132,50 @@ class Kernel(Base): workload = relationship("Workload", back_populates="kernels") # Kernel can have multiple dispatches dispatches = relationship("Dispatch", back_populates="kernel") - # Kernel can have multiple metrics - metrics = relationship("Metric", back_populates="kernel") + # Kernel can have multiple metric values + metric_values = relationship("MetricValue", back_populates="kernel") + # Kernel can have multiple roofline data points + roofline_data_points = relationship("RooflineData", back_populates="kernel") + # Kernel can have multiple pc_sampling values + pc_sampling_values = relationship("PCsampling", back_populates="kernel") class PCsampling(Base): __tablename__ = f"{PREFIX}pcsampling" pc_sampling_uuid = Column(Integer, primary_key=True) - workload_id = Column( - Integer, ForeignKey(f"{PREFIX}workload.workload_id"), nullable=False + kernel_uuid = Column( + Integer, ForeignKey(f"{PREFIX}kernel.kernel_uuid"), nullable=False ) source = Column(String) instruction = Column(String) count = Column(Integer) - kernel_name = Column(String) offset = Column(Integer) count_issue = Column(Integer) count_stall = Column(Integer) stall_reason = Column(JSON) - # PCsampling can have one workload - workload = relationship("Workload", back_populates="pc_sampling_values") + # PCsampling can have one kernel + kernel = relationship("Kernel", back_populates="pc_sampling_values") -class Value(Base): - __tablename__ = f"{PREFIX}value" +class MetricValue(Base): + __tablename__ = f"{PREFIX}metric_value" value_uuid = Column(Integer, primary_key=True) metric_uuid = Column( - Integer, ForeignKey(f"{PREFIX}metric.metric_uuid"), nullable=False + Integer, ForeignKey(f"{PREFIX}metric_definition.metric_uuid"), nullable=False + ) + kernel_uuid = Column( + Integer, ForeignKey(f"{PREFIX}kernel.kernel_uuid"), nullable=False ) value_name = Column(String) # e.g. min, max, avg value = Column(Float) # e.g. 123.45 # Value can have one metric - metric = relationship("Metric", back_populates="values") + metric = relationship("MetricDefinition", back_populates="metric_values") + # Value can have one kernel + kernel = relationship("Kernel", back_populates="metric_values") class Metadata(Base): @@ -250,11 +255,20 @@ def get_views() -> list[TextClause]: views: dict[str, Select[Any]] = { "kernel_view": select( + Kernel.kernel_uuid.label("kernel_uuid"), + Kernel.workload_id.label("workload_id"), + Workload.name.label("workload_name"), Kernel.kernel_name, func.count(Dispatch.dispatch_id).label("dispatch_count"), func.sum(Dispatch.end_timestamp - Dispatch.start_timestamp).label( "duration_ns_sum" ), + func.min(Dispatch.end_timestamp - Dispatch.start_timestamp).label( + "duration_ns_min" + ), + func.max(Dispatch.end_timestamp - Dispatch.start_timestamp).label( + "duration_ns_max" + ), median_calc.c.duration_ns_median, func.avg(Dispatch.end_timestamp - Dispatch.start_timestamp).label( "duration_ns_mean" @@ -262,24 +276,31 @@ def get_views() -> list[TextClause]: ) .select_from(Dispatch) .join(Kernel, Dispatch.kernel_uuid == Kernel.kernel_uuid) + .join(Workload, Kernel.workload_id == Workload.workload_id) .join(median_calc.subquery(), Kernel.kernel_name == median_calc.c.kernel_name) - .group_by(Kernel.kernel_name), + .group_by( + Kernel.kernel_uuid, Kernel.workload_id, Workload.name, Kernel.kernel_name + ), "metric_view": select( + Workload.workload_id.label("workload_id"), Workload.name.label("workload_name"), + Kernel.kernel_uuid.label("kernel_uuid"), Kernel.kernel_name, - Metric.name.label("metric_name"), - Metric.metric_id, - Metric.description, - Metric.table_name, - Metric.sub_table_name, - Metric.unit, - Value.value_name, - Value.value, + MetricDefinition.metric_uuid.label("metric_uuid"), + MetricDefinition.name.label("metric_name"), + MetricDefinition.metric_id, + MetricDefinition.description, + MetricDefinition.table_name, + MetricDefinition.sub_table_name, + MetricDefinition.unit, + MetricValue.value_uuid.label("value_uuid"), + MetricValue.value_name, + MetricValue.value, ) - .select_from(Metric) - .join(Kernel, Metric.kernel_uuid == Kernel.kernel_uuid) - .join(Value, Metric.metric_uuid == Value.metric_uuid) - .join(Workload, Kernel.workload_id == Workload.workload_id), + .select_from(MetricDefinition) + .join(Workload, MetricDefinition.workload_id == Workload.workload_id) + .join(MetricValue, MetricDefinition.metric_uuid == MetricValue.metric_uuid) + .join(Kernel, MetricValue.kernel_uuid == Kernel.kernel_uuid), } return [ diff --git a/projects/rocprofiler-compute/tests/test_profile_general.py b/projects/rocprofiler-compute/tests/test_profile_general.py index 4b685e6ddec..97488141df8 100644 --- a/projects/rocprofiler-compute/tests/test_profile_general.py +++ b/projects/rocprofiler-compute/tests/test_profile_general.py @@ -989,19 +989,19 @@ def test_analyze_rocpd( Dispatch, Kernel, Metadata, - Metric, + MetricDefinition, + MetricValue, RooflineData, - Value, Workload, ) table_name_map = { "compute_workload": Workload, - "compute_metric": Metric, + "compute_metric_definition": MetricDefinition, "compute_roofline_data": RooflineData, "compute_dispatch": Dispatch, "compute_kernel": Kernel, - "compute_value": Value, + "compute_metric_value": MetricValue, "compute_metadata": Metadata, }