1+ #!/usr/bin/env python3
2+ """
3+ Complete NCCL to CSV Converter
4+ Parses NCCL output and creates CSV files with results and summary
5+ """
6+
7+ import re
8+ import csv
9+ import sys
10+ from pathlib import Path
11+
def parse_nccl_output(file_path):
    """Parse NCCL test output and extract the per-size performance rows.

    A result row is recognised by its 13 whitespace-separated columns:
    size, count, data type, reduction op, root, followed by
    time / algbw / busbw / error-count for the out-of-place run and the same
    four columns for the in-place run. Lines that do not have this shape
    (banners, headers, comments) are ignored.

    Args:
        file_path: Path of the NCCL test output file to read.

    Returns:
        A ``(data, avg_bandwidth)`` tuple. ``data`` is a list of dicts, one
        per result row, in file order. ``avg_bandwidth`` is the float taken
        from the last ``# Avg bus bandwidth`` line, or ``None`` when the file
        has no such line. ``(None, None)`` is returned, after printing a
        message, when the file cannot be read or holds no result rows.
    """
    number = r'\d+\.?\d*'
    # Type and redop are matched as generic identifiers rather than a fixed
    # list, so rows such as "int32 ... avg" or "bfloat16 ... sum" are kept
    # instead of being silently dropped.
    row_re = re.compile(
        r'^\s*(\d+)'             # message size in bytes
        r'\s+(\d+)'              # element count
        r'\s+([A-Za-z]\w*)'      # data type
        r'\s+([A-Za-z]\w*)'      # reduction operation ("none" when unused)
        r'\s+(-?\d+)'            # root rank (-1 when unused)
        rf'\s+({number})\s+({number})\s+({number})\s+(\d+|N/A)'   # out-of-place
        rf'\s+({number})\s+({number})\s+({number})\s+(\d+|N/A)'   # in-place
    )
    avg_re = re.compile(r'# Avg bus bandwidth\s*:\s*(\d+\.?\d*)')

    def error_count(text):
        # alltoall reports N/A for in-place errors; count that as zero.
        return 0 if text == 'N/A' else int(text)

    data = []
    avg_bandwidth = None

    try:
        # errors='replace' keeps one stray undecodable byte in the log from
        # aborting the whole parse; the numeric rows themselves are ASCII.
        with open(file_path, 'r', errors='replace') as f:
            for line in f:
                match = row_re.match(line)
                if match:
                    (size, count, data_type, operation, root,
                     oop_time, oop_algbw, oop_busbw, oop_err,
                     ip_time, ip_algbw, ip_busbw, ip_err) = match.groups()
                    size_bytes = int(size)

                    data.append({
                        'Size_Bytes': size_bytes,
                        'Size_KB': round(size_bytes / 1024, 2),
                        'Size_MB': round(size_bytes / (1024 * 1024), 2),
                        'Count': int(count),
                        'Data_Type': data_type,
                        'Operation': operation,
                        'Root': int(root),
                        'OOP_Time_us': float(oop_time),
                        'OOP_AlgBW_GBps': float(oop_algbw),
                        'OOP_BusBW_GBps': float(oop_busbw),
                        'OOP_Errors': error_count(oop_err),
                        'IP_Time_us': float(ip_time),
                        'IP_AlgBW_GBps': float(ip_algbw),
                        'IP_BusBW_GBps': float(ip_busbw),
                        'IP_Errors': error_count(ip_err),
                    })

                # The summary line is searched for on every line; if it
                # appears more than once the last occurrence wins.
                avg_match = avg_re.search(line)
                if avg_match:
                    avg_bandwidth = float(avg_match.group(1))

    except FileNotFoundError:
        print(f"Error: File {file_path} not found")
        return None, None
    except Exception as e:
        # Deliberate catch-all: callers rely on (None, None) rather than an
        # exception for any read failure, and the cause is still reported.
        print(f"Error reading file: {e} ")
        return None, None

    if not data:
        print("No NCCL performance data found in the file")
        return None, None

    return data, avg_bandwidth
85+
def write_csv(data, filename):
    """Write a list of row dicts to ``filename`` as CSV.

    The column order is taken from the keys of the first row, and a header
    line is written before the rows.

    Args:
        data: Sequence of dicts, one per CSV row.
        filename: Destination path; an existing file is overwritten.

    Returns:
        ``True`` when the file was written, ``False`` when ``data`` is empty
        or any error occurred (the error is printed).
    """
    # Nothing to write: report failure without touching the filesystem.
    if not data:
        return False

    try:
        # newline='' lets the csv module control line endings itself.
        with open(filename, 'w', newline='') as out:
            columns = [*data[0].keys()]
            table = csv.DictWriter(out, fieldnames=columns)
            table.writeheader()
            for record in data:
                table.writerow(record)
    except Exception as err:
        print(f"Error writing CSV file {filename} : {err} ")
        return False
    return True
102+
def create_summary_data(data, avg_bandwidth=None):
    """Derive the summary table from the parsed performance rows.

    Args:
        data: List of row dicts carrying at least ``Size_Bytes``,
            ``OOP_BusBW_GBps``, ``IP_BusBW_GBps``, ``OOP_Errors`` and
            ``IP_Errors``.
        avg_bandwidth: Optional average bus bandwidth reported by the log;
            appended as an extra metric when it is not ``None``.

    Returns:
        A list of ``{'Metric': ..., 'Value': ...}`` dicts, or ``None`` when
        ``data`` is empty.
    """
    if not data:
        return None

    oop_bw = [entry['OOP_BusBW_GBps'] for entry in data]
    ip_bw = [entry['IP_BusBW_GBps'] for entry in data]
    sizes = [entry['Size_Bytes'] for entry in data]

    # (label, value) pairs in the order they appear in the summary file.
    metrics = [
        ('Total Test Points', len(data)),
        ('Min Message Size (Bytes)', min(sizes)),
        ('Max Message Size (Bytes)', max(sizes)),
        ('Peak OOP Bus BW (GB/s)', round(max(oop_bw), 2)),
        ('Peak IP Bus BW (GB/s)', round(max(ip_bw), 2)),
        ('Avg OOP Bus BW (GB/s)', round(sum(oop_bw) / len(oop_bw), 2)),
        ('Avg IP Bus BW (GB/s)', round(sum(ip_bw) / len(ip_bw), 2)),
        ('Total Errors',
         sum(entry['OOP_Errors'] + entry['IP_Errors'] for entry in data)),
    ]

    # The reported average is passed through unrounded; 0.0 is still listed.
    if avg_bandwidth is not None:
        metrics.append(('NCCL Reported Avg Bus BW (GB/s)', avg_bandwidth))

    return [{'Metric': label, 'Value': value} for label, value in metrics]
127+
def main():
    """Command-line entry point: convert one NCCL log into CSV files.

    Expects exactly one argument, the NCCL output file. Writes a results CSV
    and a summary CSV into the current working directory, both named after
    the input file's stem. Exits with status 1 on a usage error, when
    parsing yields no data, or when the results file cannot be written. A
    failure to write the summary file is reported but is not fatal.
    """
    if len(sys.argv) != 2:
        print("Usage: python nccl_to_excel.py <nccl_output_file>")
        print("Example: python nccl_to_excel.py nccl-tests-container_3480.out")
        sys.exit(1)

    input_file = sys.argv[1]
    base_name = Path(input_file).stem

    print(f"Parsing NCCL output from: {input_file} ")

    # Parse the NCCL output; the parser prints its own error message.
    data, avg_bandwidth = parse_nccl_output(input_file)
    if data is None:
        sys.exit(1)

    print(f"Found {len(data)} performance data points")
    # Compare against None so a reported average of 0.0 is still shown.
    if avg_bandwidth is not None:
        print(f"Average bus bandwidth: {avg_bandwidth} GB/s")

    # Only files that were actually written are listed at the end.
    created = []

    # Create main results CSV file.
    # NOTE(review): the literal has a space before "_results.csv" (and before
    # "_summary.csv" below); kept as-is -- confirm that is intended.
    results_file = f"{base_name} _results.csv"
    if write_csv(data, results_file):
        print(f"Results exported to: {results_file} ")
        created.append((results_file, "detailed performance data"))
    else:
        print("Error writing results file")
        sys.exit(1)

    # Create summary CSV file.
    summary_data = create_summary_data(data, avg_bandwidth)
    if summary_data:
        summary_file = f"{base_name} _summary.csv"
        if write_csv(summary_data, summary_file):
            print(f"Summary exported to: {summary_file} ")
            created.append((summary_file, "summary statistics"))
        else:
            print("Error writing summary file")

    print("\n Files created:")
    for name, description in created:
        print(f"- {name} ({description})")
    print("\n You can open these CSV files in Excel, LibreOffice Calc, or any spreadsheet application")
170+
# Run the converter only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments