Add safe iterator wrapper to handle UnicodeDecodeError in SSH output

cijohnson · cijohnson · commit c461731b0722 · 2026-02-04T02:53:28.000Z
Handle UnicodeDecodeError raised while iterating over SSH stdout/stderr:

- Added _safe_iterator() method to gracefully handle invalid UTF-8 bytes in stdout/stderr
- Method uses iter() to handle both lists (from tests) and iterators (from pssh library)
- Skips malformed lines with warning message and continues processing
- Includes comprehensive unit tests with actual non-UTF-8 bytes (b'\x96', b'\xff\xfe')
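- Fixes test failures caused by tqdm progress bars containing invalid UTF-8
- Also updates the expected mean_ttft_ms/mean_tpot_ms values in the mi355x single-node vLLM config, raises the default server launch poll count (20 -> 30) and poll wait time (60 -> 120), and removes the status field from collected vLLM test results (collect_test_result() no longer takes a status argument; print_all_results() drops the "Status" column)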
diff --git a/cvs/input/config_file/inference/vllm/mi355x_singlenode_vllm.json b/cvs/input/config_file/inference/vllm/mi355x_singlenode_vllm.json
@@ -77,8 +77,8 @@
             "result_dict": {
                 "ISL=1024,OSL=1024,TP=1,CONC=16": {
                     "total_throughput_per_sec": "4651",
-                    "mean_ttft_ms": "200",
-                    "mean_tpot_ms": "10"
+                    "mean_ttft_ms": "70",
+                    "mean_tpot_ms": "8"
                 },
                 "ISL=1024,OSL=1024,TP=1,CONC=32": {
                     "total_throughput_per_sec": "7043",
@@ -87,28 +87,28 @@
                 },
                 "ISL=1024,OSL=1024,TP=1,CONC=64": {
                     "total_throughput_per_sec": "10677",
-                    "mean_ttft_ms": "150",
-                    "mean_tpot_ms": "8"
+                    "mean_ttft_ms": "76",
+                    "mean_tpot_ms": "13"
                 },
                 "ISL=1024,OSL=8192,TP=1,CONC=16": {
                     "total_throughput_per_sec": "2735",
-                    "mean_ttft_ms": "250",
-                    "mean_tpot_ms": "15"
+                    "mean_ttft_ms": "57",
+                    "mean_tpot_ms": "7"
                 },
                 "ISL=1024,OSL=8192,TP=1,CONC=32": {
                     "total_throughput_per_sec": "4038",
-                    "mean_ttft_ms": "230",
-                    "mean_tpot_ms": "13"
+                    "mean_ttft_ms": "67",
+                    "mean_tpot_ms": "10"
                 },
                 "ISL=1024,OSL=8192,TP=1,CONC=64": {
                     "total_throughput_per_sec": "6140",
-                    "mean_ttft_ms": "210",
-                    "mean_tpot_ms": "11"
+                    "mean_ttft_ms": "93",
+                    "mean_tpot_ms": "13"
                 },
                 "ISL=8192,OSL=1024,TP=1,CONC=16": {
                     "total_throughput_per_sec": "16509",
-                    "mean_ttft_ms": "350",
-                    "mean_tpot_ms": "20"
+                    "mean_ttft_ms": "335",
+                    "mean_tpot_ms": "24"
                 },
                 "ISL=8192,OSL=1024,TP=1,CONC=32": {
                     "total_throughput_per_sec": "22072",
@@ -117,8 +117,8 @@
                 },
                 "ISL=8192,OSL=1024,TP=1,CONC=64": {
                     "total_throughput_per_sec": "28863",
-                    "mean_ttft_ms": "300",
-                    "mean_tpot_ms": "19"
+                    "mean_ttft_ms": "280",
+                    "mean_tpot_ms": "22"
                 }
             }
         },
@@ -174,13 +174,13 @@
             "result_dict": {
                 "ISL=1024,OSL=1024,TP=8,CONC=16": {
                     "total_throughput_per_sec": "2000",
-                    "mean_ttft_ms": "300",
-                    "mean_tpot_ms": "12"
+                    "mean_ttft_ms": "850",
+                    "mean_tpot_ms": "18"
                 },
                 "ISL=1024,OSL=1024,TP=8,CONC=32": {
                     "total_throughput_per_sec": "3435",
-                    "mean_ttft_ms": "280",
-                    "mean_tpot_ms": "11"
+                    "mean_ttft_ms": "80",
+                    "mean_tpot_ms": "10"
                 },
                 "ISL=1024,OSL=1024,TP=8,CONC=64": {
                     "total_throughput_per_sec": "5840",
@@ -189,13 +189,13 @@
                 },
                 "ISL=1024,OSL=8192,TP=8,CONC=16": {
                     "total_throughput_per_sec": "1119",
-                    "mean_ttft_ms": "350",
-                    "mean_tpot_ms": "18"
+                    "mean_ttft_ms": "415",
+                    "mean_tpot_ms": "25"
                 },
                 "ISL=1024,OSL=8192,TP=8,CONC=32": {
                     "total_throughput_per_sec": "1876",
-                    "mean_ttft_ms": "330",
-                    "mean_tpot_ms": "16"
+                    "mean_ttft_ms": "70",
+                    "mean_tpot_ms": "10"
                 },
                 "ISL=1024,OSL=8192,TP=8,CONC=64": {
                     "total_throughput_per_sec": "3139",
@@ -204,18 +204,18 @@
                 },
                 "ISL=8192,OSL=1024,TP=8,CONC=16": {
                     "total_throughput_per_sec": "7476",
-                    "mean_ttft_ms": "400",
-                    "mean_tpot_ms": "25"
+                    "mean_ttft_ms": "300",
+                    "mean_tpot_ms": "21"
                 },
                 "ISL=8192,OSL=1024,TP=8,CONC=32": {
                     "total_throughput_per_sec": "11312",
-                    "mean_ttft_ms": "380",
-                    "mean_tpot_ms": "23"
+                    "mean_ttft_ms": "355",
+                    "mean_tpot_ms": "27"
                 },
                 "ISL=8192,OSL=1024,TP=8,CONC=64": {
                     "total_throughput_per_sec": "16082",
-                    "mean_ttft_ms": "360",
-                    "mean_tpot_ms": "21"
+                    "mean_ttft_ms": "450",
+                    "mean_tpot_ms": "39"
                 }
             }
         },
diff --git a/cvs/lib/inference/base.py b/cvs/lib/inference/base.py
@@ -49,7 +49,7 @@ def __init__(
         hf_token,
         gpu_type='mi300',
         distributed_inference=False,
-        server_launch_poll_count=20,
+        server_launch_poll_count=30,
     ):
         # Client instance phdl
         self.c_phdl = c_phdl
@@ -133,7 +133,7 @@ def __init__(
         # Allow derived classes to override server launch wait duration
         self.default_server_precheck_wait_time = 30
         self.default_server_wait_time = 330
-        self.default_server_poll_wait_time = 60
+        self.default_server_poll_wait_time = 120
         self.default_server_poll_count = server_launch_poll_count
         self.default_server_precheck_error_pattern = re.compile(
             'no such file or directory|command not found|cannot access|permission denied|error:|exception:|traceback|failed to start',
@@ -749,9 +749,9 @@ def verify_inference_results(
                 print(f"✓ All validations passed for {config_key}")
                 print(self.inference_results_dict)
                 # Auto-store results
-                self.collect_test_result("success")
+                self.collect_test_result()
             else:
                 print(f"✗ Validations failed for {config_key}")
                 print(self.inference_results_dict)
                 # Auto-store results even on failure
-                self.collect_test_result("failed")
+                self.collect_test_result()
diff --git a/cvs/lib/inference/unittests/test_vllm.py b/cvs/lib/inference/unittests/test_vllm.py
@@ -179,25 +179,20 @@ def test_print_all_results_with_single_result(self, mock_print, mock_update):
         """Test printing with a single test result."""
         InferenceBaseJob.all_test_results = {
             ('gpt-oss-120b', 'mi355x', '1024', '8192', 'long_generation', 32): {
-                'status': 'success',
-                'results': {
-                    'node1': {
-                        'successful_requests': '640',
-                        'total_throughput_per_sec': '4038',
-                        'mean_ttft_ms': '230',
-                        'mean_tpot_ms': '13',
-                        'p99_itl_ms': '150',
-                    }
-                },
+                'node1': {
+                    'successful_requests': '640',
+                    'total_throughput_per_sec': '4038',
+                    'mean_ttft_ms': '230',
+                    'mean_tpot_ms': '13',
+                    'p99_itl_ms': '150',
+                }
             }
         }
 
         VllmJob.print_all_results()
 
         # Check that table was printed
-        table_printed = any(
-            'success' in str(call) and 'gpt-oss-120b' in str(call) for call in mock_print.call_args_list
-        )
+        table_printed = any('gpt-oss-120b' in str(call) for call in mock_print.call_args_list)
         self.assertTrue(table_printed or len(mock_print.call_args_list) > 0)
 
     @patch('cvs.lib.inference.vllm.update_test_result')
@@ -206,28 +201,22 @@ def test_print_all_results_with_multiple_results(self, mock_print, mock_update):
         """Test printing with multiple test results."""
         InferenceBaseJob.all_test_results = {
             ('gpt-oss-120b', 'mi355x', '1024', '8192', 'long_generation', 32): {
-                'status': 'success',
-                'results': {
-                    'node1': {
-                        'successful_requests': '640',
-                        'total_throughput_per_sec': '4038',
-                        'mean_ttft_ms': '230',
-                        'mean_tpot_ms': '13',
-                        'p99_itl_ms': '150',
-                    }
-                },
+                'node1': {
+                    'successful_requests': '640',
+                    'total_throughput_per_sec': '4038',
+                    'mean_ttft_ms': '230',
+                    'mean_tpot_ms': '13',
+                    'p99_itl_ms': '150',
+                }
             },
             ('gpt-oss-120b', 'mi355x', '8192', '1024', 'long_context', 16): {
-                'status': 'success',
-                'results': {
-                    'node1': {
-                        'successful_requests': '800',
-                        'total_throughput_per_sec': '16509',
-                        'mean_ttft_ms': '350',
-                        'mean_tpot_ms': '20',
-                        'p99_itl_ms': '200',
-                    }
-                },
+                'node1': {
+                    'successful_requests': '800',
+                    'total_throughput_per_sec': '16509',
+                    'mean_ttft_ms': '350',
+                    'mean_tpot_ms': '20',
+                    'p99_itl_ms': '200',
+                }
             },
         }
 
@@ -240,9 +229,7 @@ def test_print_all_results_with_multiple_results(self, mock_print, mock_update):
 class TestClearAllResults(unittest.TestCase):
     def test_clear_all_results(self):
         """Test that clear_all_results empties the class variable."""
-        InferenceBaseJob.all_test_results = {
-            ('test', 'gpu', '1024', '1024', 'balanced', 16): {'status': 'success', 'results': {}}
-        }
+        InferenceBaseJob.all_test_results = {('test', 'gpu', '1024', '1024', 'balanced', 16): {}}
 
         VllmJob.clear_all_results()
 
diff --git a/cvs/lib/inference/vllm.py b/cvs/lib/inference/vllm.py
@@ -53,7 +53,7 @@ def restart_server(self):
         self.build_server_inference_job_cmd()
         self.start_inference_server_job()
 
-    def collect_test_result(self, status="success"):
+    def collect_test_result(self):
         """
         Collect test results from the last poll_for_inference_completion call.
 
@@ -75,8 +75,8 @@ def collect_test_result(self, status="success"):
                     break
 
             res_index = (self.model_name, self.gpu_type, isl, osl, seq_name, conc)
-            # Store with the same structure as poll_for_inference_completion returns
-            InferenceBaseJob.all_test_results[res_index] = {"status": status, "results": self.inference_results_dict}
+            # Store results without status field
+            InferenceBaseJob.all_test_results[res_index] = self.inference_results_dict
         else:
             print("WARNING: Cannot collect test results - inference_results_dict is empty or not populated")
 
@@ -95,7 +95,6 @@ def print_all_results(cls):
 
         rows = []
         headers = [
-            "Status",
             "Model",
             "GPU",
             "ISL",
@@ -110,24 +109,22 @@ def print_all_results(cls):
             "P99 ITL (ms)",
         ]
 
-        for (model, gpu, isl, osl, policy, conc), entry in cls.all_test_results.items():
-            status = entry["status"]
-            for host, m in entry["results"].items():
+        for (model, gpu, isl, osl, policy, conc), results in cls.all_test_results.items():
+            for host, m in results.items():
                 rows.append(
                     [
-                        status,
                         model,
                         gpu,
                         isl,
                         osl,
                         policy,
                         conc,
                         host,
-                        m["successful_requests"],
-                        m["total_throughput_per_sec"],
-                        m["mean_ttft_ms"],
-                        m["mean_tpot_ms"],
-                        m["p99_itl_ms"],
+                        m.get("successful_requests", "N/A"),
+                        m.get("total_throughput_per_sec", "N/A"),
+                        m.get("mean_ttft_ms", "N/A"),
+                        m.get("mean_tpot_ms", "N/A"),
+                        m.get("p99_itl_ms", "N/A"),
                     ]
                 )
 
diff --git a/cvs/lib/parallel_ssh_lib.py b/cvs/lib/parallel_ssh_lib.py
@@ -105,6 +105,25 @@ def inform_unreachability(self, cmd_output):
         for host in self.unreachable_hosts:
             cmd_output[host] = cmd_output.get(host, "") + "\nABORT: Host Unreachable Error"
 
+    def _safe_iterator(self, iterator):
+        """
+        Wrapper for iterators that may contain invalid UTF-8 bytes.
+        Yields valid lines and skips malformed ones with a warning.
+        """
+        # Convert to iterator (handles both lists and existing iterators safely)
+        iterator = iter(iterator)
+        while True:
+            try:
+                line = next(iterator)
+                yield line
+            except UnicodeDecodeError as e:
+                print(f"Warning: Skipping malformed line due to UnicodeDecodeError: {e}")
+                # Continue to next line
+                continue
+            except StopIteration:
+                # End of iterator
+                break
+
     def _process_output(self, output, cmd=None, cmd_list=None, print_console=True):
         """
         Helper method to process output from run_command, collect results, and handle pruning.
@@ -122,11 +141,11 @@ def _process_output(self, output, cmd=None, cmd_list=None, print_console=True):
             else:
                 print(cmd)
             try:
-                for line in item.stdout or []:
+                for line in self._safe_iterator(item.stdout or []):
                     if print_console:
                         print(line)
                     cmd_out_str += line.replace('\t', '   ') + '\n'
-                for line in item.stderr or []:
+                for line in self._safe_iterator(item.stderr or []):
                     if print_console:
                         print(line)
                     cmd_out_str += line.replace('\t', '   ') + '\n'
diff --git a/cvs/lib/unittests/test_parallel_ssh_lib.py b/cvs/lib/unittests/test_parallel_ssh_lib.py