diff --git a/.gitignore b/.gitignore index e0a29972..f1654067 100644 --- a/.gitignore +++ b/.gitignore @@ -230,3 +230,6 @@ src/ui/next-env.d.ts !src/ui/public/manifest.json !src/ui/serve.json .eslintcache + +# vllm-sim +bin/ diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index de789ad2..f70fe7cc 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -180,6 +180,17 @@ def benchmark(): "If None, will run until max_seconds or the data is exhausted." ), ) +# TODO: Review Cursor generated code (start) +@click.option( + "--max-error-rate", + type=float, + default=None, + help=( + "The maximum error rate allowed (0.0 to 1.0) before stopping the benchmark. " + "If None, no error rate constraint will be applied." + ), +) +# TODO: Review Cursor generated code (end) @click.option( "--warmup-percent", type=float, @@ -261,6 +272,9 @@ def run( rate, max_seconds, max_requests, + # TODO: Review Cursor generated code (start) + max_error_rate, + # TODO: Review Cursor generated code (end) warmup_percent, cooldown_percent, disable_progress, @@ -288,6 +302,9 @@ def run( rate=rate, max_seconds=max_seconds, max_requests=max_requests, + # TODO: Review Cursor generated code (start) + max_error_rate=max_error_rate, + # TODO: Review Cursor generated code (end) warmup_percent=warmup_percent, cooldown_percent=cooldown_percent, output_sampling=output_sampling, diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py index cc251153..38c7780e 100644 --- a/src/guidellm/backend/openai.py +++ b/src/guidellm/backend/openai.py @@ -354,12 +354,53 @@ async def resolve( request_info.request_timings.request_end = time.time() response.request_output_tokens = usage_stats.output_tokens response.request_prompt_tokens = usage_stats.prompt_tokens + # TODO: Review Cursor generated code (start) + logger.debug( + f"OpenAI Backend: Got usage_stats - prompt_tokens={usage_stats.prompt_tokens}, output_tokens={usage_stats.output_tokens}" + ) + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Debug what we're actually yielding + from loguru import logger + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + logger.debug("OpenAI Backend: About to yield response, request_info") + logger.debug( + f"OpenAI Backend: request_info.request_timings id: {id(request_info.request_timings)}" + ) + if request_info.request_timings: + logger.debug( + f"OpenAI Backend: Yielding with first_iteration={request_info.request_timings.first_iteration}, last_iteration={request_info.request_timings.last_iteration}" + ) + else: + logger.debug("OpenAI Backend: Yielding with request_timings=None") + # TODO: Review Cursor generated code (end) yield response, request_info if request_info.request_timings.request_end is None: request_info.request_timings.request_end = time.time() response.delta = None + + # TODO: Review Cursor generated code (start) + # Debug final yield + from loguru import logger + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + logger.debug( + f"OpenAI Backend: Final yield - request_info.request_timings id: {id(request_info.request_timings)}" + ) + if request_info.request_timings: + logger.debug( + f"OpenAI Backend: Final yield with first_iteration={request_info.request_timings.first_iteration}, last_iteration={request_info.request_timings.last_iteration}" + ) + else: + logger.debug("OpenAI Backend: Final yield with request_timings=None") + # TODO: Review Cursor 
generated code (end) + yield response, request_info async def text_completions( diff --git a/src/guidellm/benchmark/aggregator.py b/src/guidellm/benchmark/aggregator.py index 3a0b57c7..0e4a8798 100644 --- a/src/guidellm/benchmark/aggregator.py +++ b/src/guidellm/benchmark/aggregator.py @@ -34,6 +34,11 @@ ) import numpy + +# TODO: Review Cursor generated code (start) +from loguru import logger + +# TODO: Review Cursor generated code (end) from pydantic import Field, PrivateAttr from guidellm.backend import ( @@ -310,7 +315,9 @@ def __call__( "worker_resolve_start_delay", agg_state, request_info.scheduler_timings.resolve_start, - request_info.scheduler_timings.scheduled, + # TODO: Review Cursor generated code (start) + request_info.scheduler_timings.scheduled_at, + # TODO: Review Cursor generated code (end) ) self.add_aggregate_metric( "worker_resolve_time", @@ -479,7 +486,14 @@ def __call__( if ( request_info.status == "completed" + # TODO: Review Cursor generated code (start) + and request_info.request_timings is not None + and hasattr(request_info.request_timings, "first_iteration") + # TODO: Review Cursor generated code (end) and request_info.request_timings.first_iteration is not None + # TODO: Review Cursor generated code (start) + and hasattr(request_info.request_timings, "last_iteration") + # TODO: Review Cursor generated code (end) and request_info.request_timings.last_iteration is not None and response.output_tokens ): @@ -492,7 +506,11 @@ def __call__( ) if ( - request_info.request_timings.first_iteration is not None + # TODO: Review Cursor generated code (start) + request_info.request_timings is not None + and hasattr(request_info.request_timings, "first_iteration") + and request_info.request_timings.first_iteration is not None + # TODO: Review Cursor generated code (end) and request_info.request_timings.request_start is not None ): self.add_aggregate_metric( @@ -503,7 +521,12 @@ def __call__( ) if ( - request_info.request_timings.first_iteration is not None + # TODO: Review Cursor generated code (start) + request_info.request_timings is not None + and hasattr(request_info.request_timings, "first_iteration") + and request_info.request_timings.first_iteration is not None + and hasattr(request_info.request_timings, "last_iteration") + # TODO: Review Cursor generated code (end) and request_info.request_timings.last_iteration is not None and response.output_tokens is not None and response.output_tokens > 1 @@ -674,8 +697,10 @@ def __call__( # Categorize request by status if request_info.status == "completed": agg_state["completed"].append((response, request, request_info)) + elif request_info.status == "canceled": agg_state["incomplete"].append((response, request, request_info)) + else: agg_state["errored"].append((response, request, request_info)) @@ -696,10 +721,29 @@ def compile( :param scheduler_state: Final scheduler execution state. :return: Complete benchmark results with metrics and request statistics. 
""" + # TODO: Review Cursor generated code (start) + logger.debug( + f"DEBUG GenerativeRequestsAggregator.compile: agg_state keys: {list(agg_state.keys())}" + ) + completed_data = agg_state.get("completed", []) + incomplete_data = agg_state.get("incomplete", []) + errored_data = agg_state.get("errored", []) + logger.debug( + f"DEBUG GenerativeRequestsAggregator.compile: completed={len(completed_data)}, incomplete={len(incomplete_data)}, errored={len(errored_data)}" + ) + # TODO: Review Cursor generated code (end) + successful: list[GenerativeRequestStats] = [ self._create_generate_stats(response, request, request_info) - for (response, request, request_info) in agg_state.get("completed", []) + # TODO: Review Cursor generated code (start) + for (response, request, request_info) in completed_data + # TODO: Review Cursor generated code (end) ] + # TODO: Review Cursor generated code (start) + logger.debug( + f"DEBUG GenerativeRequestsAggregator.compile: Created {len(successful)} successful request stats" + ) + # TODO: Review Cursor generated code (end) incomplete: list[GenerativeRequestStats] = [ self._create_generate_stats(response, request, request_info) for (response, request, request_info) in agg_state.get("incomplete", []) @@ -733,6 +777,104 @@ def compile( ] ) + # TODO: Review Cursor generated code (start) + # Debug logging before StatusBreakdown creation + successful_requests = ( + ( + list( + numpy.random.choice( + successful, size=self.request_samples, replace=False + ) + ) + if self.request_samples is not None + and len(successful) >= self.request_samples + else successful + ) + if successful + else [] + ) + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + incomplete_requests = ( + ( + list( + numpy.random.choice( + incomplete, size=self.request_samples, replace=False + ) + ) + if self.request_samples is not None + and len(incomplete) >= self.request_samples + else incomplete + ) + if incomplete + else [] + ) + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + errored_requests = ( + ( + list( + numpy.random.choice( + errored, size=self.request_samples, replace=False + ) + ) + if self.request_samples is not None + and len(errored) >= self.request_samples + else errored + ) + if errored + else [] + ) + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Rebuild total and total_types from sampled lists to match for metrics calculations + total: list[GenerativeRequestStats] = ( + successful_requests + incomplete_requests + errored_requests + ) + total_types: list[Literal["successful", "incomplete", "error"]] = [ + *["successful"] * len(successful_requests), + *["incomplete"] * len(incomplete_requests), + *["error"] * len(errored_requests), + ] + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + logger.debug( + f"DEBUG GenerativeRequestsAggregator.compile: About to create StatusBreakdown with successful={len(successful_requests)}, incomplete={len(incomplete_requests)}, errored={len(errored_requests)}" + ) + logger.debug( + f"DEBUG GenerativeRequestsAggregator.compile: request_samples={self.request_samples}" + ) + if successful_requests: + logger.debug( + f"DEBUG GenerativeRequestsAggregator.compile: First successful request type: {type(successful_requests[0])}" + ) + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Create the StatusBreakdown object and test it + 
requests_breakdown = StatusBreakdown( + successful=successful_requests, + incomplete=incomplete_requests, + errored=errored_requests, + ) + logger.debug( + "DEBUG GenerativeRequestsAggregator.compile: StatusBreakdown created" + ) + logger.debug( + f"DEBUG GenerativeRequestsAggregator.compile: StatusBreakdown.successful type: {type(requests_breakdown.successful)}" + ) + logger.debug( + f"DEBUG GenerativeRequestsAggregator.compile: StatusBreakdown.successful length: {len(requests_breakdown.successful) if requests_breakdown.successful is not None else 'None'}" + ) + logger.debug( + f"DEBUG GenerativeRequestsAggregator.compile: StatusBreakdown.successful is None: {requests_breakdown.successful is None}" + ) + # TODO: Review Cursor generated code (end) + return { "start_time": start_time, "end_time": end_time, @@ -742,39 +884,24 @@ def compile( errored=len(errored), total=len(total), ), - "requests": StatusBreakdown( - successful=( - list( - numpy.random.choice( - successful, size=self.request_samples, replace=False - ) - ) - if self.request_samples - else successful - ), - incomplete=( - list( - numpy.random.choice( - incomplete, size=self.request_samples, replace=False - ) - ) - if self.request_samples - else incomplete - ), - errored=( - list( - numpy.random.choice( - errored, size=self.request_samples, replace=False - ) - ) - if self.request_samples - else errored - ), - ), + # TODO: Review Cursor generated code (start) + "requests": requests_breakdown, + # TODO: Review Cursor generated code (end) "metrics": GenerativeMetrics( requests_per_second=( StatusDistributionSummary.from_request_times( - request_types=total_types, + # TODO: Review Cursor generated code (start) + request_types=[ + req_type + for req, req_type in zip(total, total_types) + if ( + req.scheduler_info.request_timings.request_start + is not None + and req.scheduler_info.request_timings.request_end + is not None + ) + ], + # TODO: Review Cursor generated code (end) requests=[ ( req.scheduler_info.request_timings.request_start, @@ -793,7 +920,18 @@ def compile( ), request_concurrency=( StatusDistributionSummary.from_request_times( - request_types=total_types, + # TODO: Review Cursor generated code (start) + request_types=[ + req_type + for req, req_type in zip(total, total_types) + if ( + req.scheduler_info.request_timings.request_start + is not None + and req.scheduler_info.request_timings.request_end + is not None + ) + ], + # TODO: Review Cursor generated code (end) requests=[ ( req.scheduler_info.request_timings.request_start, @@ -812,7 +950,13 @@ def compile( ), request_latency=( StatusDistributionSummary.from_values( - value_types=total_types, + # TODO: Review Cursor generated code (start) + value_types=[ + req_type + for req, req_type in zip(total, total_types) + if req.request_latency is not None + ], + # TODO: Review Cursor generated code (end) values=[ req.request_latency for req in total @@ -933,16 +1077,27 @@ def compile( ], first_iter_times=[ req.scheduler_info.request_timings.first_iteration + # TODO: Review Cursor generated code (start) + if ( + req.scheduler_info.request_timings is not None + and hasattr( + req.scheduler_info.request_timings, + "first_iteration", + ) + and req.scheduler_info.request_timings.first_iteration + is not None + ) + else req.scheduler_info.request_timings.request_start + # TODO: Review Cursor generated code (end) for req in total if req.output_tokens_per_second is not None - and req.scheduler_info.request_timings.first_iteration - is not None ], iter_counts=[ - req.output_tokens + # 
TODO: Review Cursor generated code (start) + req.output_tokens if req.output_tokens is not None else 1 + # TODO: Review Cursor generated code (end) for req in total if req.output_tokens_per_second is not None - and req.output_tokens is not None ], ) ), @@ -963,11 +1118,25 @@ def compile( ], first_iter_times=[ req.scheduler_info.request_timings.first_iteration + # TODO: Review Cursor generated code (start) + if ( + req.scheduler_info.request_timings is not None + and hasattr( + req.scheduler_info.request_timings, + "first_iteration", + ) + and req.scheduler_info.request_timings.first_iteration + is not None + ) + else req.scheduler_info.request_timings.request_start + # TODO: Review Cursor generated code (end) for req in total if req.tokens_per_second is not None ], iter_counts=[ - req.output_tokens + # TODO: Review Cursor generated code (start) + req.output_tokens if req.output_tokens is not None else 1 + # TODO: Review Cursor generated code (end) for req in total if req.tokens_per_second is not None ], @@ -1053,6 +1222,11 @@ def _create_generate_stats( settings.preferred_output_tokens_source ) + # TODO: Review Cursor generated code (start) + # Debug timing data + timings = request_info.request_timings + # TODO: Review Cursor generated code (end) + return GenerativeRequestStats( request_id=request.request_id, request_type=request.request_type, diff --git a/src/guidellm/benchmark/benchmarker.py b/src/guidellm/benchmark/benchmarker.py index 7fe7fc2d..45e7cb92 100644 --- a/src/guidellm/benchmark/benchmarker.py +++ b/src/guidellm/benchmark/benchmarker.py @@ -114,9 +114,9 @@ async def run( request, request_info, scheduler_state, - ) in Scheduler[ - BackendInterface, RequestT, MeasuredRequestTimingsT, ResponseT - ]().run( + # TODO: Review Cursor generated code (start) + ) in Scheduler[RequestT, MeasuredRequestTimingsT, ResponseT]().run( + # TODO: Review Cursor generated code (end) requests=requests, backend=backend, strategy=strategy, @@ -203,7 +203,10 @@ def _compile_benchmark_kwargs( "scheduler": { "strategy": strategy, "constraints": { - key: InfoMixin.extract_from_obj(val) for key, val in constraints + # TODO: Review Cursor generated code (start) + key: InfoMixin.extract_from_obj(val) + for key, val in constraints.items() + # TODO: Review Cursor generated code (end) }, "state": scheduler_state, }, @@ -217,6 +220,11 @@ def _compile_benchmark_kwargs( for key, aggregator in aggregators.items() }, }, + # TODO: Review Cursor generated code (start) + "env_args": InfoMixin.extract_from_obj( + environment + ), # Add required env_args field + # TODO: Review Cursor generated code (end) "system": {}, "extras": {}, } @@ -226,6 +234,31 @@ def _compile_benchmark_kwargs( compiled = aggregator.compile(aggregators_state[key], scheduler_state) + # TODO: Review Cursor generated code (start) + # Handle field name mappings for specific aggregators + if key == "scheduler_stats" and "scheduler_stats" in compiled: + # Map scheduler_stats to run_stats for GenerativeBenchmark + benchmark_kwargs["run_stats"] = compiled["scheduler_stats"] + continue + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + if key == "requests": + # Extract fields from GenerativeRequestsAggregator to top level + for field_name in [ + "metrics", + "request_totals", + "start_time", + "end_time", + ]: + if field_name in compiled: + benchmark_kwargs[field_name] = compiled[field_name] + # Extract the requests StatusBreakdown specifically for the requests field + if "requests" in compiled: + 
benchmark_kwargs[key] = compiled["requests"] + continue + # TODO: Review Cursor generated code (end) + if key not in benchmark_kwargs: benchmark_kwargs[key] = compiled continue diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py index 53714fab..308127df 100644 --- a/src/guidellm/benchmark/entrypoints.py +++ b/src/guidellm/benchmark/entrypoints.py @@ -6,6 +6,11 @@ from typing import Any, Literal from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict + +# TODO: Review Cursor generated code (start) +from loguru import logger + +# TODO: Review Cursor generated code (end) from pydantic import validate_call from rich.console import Console from transformers import ( # type: ignore[import] @@ -60,7 +65,147 @@ async def benchmark_with_scenario(scenario: Scenario, **kwargs): """ if isinstance(scenario, GenerativeTextScenario): - return await benchmark_generative_text(**vars(scenario), **kwargs) + # TODO: Review Cursor generated code (start) + # Map scenario fields to function parameters + # Use model_dump() to get values, but handle special cases for problematic fields + scenario_vars = scenario.model_dump() + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Debug logging to understand the data field issue + logger.debug(f"DEBUG: scenario.data type: {type(scenario.data)}") + logger.debug(f"DEBUG: scenario.data value: {scenario.data}") + logger.debug( + f"DEBUG: scenario_vars['data'] type: {type(scenario_vars.get('data'))}" + ) + logger.debug(f"DEBUG: scenario_vars['data'] value: {scenario_vars.get('data')}") + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Handle the data field specially if it's a ValidatorIterator + # This happens when Pydantic converts string data to an iterable during validation + if "data" in scenario_vars and "ValidatorIterator" in str( + type(scenario_vars["data"]) + ): + logger.debug("DEBUG: Detected ValidatorIterator for data field") + # Try to get the original string value + # For CLI usage, the data should be a string like "prompt_tokens=256,output_tokens=128" + try: + # Access the actual scenario field to get the real string value + actual_data = getattr(scenario, "data", None) + logger.debug( + f"DEBUG: actual_data from scenario.data: {actual_data}, type: {type(actual_data)}" + ) + if isinstance(actual_data, str): + scenario_vars["data"] = actual_data + logger.debug( + f"DEBUG: Updated scenario_vars['data'] to string: {actual_data}" + ) + elif hasattr(actual_data, "__iter__") and not isinstance( + actual_data, (dict, str) + ): + # If it's an iterable, try to extract the string representation + # For now, we'll just keep the original value and let the function handle it + logger.debug( + "DEBUG: data is iterable but not string/dict, keeping original" + ) + except Exception as e: + logger.debug(f"DEBUG: Exception handling ValidatorIterator: {e}") + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + function_params = {} + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Direct mappings (same name) + direct_mapping_fields = [ + "target", + "data", + "random_seed", + "model", + "processor", + "processor_args", + "data_args", + "data_sampler", + "max_seconds", + "max_requests", + "max_error_rate", + "backend_args", + ] + for field in direct_mapping_fields: + if field in scenario_vars: + function_params[field] = 
scenario_vars[field] + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Handle rate specially - only include if not None + if "rate" in scenario_vars and scenario_vars["rate"] is not None: + function_params["rate"] = scenario_vars["rate"] + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Field name mappings (different names) + field_mappings = { + "backend_type": "backend", + "rate_type": "profile", + "warmup_percent": "warmup", + "cooldown_percent": "cooldown", + "output_sampling": "request_samples", + } + for scenario_field, function_param in field_mappings.items(): + if scenario_field in scenario_vars: + function_params[function_param] = scenario_vars[scenario_field] + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Handle kwargs mappings (CLI parameters to function parameters) + final_kwargs = {} + kwargs_mappings = { + "output_path": "save_path", + } + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + for cli_param, function_param in kwargs_mappings.items(): + if cli_param in kwargs: + final_kwargs[function_param] = kwargs[cli_param] + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Handle special kwargs that need transformation + if "show_progress" in kwargs: + # Direct mapping: show_progress=True means print_updates=True + final_kwargs["print_updates"] = kwargs.get("show_progress", True) + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Filter out CLI-specific parameters that don't map to function parameters + # These will be handled differently by the function's internal logic + filtered_kwargs = [ + "show_progress_scheduler_stats", + "output_console", + "output_extras", + ] + for kwarg in filtered_kwargs: + # These parameters don't directly map to function parameters, so we skip them + pass + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Debug logging for function parameters + logger.debug( + f"DEBUG: Final function_params keys: {list(function_params.keys())}" + ) + logger.debug( + f"DEBUG: Final function_params['data']: {function_params.get('data')}, type: {type(function_params.get('data'))}" + ) + logger.debug(f"DEBUG: Final final_kwargs: {final_kwargs}") + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + return await benchmark_generative_text(**function_params, **final_kwargs) + # TODO: Review Cursor generated code (end) else: raise ValueError(f"Unsupported Scenario type {type(scenario)}") @@ -115,6 +260,26 @@ async def benchmark_generative_text( ) -> tuple[GenerativeBenchmarksReport, dict[str, Any]]: console = Console(quiet=not print_updates) + # TODO: Review Cursor generated code (start) + # Fix ValidatorIterator issue: convert it back to string if needed + if "ValidatorIterator" in str(type(data)): + try: + # Try to extract the original string from the ValidatorIterator + # For CLI synthetic data like "prompt_tokens=256,output_tokens=128" + if hasattr(data, "__iter__"): + # Convert iterator to list and reconstruct the string + data_list = list(data) + if len(data_list) > 0 and all( + isinstance(item, str) for item in data_list + ): + # If all items are strings (characters), join them back into the original string + data = "".join(data_list) + elif len(data_list) == 1 and isinstance(data_list[0], str): + 
data = data_list[0] + except Exception: + pass + # TODO: Review Cursor generated code (end) + backend = ( Backend.create(backend, target=target, model=model, **(backend_args or {})) if not isinstance(backend, Backend) @@ -160,9 +325,23 @@ async def benchmark_generative_text( random_seed=random_seed, ) unique_requests = request_loader.num_unique_items(raise_err=False) + + # TODO: Review Cursor generated code (start) + # Try to get info or description, with fallback + info_str = "GenerativeRequestLoader" + if hasattr(request_loader, "info"): + info_str = request_loader.info + elif hasattr(request_loader, "description"): + info_str = str(request_loader.description) + elif hasattr(request_loader, "data"): + info_str = f"data={request_loader.data}" + # TODO: Review Cursor generated code (end) + console.print( f"[{Colors.SUCCESS}]Request loader created:[/{Colors.SUCCESS}] " - f"with {unique_requests} unique requests, {request_loader.info}" + # TODO: Review Cursor generated code (start) + f"with {unique_requests} unique requests, {info_str}" + # TODO: Review Cursor generated code (end) ) for key, val in { @@ -175,20 +354,54 @@ async def benchmark_generative_text( if val is not None: constraints[key] = val if not isinstance(profile, Profile): - profile = Profile.create( - rate_type=profile, - rate=rate, - random_seed=random_seed, - constraints={**constraints}, - ) + # TODO: Review Cursor generated code (start) + # Fix rate parameter if it's a list with single value + rate_param = rate + if isinstance(rate, list) and len(rate) == 1: + rate_param = rate[0] + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Handle rate parameter for different profile types + profile_kwargs = { + "rate_type": profile, + "random_seed": random_seed, + "constraints": {**constraints}, + } + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # For synchronous profiles, rate must be None + if profile == "synchronous": + profile_kwargs["rate"] = None + elif rate_param is not UNSET: + profile_kwargs["rate"] = rate_param + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + profile = Profile.create(**profile_kwargs) + # TODO: Review Cursor generated code (end) elif constraints: raise ValueError( "Constraints must be empty or unset when providing a Profile instance. 
" f"Provided constraints: {constraints} ; provided profile: {profile}" ) + # TODO: Review Cursor generated code (start) + # Try to get profile info with fallback + profile_info = "" + if hasattr(profile, "info"): + profile_info = profile.info + elif hasattr(profile, "type_"): + profile_info = f"type={profile.type_}" + else: + profile_info = str(profile) + # TODO: Review Cursor generated code (end) + console.print( f"[{Colors.SUCCESS}]Profile created:[/{Colors.SUCCESS}] " - f"{profile.__class__.__name__} {profile.info}" + # TODO: Review Cursor generated code (start) + f"{profile.__class__.__name__} {profile_info}" + # TODO: Review Cursor generated code (end) ) aggregators = ( @@ -209,8 +422,17 @@ async def benchmark_generative_text( f"{len(aggregators)} aggregators: {', '.join(aggregators.keys())}" ) + # TODO: Review Cursor generated code (start) + # Handle UNSET parameter for progress + progress_instances = [] if progress is UNSET or progress is None else progress + progress_enabled = progress is not UNSET and progress is not None + # TODO: Review Cursor generated code (end) + progress_group = BenchmarkerProgressGroup( - instances=progress or [], enabled=progress is not None + # TODO: Review Cursor generated code (start) + instances=progress_instances, + enabled=progress_enabled, + # TODO: Review Cursor generated code (end) ) report = GenerativeBenchmarksReport() console.print(f"[{Colors.INFO}]Starting benchmark run...[/{Colors.INFO}]\n\n\n") @@ -227,7 +449,9 @@ async def benchmark_generative_text( GenerationRequest, GenerationRequestTimings, GenerationResponse, - ].run( + # TODO: Review Cursor generated code (start) + ]().run( + # TODO: Review Cursor generated code (end) requests=request_loader, backend=backend, profile=profile, @@ -249,11 +473,39 @@ async def benchmark_generative_text( ) if outputs == UNSET: - outputs = { - "console": {"save_path": save_path}, - "csv": {"save_path": save_path}, - "html": {"save_path": save_path}, - } + # TODO: Review Cursor generated code (start) + outputs = {"console": {"save_path": save_path}} + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Infer output format from file extension if save_path is provided + if save_path is not None and save_path is not UNSET: + save_path_obj = ( + Path(save_path) if not isinstance(save_path, Path) else save_path + ) + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # If it's a directory, use default JSON + if save_path_obj.is_dir(): + outputs["json"] = {"save_path": save_path} + else: + # Infer format from file extension + extension = save_path_obj.suffix.lower() + if extension == ".json": + # JSON output is handled by report.save_file(), don't add duplicate + pass + elif extension == ".yaml" or extension == ".yml": + # YAML output is handled by report.save_file(), don't add duplicate + pass + elif extension == ".csv": + outputs["csv"] = {"save_path": save_path} + elif extension == ".html" or extension == ".htm": + outputs["html"] = {"save_path": save_path} + else: + # Unknown extension, default to JSON via report.save_file() + pass + # TODO: Review Cursor generated code (end) for key, output in GenerativeBenchmarkerOutput.resolve(outputs or {}).items(): finalized_outputs[key] = await output.finalize(report) console.print( diff --git a/src/guidellm/benchmark/output.py b/src/guidellm/benchmark/output.py index 366236ff..d03a57d5 100644 --- a/src/guidellm/benchmark/output.py +++ b/src/guidellm/benchmark/output.py @@ -47,6 +47,15 
@@ class GenerativeBenchmarkerOutput( PydanticClassRegistryMixin[type["GenerativeBenchmarkerOutput"]], ABC ): + # TODO: Review Cursor generated code (start) + @classmethod + def __pydantic_schema_base_type__(cls) -> type[GenerativeBenchmarkerOutput]: + if cls.__name__ == "GenerativeBenchmarkerOutput": + return cls + return GenerativeBenchmarkerOutput + + # TODO: Review Cursor generated code (end) + @classmethod @abstractmethod def validated_kwargs(cls, *args, **kwargs) -> dict[str, Any]: @@ -99,7 +108,6 @@ class GenerativeBenchmarkerConsole(GenerativeBenchmarkerOutput): ) @classmethod - @abstractmethod def validated_kwargs(cls, *args, **kwargs) -> dict[str, Any]: return {} @@ -164,10 +172,14 @@ def _print_benchmarks_info(self, benchmarks: list[GenerativeBenchmark]): for benchmark in benchmarks: rows.append( [ - str(benchmark.scheduler["strategy"]), - datetime.fromtimestamp(benchmark.start_time).strftime("%H:%M:%S"), - datetime.fromtimestamp(benchmark.end_time).strftime("%H:%M:%S"), - f"{(benchmark.end_time - benchmark.start_time):.1f}", + # TODO: Review Cursor generated code (start) + str(benchmark.scheduler.strategy), + self._safe_format_timestamp(benchmark.start_time), + self._safe_format_timestamp(benchmark.end_time), + f"{(benchmark.end_time - benchmark.start_time):.1f}" + if benchmark.end_time > 0 and benchmark.start_time > 0 + else "N/A", + # TODO: Review Cursor generated code (end) f"{benchmark.request_totals.successful:.0f}", f"{benchmark.request_totals.incomplete:.0f}", f"{benchmark.request_totals.errored:.0f}", @@ -223,7 +235,9 @@ def _print_benchmarks_stats(self, benchmarks: list[GenerativeBenchmark]): for benchmark in benchmarks: rows.append( [ - str(benchmark.scheduler["strategy"]), + # TODO: Review Cursor generated code (start) + str(benchmark.scheduler.strategy), + # TODO: Review Cursor generated code (end) f"{benchmark.metrics.requests_per_second.successful.mean:.2f}", f"{benchmark.metrics.request_concurrency.successful.mean:.2f}", f"{benchmark.metrics.output_tokens_per_second.successful.mean:.1f}", @@ -246,7 +260,9 @@ def _print_benchmarks_stats(self, benchmarks: list[GenerativeBenchmark]): self._print_table(headers, rows, "Benchmarks Stats", sections) def _get_profile_str(self, benchmark: GenerativeBenchmark) -> str: - profile = benchmark.benchmarker.get("profile") + # TODO: Review Cursor generated code (start) + profile = benchmark.benchmarker.profile + # TODO: Review Cursor generated code (end) if profile is None: return "None" @@ -264,12 +280,91 @@ def _get_profile_str(self, benchmark: GenerativeBenchmark) -> str: elif isinstance(profile, AsyncProfile): profile_args["max_concurrency"] = str(profile.max_concurrency) profile_args["rate"] = str(profile.rate) - profile_args["initial_burst"] = str(profile.initial_burst) + # TODO: Review Cursor generated code (start) + profile_args["startup_duration"] = str(profile.startup_duration) + # TODO: Review Cursor generated code (end) elif isinstance(profile, SweepProfile): profile_args["sweep_size"] = str(profile.sweep_size) return ", ".join(f"{key}={value}" for key, value in profile_args.items()) + # TODO: Review Cursor generated code (start) + def _get_scheduler_str(self, benchmark: GenerativeBenchmark) -> str: + scheduler = benchmark.scheduler + scheduler_args = OrderedDict() + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + if "strategy" in scheduler: + strategy = scheduler["strategy"] + scheduler_args["strategy"] = getattr(strategy, "type_", str(strategy)) + # TODO: Review Cursor 
generated code (end) + + # TODO: Review Cursor generated code (start) + if "constraints" in scheduler and scheduler["constraints"]: + constraints = scheduler["constraints"] + scheduler_args["constraints"] = ", ".join(constraints.keys()) + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + return ( + ", ".join(f"{key}={value}" for key, value in scheduler_args.items()) + if scheduler_args + else "None" + ) + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + def _get_env_args_str(self, benchmark: GenerativeBenchmark) -> str: + env_args = benchmark.env_args + if not env_args: + return "None" + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Extract key-value pairs from env_args using model_dump() for Pydantic objects + args_items = [] + try: + env_dict = ( + env_args.model_dump() + if hasattr(env_args, "model_dump") + else dict(env_args) + ) + for key, value in env_dict.items(): + if isinstance(value, (str, int, float, bool)): + args_items.append(f"{key}={value}") + elif value is None: + args_items.append(f"{key}=None") + except Exception: + # Fallback: return string representation + return str(env_args) + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + return ", ".join(args_items) if args_items else "None" + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + def _safe_format_timestamp(self, timestamp: float) -> str: + """ + Safely format a timestamp, handling invalid values. + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + :param timestamp: Unix timestamp to format + :return: Formatted time string or "N/A" for invalid timestamps + """ + try: + # Check if timestamp is valid (positive and within reasonable range) + if ( + timestamp <= 0 or timestamp > 2147483647 + ): # Max 32-bit timestamp (year 2038) + return "N/A" + return datetime.fromtimestamp(timestamp).strftime("%H:%M:%S") + except (ValueError, OverflowError, OSError): + return "N/A" + # TODO: Review Cursor generated code (end) + def _get_args_str(self, benchmark: GenerativeBenchmark) -> str: args = benchmark.args args_dict = OrderedDict( @@ -288,7 +383,17 @@ def _get_extras_str(self, benchmark: GenerativeBenchmark) -> str: extras = benchmark.extras if not extras: return "None" - return ", ".join(f"{key}={value}" for key, value in extras.items()) + + # TODO: Review Cursor generated code (start) + try: + extras_dict = ( + extras.model_dump() if hasattr(extras, "model_dump") else dict(extras) + ) + return ", ".join(f"{key}={value}" for key, value in extras_dict.items()) + except Exception: + # Fallback: return string representation + return str(extras) + # TODO: Review Cursor generated code (end) def _print_section_header(self, title: str, indent: int = 0, new_lines: int = 2): self._print_line( @@ -493,7 +598,6 @@ class GenerativeBenchmarkerCSV(GenerativeBenchmarkerOutput): DEFAULT_FILE: ClassVar[str] = "benchmarks.json" @classmethod - @abstractmethod def validated_kwargs(cls, save_path: str | Path | None, **kwargs) -> dict[str, Any]: new_kwargs = {} if save_path is not None: @@ -574,7 +678,9 @@ def _get_benchmark_desc_headers_and_values( benchmark.type_, benchmark.run_id, benchmark.id_, - str(benchmark.args.strategy), + # TODO: Review Cursor generated code (start) + str(benchmark.scheduler.strategy), + # TODO: Review Cursor generated code (end) 
datetime.fromtimestamp(benchmark.start_time).strftime("%Y-%m-%d %H:%M:%S"), datetime.fromtimestamp(benchmark.end_time).strftime("%Y-%m-%d %H:%M:%S"), benchmark.duration, @@ -585,12 +691,62 @@ def _get_benchmark_extras_headers_and_values( self, benchmark: GenerativeBenchmark ) -> tuple[list[str], list[str]]: """Get extra fields headers and values for a benchmark.""" - headers = ["Args", "Worker", "Request Loader", "Extras"] + # TODO: Review Cursor generated code (start) + headers = ["Benchmarker", "Environment", "Scheduler", "Extras"] + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Use available fields with safe access for Pydantic objects + try: + benchmarker_data = ( + benchmark.benchmarker.model_dump() + if hasattr(benchmark.benchmarker, "model_dump") + else str(benchmark.benchmarker) + ) + except Exception: + benchmarker_data = str(benchmark.benchmarker) + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + try: + env_data = ( + benchmark.env_args.model_dump() + if hasattr(benchmark.env_args, "model_dump") + else str(benchmark.env_args) + ) + except Exception: + env_data = str(benchmark.env_args) + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + try: + scheduler_data = ( + benchmark.scheduler.model_dump() + if hasattr(benchmark.scheduler, "model_dump") + else str(benchmark.scheduler) + ) + except Exception: + scheduler_data = str(benchmark.scheduler) + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + try: + extras_data = ( + benchmark.extras.model_dump() + if hasattr(benchmark.extras, "model_dump") + else str(benchmark.extras) + ) + except Exception: + extras_data = str(benchmark.extras) + # TODO: Review Cursor generated code (end) + values: list[str] = [ - json.dumps(benchmark.args.model_dump()), - json.dumps(benchmark.worker.model_dump()), - json.dumps(benchmark.request_loader.model_dump()), - json.dumps(benchmark.extras), + # TODO: Review Cursor generated code (start) + json.dumps(benchmarker_data), + json.dumps(env_data), + json.dumps(scheduler_data), + json.dumps(extras_data), + # TODO: Review Cursor generated code (end) ] return headers, values @@ -655,7 +811,6 @@ class GenerativeBenchmarkerHTML(GenerativeBenchmarkerOutput): DEFAULT_FILE: ClassVar[str] = "benchmarks.html" @classmethod - @abstractmethod def validated_kwargs(cls, save_path: str | Path | None, **kwargs) -> dict[str, Any]: new_kwargs = {} if save_path is not None: diff --git a/src/guidellm/benchmark/scenario.py b/src/guidellm/benchmark/scenario.py index 15e3cd81..74f845bb 100644 --- a/src/guidellm/benchmark/scenario.py +++ b/src/guidellm/benchmark/scenario.py @@ -100,6 +100,9 @@ class Config: ) max_seconds: PositiveFloat | None = None max_requests: PositiveInt | None = None + # TODO: Review Cursor generated code (start) + max_error_rate: Annotated[float | None, Field(gt=0, lt=1)] = None + # TODO: Review Cursor generated code (end) warmup_percent: Annotated[float | None, Field(gt=0, le=1)] = None cooldown_percent: Annotated[float | None, Field(gt=0, le=1)] = None output_sampling: NonNegativeInt | None = None diff --git a/src/guidellm/presentation/data_models.py b/src/guidellm/presentation/data_models.py index 2c0c8977..6f31ee74 100644 --- a/src/guidellm/presentation/data_models.py +++ b/src/guidellm/presentation/data_models.py @@ -67,7 +67,25 @@ class RunInfo(BaseModel): @classmethod def from_benchmarks(cls, benchmarks: 
list["GenerativeBenchmark"]): - model = benchmarks[0].worker.backend_model or "N/A" + # TODO: Review Cursor generated code (start) + # Try to extract model from benchmarker.backend with safe fallbacks + model = "N/A" + try: + backend = benchmarks[0].benchmarker.backend + if isinstance(backend, dict) and "model" in backend: + model = backend["model"] or "N/A" + elif hasattr(backend, "model"): + model = getattr(backend, "model", "N/A") or "N/A" + elif isinstance(backend, dict) and "info" in backend: + # Try to extract model from info string + info = backend["info"] + if isinstance(info, str) and "model" in info.lower(): + model = info + else: + model = "N/A" + except Exception: + model = "N/A" + # TODO: Review Cursor generated code (end) timestamp = max( bm.run_stats.start_time for bm in benchmarks if bm.start_time is not None ) @@ -108,8 +126,31 @@ class WorkloadDetails(BaseModel): @classmethod def from_benchmarks(cls, benchmarks: list["GenerativeBenchmark"]): - target = benchmarks[0].worker.backend_target - rate_type = benchmarks[0].args.profile.type_ + # TODO: Review Cursor generated code (start) + # Try to extract target from benchmarker.backend with safe fallbacks + target = "N/A" + try: + backend = benchmarks[0].benchmarker.backend + if isinstance(backend, dict) and "target" in backend: + target = backend["target"] or "N/A" + elif hasattr(backend, "target"): + target = getattr(backend, "target", "N/A") or "N/A" + except Exception: + target = "N/A" + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Try to extract rate_type from benchmarker.profile with safe fallbacks + rate_type = "N/A" + try: + profile = benchmarks[0].benchmarker.profile + if hasattr(profile, "type_"): + rate_type = getattr(profile, "type_", "N/A") or "N/A" + elif isinstance(profile, dict) and "type_" in profile: + rate_type = profile["type_"] or "N/A" + except Exception: + rate_type = "N/A" + # TODO: Review Cursor generated code (end) successful_requests = [ req for bm in benchmarks for req in bm.requests.successful ] @@ -155,10 +196,14 @@ def from_benchmarks(cls, benchmarks: list["GenerativeBenchmark"]): min_start_time = benchmarks[0].run_stats.start_time all_req_times = [ - req.start_time - min_start_time + # TODO: Review Cursor generated code (start) + req.scheduler_info.request_timings.request_start - min_start_time + # TODO: Review Cursor generated code (end) for bm in benchmarks for req in bm.requests.successful - if req.start_time is not None + # TODO: Review Cursor generated code (start) + if req.scheduler_info.request_timings.request_start is not None + # TODO: Review Cursor generated code (end) ] number_of_buckets = len(benchmarks) request_over_time_buckets, bucket_width = Bucket.from_data( diff --git a/src/guidellm/scheduler/worker.py b/src/guidellm/scheduler/worker.py index d173d80c..d82db99c 100644 --- a/src/guidellm/scheduler/worker.py +++ b/src/guidellm/scheduler/worker.py @@ -134,6 +134,20 @@ def run(self): try: asyncio.run(self.run_async()) except Exception as exc: + # TODO: Review Cursor generated code (start) + # Add detailed logging for debugging worker failures + from loguru import logger + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + logger.error( + f"Worker process {self.local_rank} detailed error: {type(exc).__name__}: {exc}" + ) + logger.error( + f"Worker process {self.local_rank} error traceback:", exc_info=True + ) + # TODO: Review Cursor generated code (end) + self.error_event.set() raise 
RuntimeError( f"Worker process {self.local_rank} encountered an error: {exc}" @@ -383,6 +397,52 @@ async def _process_next_request(self): # Complete request_info.scheduler_timings.resolve_end = time.time() + + # TODO: Review Cursor generated code (start) + # Debug timing data before final update + from loguru import logger + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + if ( + hasattr(request_info, "request_timings") + and request_info.request_timings + ): + logger.debug( + f"Worker: Final timing data before completed update: " + f"first_iteration={getattr(request_info.request_timings, 'first_iteration', None)}, " + f"last_iteration={getattr(request_info.request_timings, 'last_iteration', None)}" + ) + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Debug timing data before final update + from loguru import logger + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + if ( + hasattr(request_info, "request_timings") + and request_info.request_timings + ): + logger.debug( + f"Worker: Before final update - timing type: {type(request_info.request_timings)}" + ) + if hasattr(request_info.request_timings, "first_iteration"): + logger.debug( + f"Worker: Before final update - first_iteration: {getattr(request_info.request_timings, 'first_iteration', 'N/A')}" + ) + logger.debug( + f"Worker: Before final update - last_iteration: {getattr(request_info.request_timings, 'last_iteration', 'N/A')}" + ) + else: + logger.debug( + "Worker: Before final update - NO first_iteration attribute" + ) + else: + logger.debug("Worker: Before final update - NO request_timings") + # TODO: Review Cursor generated code (end) + await self._handle_request_update( new_status="completed", response=response, @@ -448,8 +508,55 @@ async def _handle_request_update( ): # Haven't sent resolved update yet request_info.status = new_status + + # TODO: Review Cursor generated code (start) + # Debug before model_copy + from loguru import logger + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + if ( + hasattr(request_info, "request_timings") + and request_info.request_timings + ): + logger.debug( + f"Worker: Before model_copy - timing type: {type(request_info.request_timings)}" + ) + if hasattr(request_info.request_timings, "first_iteration"): + logger.debug( + f"Worker: Before model_copy - first_iteration: {getattr(request_info.request_timings, 'first_iteration', 'N/A')}" + ) + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + copied_info = request_info.model_copy(deep=True) + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Debug after model_copy + if ( + hasattr(copied_info, "request_timings") + and copied_info.request_timings + ): + logger.debug( + f"Worker: After model_copy - timing type: {type(copied_info.request_timings)}" + ) + if hasattr(copied_info.request_timings, "first_iteration"): + logger.debug( + f"Worker: After model_copy - first_iteration: {getattr(copied_info.request_timings, 'first_iteration', 'N/A')}" + ) + else: + logger.debug( + "Worker: After model_copy - NO first_iteration attribute" + ) + else: + logger.debug("Worker: After model_copy - NO request_timings") + # TODO: Review Cursor generated code (end) + await self.pending_updates_queue.async_put( - (response, request, request_info.model_copy()) + # TODO: Review Cursor generated code (start) 
+ (response, request, copied_info) + # TODO: Review Cursor generated code (end) ) prev_status = new_status diff --git a/src/guidellm/scheduler/worker_group.py b/src/guidellm/scheduler/worker_group.py index d4a3fc2d..4680c6e1 100644 --- a/src/guidellm/scheduler/worker_group.py +++ b/src/guidellm/scheduler/worker_group.py @@ -300,10 +300,15 @@ async def shutdown(self) -> list[Exception]: # noqa: C901 """ exceptions: list[Exception] = [] + # TODO: Review Cursor generated code (start) + # Signal shutdown to workers but don't clear the event yet + # TODO: Review Cursor generated code (end) if self.shutdown_event is not None: self.shutdown_event.set() - self.shutdown_event = None + # TODO: Review Cursor generated code (start) + # Cancel and wait for background tasks first before clearing events + # TODO: Review Cursor generated code (end) cancel_tasks = [ task for task in (self.populate_requests_task, self.populate_updates_task) @@ -332,7 +337,13 @@ async def shutdown(self) -> list[Exception]: # noqa: C901 self.processes = None self.mp_context = None + # TODO: Review Cursor generated code (start) + # Now it's safe to clear the events since tasks are cancelled + # TODO: Review Cursor generated code (end) self.startup_barrier = None + # TODO: Review Cursor generated code (start) + self.shutdown_event = None + # TODO: Review Cursor generated code (end) self.error_event = None self.requests_queue = None self.updates_queue = None @@ -450,6 +461,20 @@ def _populate_requests_generator(self, scheduler_start_time: float): ) yield None # Yield to check for error in wrapper to stop except Exception as err: # noqa: BLE001 + # TODO: Review Cursor generated code (start) + # Add detailed logging for debugging worker group request processing failures + from loguru import logger + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + logger.error( + f"WorkerProcessGroup request processing error: {type(err).__name__}: {err}" + ) + logger.error( + "WorkerProcessGroup request processing traceback:", exc_info=True + ) + # TODO: Review Cursor generated code (end) + self.error_event.set() raise err finally: @@ -489,17 +514,59 @@ def _populate_requests_next_message( ) -> tuple[tuple[bytes, bytes] | None, bool]: try: request = next(request_iter) - request_info = ScheduledRequestInfo[MeasuredRequestTimingsT]( + # TODO: Review Cursor generated code (start) + # Initialize the request_timings based on backend type + from guidellm.backend.objects import GenerationRequestTimings + from guidellm.backend.openai import OpenAIHTTPBackend + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + request_timings = None + if isinstance(self.backend, OpenAIHTTPBackend): + request_timings = GenerationRequestTimings() + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + request_info = ScheduledRequestInfo[GenerationRequestTimings]( + # TODO: Review Cursor generated code (end) request_id=( request if isinstance(request, str) - else getattr(request, "id_", getattr(request, "id", id(request))) + # TODO: Review Cursor generated code (start) + else str( + getattr(request, "id_", getattr(request, "id", id(request))) + ) + # TODO: Review Cursor generated code (end) ), status="queued", scheduler_node_id=-1, scheduler_process_id=0, scheduler_start_time=scheduler_start_time, + # TODO: Review Cursor generated code (start) + request_timings=request_timings, + ) + # TODO: Review Cursor generated code (end) + + # TODO: Review 
Cursor generated code (start) + # Debug: Check what type the request_timings actually has + from loguru import logger + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + logger.debug( + f"WorkerGroup: Created request_info with request_timings type: {type(request_info.request_timings)}" + # TODO: Review Cursor generated code (end) ) + # TODO: Review Cursor generated code (start) + if hasattr(request_info.request_timings, "first_iteration"): + logger.debug( + "WorkerGroup: request_timings HAS first_iteration attribute" + ) + else: + logger.debug( + "WorkerGroup: request_timings MISSING first_iteration attribute" + ) + # TODO: Review Cursor generated code (end) state, continue_requests, _ = self._update_state(request_info) request_msg = MsgpackEncoding.encode((request, request_info)) @@ -558,6 +625,20 @@ def _populate_updates_generator(self): yield None # Yield to check for error in wrapper to stop except Exception as err: # noqa: BLE001 + # TODO: Review Cursor generated code (start) + # Add detailed logging for debugging worker group updates processing failures + from loguru import logger + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + logger.error( + f"WorkerProcessGroup updates processing error: {type(err).__name__}: {err}" + ) + logger.error( + "WorkerProcessGroup updates processing traceback:", exc_info=True + ) + # TODO: Review Cursor generated code (end) + self.error_event.set() raise err finally: @@ -568,8 +649,95 @@ def _populate_updates_process_next( ) -> tuple[SchedulerState | None, bool]: try: message = self.updates_queue.get(timeout=settings.scheduler_poll_interval) + # TODO: Review Cursor generated code (start) + # Debug raw message before decoding + from loguru import logger + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + logger.debug( + f"WorkerGroup: Raw message type: {type(message)}, size: {len(message) if hasattr(message, '__len__') else 'unknown'}" + ) + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Debug the decoded structure to see what's inside + try: + import msgpack + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + raw_decoded = msgpack.unpackb(message, strict_map_key=False) + logger.debug( + f"WorkerGroup: Raw msgpack decoded type: {type(raw_decoded)}" + ) + if isinstance(raw_decoded, (list, tuple)) and len(raw_decoded) >= 3: + _, _, request_info_data = raw_decoded + logger.debug( + f"WorkerGroup: Raw request_info_data type: {type(request_info_data)}" + ) + if isinstance(request_info_data, dict): + logger.debug( + f"WorkerGroup: Raw request_info_data keys: {list(request_info_data.keys())}" + ) + # TODO: Review Cursor generated code (end) + + # TODO: Review Cursor generated code (start) + # Check the actual Pydantic data inside the 'data' key + if "data" in request_info_data: + pydantic_data = request_info_data["data"] + logger.debug( + f"WorkerGroup: Pydantic data type: {type(pydantic_data)}" + ) + if isinstance(pydantic_data, dict): + logger.debug( + f"WorkerGroup: Pydantic data has request_timings: {'request_timings' in pydantic_data}" + ) + if "request_timings" in pydantic_data: + rt_data = pydantic_data["request_timings"] + logger.debug( + f"WorkerGroup: Raw request_timings data: {rt_data}" + ) + if isinstance(rt_data, dict): + logger.debug( + f"WorkerGroup: Raw first_iteration: {rt_data.get('first_iteration')}" + ) + logger.debug( 
+ f"WorkerGroup: Raw last_iteration: {rt_data.get('last_iteration')}" + ) + else: + logger.debug( + f"WorkerGroup: Pydantic data keys: {list(pydantic_data.keys())}" + ) + else: + logger.debug( + "WorkerGroup: No 'data' key in request_info_data" + ) + except Exception as e: + logger.debug(f"WorkerGroup: Error inspecting raw msgpack: {e}") + # TODO: Review Cursor generated code (end) + response, request, request_info = MsgpackEncoding.decode(message) + # TODO: Review Cursor generated code (start) + # Debug timing data received from worker process + if ( + hasattr(request_info, "request_timings") + and request_info.request_timings + ): + logger.debug( + f"WorkerGroup: Received timing data from worker: " + f"status={request_info.status}, " + f"first_iteration={getattr(request_info.request_timings, 'first_iteration', None)}, " + f"last_iteration={getattr(request_info.request_timings, 'last_iteration', None)}" + ) + else: + logger.debug( + f"WorkerGroup: Received from worker with no timing data: status={request_info.status}, " + f"request_timings type: {type(getattr(request_info, 'request_timings', None))}" + ) + # TODO: Review Cursor generated code (end) + scheduler_state, _, continue_updates = self._update_state(request_info) self.pending_updates_queue.sync_put( (response, request, request_info, scheduler_state) diff --git a/src/guidellm/utils/encoding.py b/src/guidellm/utils/encoding.py index 42a94822..641757c0 100644 --- a/src/guidellm/utils/encoding.py +++ b/src/guidellm/utils/encoding.py @@ -76,7 +76,9 @@ def to_primitive(cls, obj: Any) -> Any: encoded = { cls.PYDANTIC_TAG: f"{origin.__module__}.{origin.__name__}", - cls.PYDANTIC_DATA: obj.model_dump(), + # TODO: Review Cursor generated code (start) + cls.PYDANTIC_DATA: obj.model_dump(exclude_none=False), + # TODO: Review Cursor generated code (end) } if args: diff --git a/tests/e2e/README.md b/tests/e2e/README.md new file mode 100644 index 00000000..c29c148d --- /dev/null +++ b/tests/e2e/README.md @@ -0,0 +1,12 @@ +# E2E tests + +The E2E tests in GuideLLM use the [vLLM simulator by llm-d](https://llm-d.ai/docs/architecture/Components/inf-simulator), to run them run the following command: + +```shell +docker build . -f tests/e2e/vllm-sim.Dockerfile -o type=local,dest=./ +``` + +Then to run the tests: +```shell +tox -e test-e2e +``` diff --git a/tests/e2e/test_max_error_benchmark.py b/tests/e2e/test_max_error_benchmark.py new file mode 100644 index 00000000..6079b21c --- /dev/null +++ b/tests/e2e/test_max_error_benchmark.py @@ -0,0 +1,72 @@ +# E2E test for max error rate constraint functionality + +from pathlib import Path + +import pytest + +from tests.e2e.utils import ( + GuidellmClient, + assert_constraint_triggered, + assert_no_python_exceptions, + cleanup_report_file, + load_benchmark_report, +) +from tests.e2e.vllm_sim_server import VllmSimServer + + +@pytest.fixture(scope="module") +def server(): + """ + Pytest fixture to start and stop the server for the entire module + using the TestServer class. + """ + server = VllmSimServer(port=8000, model="databricks/dolly-v2-12b", mode="echo") + try: + server.start() + yield server # Yield the URL for tests to use + finally: + server.stop() # Teardown: Stop the server after tests are done + + +@pytest.mark.timeout(30) +def test_max_error_benchmark(server: VllmSimServer): + """ + Test that the max error rate constraint is properly triggered when server goes down. 
+ """ + report_path = Path("tests/e2e/max_error_benchmarks.json") + rate = 10 + max_error_rate = 0.1 + + # Create and configure the guidellm client + client = GuidellmClient(target=server.get_url(), output_path=report_path) + + try: + # Start the benchmark + client.start_benchmark( + rate=rate, + max_seconds=25, + max_error_rate=max_error_rate, + ) + + # Wait for the benchmark to complete (server will be stopped after 10 seconds) + client.wait_for_completion(timeout=30, stop_server_after=10, server=server) + + # Assert no Python exceptions occurred + assert_no_python_exceptions(client.stderr) + + # Load and validate the report + report = load_benchmark_report(report_path) + benchmark = report["benchmarks"][0] + + # Check that the max error rate constraint was triggered + assert_constraint_triggered( + benchmark, + "max_error_rate", + { + "exceeded_error_rate": True, + "current_error_rate": lambda rate: rate >= max_error_rate, + }, + ) + + finally: + cleanup_report_file(report_path)
diff --git a/tests/e2e/test_placeholder.py b/tests/e2e/test_placeholder.py deleted file mode 100644 index 0d35031c..00000000 --- a/tests/e2e/test_placeholder.py +++ /dev/null @@ -1,6 +0,0 @@ -import pytest - - -@pytest.mark.smoke -def test_placeholder(): - assert True
diff --git a/tests/e2e/test_successful_benchmark.py b/tests/e2e/test_successful_benchmark.py new file mode 100644 index 00000000..8f0181a3 --- /dev/null +++ b/tests/e2e/test_successful_benchmark.py @@ -0,0 +1,120 @@ +# E2E tests for successful benchmark scenarios with timing validation + +from pathlib import Path + +import pytest + +from tests.e2e.utils import ( + GuidellmClient, + assert_constraint_triggered, + assert_no_python_exceptions, + assert_successful_requests_fields, + cleanup_report_file, + load_benchmark_report, +) +from tests.e2e.vllm_sim_server import VllmSimServer + + +@pytest.fixture(scope="module") +def server(): + """ + Pytest fixture to start and stop the server for the entire module + using the VllmSimServer class. + """ + server = VllmSimServer( + port=8000, + model="databricks/dolly-v2-12b", + mode="echo", + time_to_first_token=1, # 1ms TTFT + inter_token_latency=1, # 1ms ITL + ) + try: + server.start() + yield server # Yield the server for tests to use + finally: + server.stop() # Teardown: Stop the server after tests are done + + +@pytest.mark.timeout(30) +def test_max_seconds_benchmark(server: VllmSimServer): + """ + Test that the max seconds constraint is properly triggered.
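+
+    The benchmark runs at a constant rate with max_seconds=1, so the report is
+    expected to contain a triggered "max_seconds" constraint with
+    duration_exceeded=True and fully populated timing fields on the successful
+    requests.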
+ """ + report_path = Path("tests/e2e/max_duration_benchmarks.json") + rate = 10 + + # Create and configure the guidellm client + client = GuidellmClient(target=server.get_url(), output_path=report_path) + + try: + # Start the benchmark + client.start_benchmark( + rate=rate, + max_seconds=1, + ) + + # Wait for the benchmark to complete + client.wait_for_completion(timeout=30) + + # Assert no Python exceptions occurred + assert_no_python_exceptions(client.stderr) + + # Load and validate the report + report = load_benchmark_report(report_path) + benchmark = report["benchmarks"][0] + + # Check that the max duration constraint was triggered + assert_constraint_triggered( + benchmark, "max_seconds", {"duration_exceeded": True} + ) + + # Validate successful requests have all expected fields + successful_requests = benchmark["requests"]["successful"] + assert_successful_requests_fields(successful_requests) + + finally: + cleanup_report_file(report_path) + + +@pytest.mark.timeout(30) +def test_max_requests_benchmark(server: VllmSimServer): + """ + Test that the max requests constraint is properly triggered. + """ + report_path = Path("tests/e2e/max_number_benchmarks.json") + rate = 10 + + # Create and configure the guidellm client + client = GuidellmClient(target=server.get_url(), output_path=report_path) + + try: + # Start the benchmark + client.start_benchmark( + rate=rate, + max_requests=rate, + ) + + # Wait for the benchmark to complete + client.wait_for_completion(timeout=30) + + # Assert no Python exceptions occurred + assert_no_python_exceptions(client.stderr) + + # Load and validate the report + report = load_benchmark_report(report_path) + benchmark = report["benchmarks"][0] + + # Check that the max requests constraint was triggered + assert_constraint_triggered( + benchmark, "max_requests", {"processed_exceeded": True} + ) + + # Validate successful requests have all expected fields + successful_requests = benchmark["requests"]["successful"] + assert len(successful_requests) == rate, ( + f"Expected {rate} successful requests, got {len(successful_requests)}" + ) + assert_successful_requests_fields(successful_requests) + + finally: + cleanup_report_file(report_path) diff --git a/tests/e2e/utils.py b/tests/e2e/utils.py new file mode 100644 index 00000000..9357949c --- /dev/null +++ b/tests/e2e/utils.py @@ -0,0 +1,327 @@ +"""Utilities for E2E tests.""" + +import json +import subprocess +import sys +import time +from pathlib import Path +from typing import Optional + +from loguru import logger + + +def get_guidellm_executable() -> str: + """Get the path to the guidellm executable in the current environment.""" + # Get the directory where the current Python executable is located + python_bin_dir = Path(sys.executable).parent + guidellm_path = python_bin_dir / "guidellm" + if guidellm_path.exists(): + return str(guidellm_path) + else: + # Fallback to just "guidellm" if not found + return "guidellm" + + +class GuidellmClient: + """Wrapper class for running guidellm benchmark commands.""" + + def __init__(self, target: str, output_path: Path): + """ + Initialize the guidellm client. 
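+
+        Example (mirrors test_max_seconds_benchmark in test_successful_benchmark.py):
+
+            client = GuidellmClient(target=server.get_url(), output_path=report_path)
+            client.start_benchmark(rate=10, max_seconds=1)
+            client.wait_for_completion(timeout=30)
+            assert_no_python_exceptions(client.stderr)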
+ + :param target: The target URL for the benchmark + :param output_path: Path where the benchmark report will be saved + """ + self.target = target + self.output_path = output_path + self.process: Optional[subprocess.Popen] = None + self.stdout: Optional[str] = None + self.stderr: Optional[str] = None + + def start_benchmark( + self, + rate_type: str = "constant", + rate: int = 10, + max_seconds: Optional[int] = None, + max_requests: Optional[int] = None, + max_error_rate: Optional[float] = None, + data: str = "prompt_tokens=256,output_tokens=128", + processor: str = "gpt2", + additional_args: str = "", + ) -> None: + """ + Start a guidellm benchmark command. + + :param rate_type: Type of rate control (constant, etc.) + :param rate: Request rate + :param max_seconds: Maximum duration in seconds + :param max_requests: Maximum number of requests + :param max_error_rate: Maximum error rate before stopping + :param data: Data configuration string + :param processor: Processor/tokenizer to use + :param additional_args: Additional command line arguments + """ + guidellm_exe = get_guidellm_executable() + + # Build command components + cmd_parts = [ + f"GUIDELLM__MAX_CONCURRENCY=10 GUIDELLM__MAX_WORKER_PROCESSES=10 {guidellm_exe} benchmark", + f'--target "{self.target}"', + f"--rate-type {rate_type}", + f"--rate {rate}", + ] + + if max_seconds is not None: + cmd_parts.append(f"--max-seconds {max_seconds}") + + if max_requests is not None: + cmd_parts.append(f"--max-requests {max_requests}") + + if max_error_rate is not None: + cmd_parts.append(f"--max-error-rate {max_error_rate}") + + cmd_parts.extend( + [ + f'--data "{data}"', + f'--processor "{processor}"', + f"--output-path {self.output_path}", + ] + ) + + if additional_args: + cmd_parts.append(additional_args) + + command = " \\\n ".join(cmd_parts) + + logger.info(f"Client command: {command}") + + self.process = subprocess.Popen( # noqa: S603 + ["/bin/bash", "-c", command], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + def wait_for_completion( + self, timeout: int = 30, stop_server_after: Optional[int] = None, server=None + ) -> None: + """ + Wait for the benchmark to complete. + + :param timeout: Maximum time to wait for completion + :param stop_server_after: If provided, stop the server after this many seconds + :param server: Server object to stop (if stop_server_after is provided) + """ + if self.process is None: + raise RuntimeError("No process started. Call start_benchmark() first.") + + if stop_server_after is not None and server is not None: + logger.info( + f"Waiting {stop_server_after} seconds before stopping server..." 
+ ) + time.sleep(stop_server_after) + server.stop() + + try: + logger.info("Fetching client output") + self.stdout, self.stderr = self.process.communicate(timeout=timeout) + logger.debug(f"Client stdout:\n{self.stdout}") + logger.debug(f"Client stderr:\n{self.stderr}") + + except subprocess.TimeoutExpired: + logger.warning("Client did not complete within timeout, terminating...") + self.process.terminate() + try: + self.stdout, self.stderr = self.process.communicate(timeout=5) + except subprocess.TimeoutExpired: + logger.warning("Client did not terminate gracefully, killing it...") + self.process.kill() + self.stdout, self.stderr = self.process.communicate() + finally: + if self.process and self.process.poll() is None: + self.process.terminate() + try: + self.process.wait(timeout=5) + logger.info("Client stopped successfully.") + except subprocess.TimeoutExpired: + logger.warning("Client did not terminate gracefully, killing it...") + self.process.kill() + self.process.wait() + + +def assert_no_python_exceptions(stderr: Optional[str]) -> None: + """ + Assert that stderr does not contain any Python exception indicators. + + :param stderr: The stderr string to check (can be None) + :raises AssertionError: If Python exceptions are detected + """ + if stderr is None: + return # No stderr to check + + python_exception_indicators = [ + "Traceback (most recent call last):", + "AttributeError:", + "ValueError:", + "TypeError:", + "KeyError:", + "IndexError:", + "NameError:", + "ImportError:", + "RuntimeError:", + ] + + for indicator in python_exception_indicators: + assert indicator not in stderr, ( + f"Python exception detected in stderr: {indicator}" + ) + + +def load_benchmark_report(report_path: Path) -> dict: + """ + Load and validate a benchmark report JSON file. + + :param report_path: Path to the report file + :return: The loaded report dictionary + :raises AssertionError: If the file doesn't exist or is invalid + """ + assert report_path.exists(), f"Report file does not exist: {report_path}" + + with report_path.open("r") as f: + report = json.load(f) + + assert "benchmarks" in report, "Report missing 'benchmarks' field" + benchmarks = report["benchmarks"] + assert len(benchmarks) > 0, "Report contains no benchmarks" + + return report + + +def assert_successful_requests_fields(successful_requests: list) -> None: + """ + Assert that successful requests contain all expected timing and token fields. 
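+
+    Checked fields per request: request_latency, time_to_first_token_ms,
+    time_per_output_token_ms, inter_token_latency_ms, tokens_per_second,
+    output_tokens_per_second, total_tokens, prompt_tokens, and output_tokens
+    (each must be present, non-None, and greater than zero).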
+ + :param successful_requests: List of successful request objects + :raises AssertionError: If required fields are missing or invalid + """ + assert len(successful_requests) >= 1, "No successful requests found" + + for request in successful_requests: + # Basic latency + assert "request_latency" in request, "Missing 'request_latency' field" + assert request["request_latency"] > 0, "request_latency should be > 0" + + # Streaming timing fields + assert "time_to_first_token_ms" in request, ( + "Missing 'time_to_first_token_ms' field" + ) + assert request["time_to_first_token_ms"] is not None, ( + "time_to_first_token_ms should not be None" + ) + assert request["time_to_first_token_ms"] > 0, ( + "time_to_first_token_ms should be > 0" + ) + + assert "time_per_output_token_ms" in request, ( + "Missing 'time_per_output_token_ms' field" + ) + assert request["time_per_output_token_ms"] is not None, ( + "time_per_output_token_ms should not be None" + ) + assert request["time_per_output_token_ms"] > 0, ( + "time_per_output_token_ms should be > 0" + ) + + assert "inter_token_latency_ms" in request, ( + "Missing 'inter_token_latency_ms' field" + ) + assert request["inter_token_latency_ms"] is not None, ( + "inter_token_latency_ms should not be None" + ) + assert request["inter_token_latency_ms"] > 0, ( + "inter_token_latency_ms should be > 0" + ) + + # Token throughput fields + assert "tokens_per_second" in request, "Missing 'tokens_per_second' field" + assert request["tokens_per_second"] > 0, "tokens_per_second should be > 0" + + assert "output_tokens_per_second" in request, ( + "Missing 'output_tokens_per_second' field" + ) + assert request["output_tokens_per_second"] > 0, ( + "output_tokens_per_second should be > 0" + ) + + # Token count fields + assert "total_tokens" in request, "Missing 'total_tokens' field" + assert request["total_tokens"] > 0, "total_tokens should be > 0" + + assert "prompt_tokens" in request, "Missing 'prompt_tokens' field" + assert request["prompt_tokens"] > 0, "prompt_tokens should be > 0" + + assert "output_tokens" in request, "Missing 'output_tokens' field" + assert request["output_tokens"] > 0, "output_tokens should be > 0" + + +def assert_constraint_triggered( + benchmark: dict, constraint_name: str, expected_metadata: dict +) -> None: + """ + Assert that a specific constraint was triggered with expected metadata. 
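+
+    Expected values may be plain values, booleans, or callables used as predicates
+    against the constraint metadata. Example (from test_max_seconds_benchmark):
+
+        assert_constraint_triggered(
+            benchmark, "max_seconds", {"duration_exceeded": True}
+        )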
+ + :param benchmark: The benchmark object + :param constraint_name: Name of the constraint (e.g., 'max_seconds', 'max_requests', 'max_error_rate') + :param expected_metadata: Dictionary of expected metadata fields and values + :raises AssertionError: If constraint was not triggered or metadata is incorrect + """ + assert "scheduler" in benchmark, "Benchmark missing 'scheduler' field" + scheduler = benchmark["scheduler"] + + assert "state" in scheduler, "Scheduler missing 'state' field" + state = scheduler["state"] + + assert "end_processing_constraints" in state, ( + "State missing 'end_processing_constraints' field" + ) + constraints = state["end_processing_constraints"] + + assert constraint_name in constraints, ( + f"Constraint '{constraint_name}' was not triggered" + ) + constraint = constraints[constraint_name] + + assert "metadata" in constraint, ( + f"Constraint '{constraint_name}' missing 'metadata' field" + ) + metadata = constraint["metadata"] + + for key, expected_value in expected_metadata.items(): + assert key in metadata, ( + f"Constraint '{constraint_name}' metadata missing '{key}' field" + ) + actual_value = metadata[key] + + if isinstance(expected_value, bool): + assert actual_value is expected_value, ( + f"Expected {key}={expected_value}, got {actual_value}" + ) + elif callable(expected_value): + # Allow callable predicates for complex validation + assert expected_value(actual_value), ( + f"Predicate failed for {key}={actual_value}" + ) + else: + assert actual_value == expected_value, ( + f"Expected {key}={expected_value}, got {actual_value}" + ) + + +def cleanup_report_file(report_path: Path) -> None: + """ + Clean up the report file if it exists. + + :param report_path: Path to the report file to remove + """ + if report_path.exists(): + report_path.unlink() diff --git a/tests/e2e/vllm-sim.Dockerfile b/tests/e2e/vllm-sim.Dockerfile new file mode 100644 index 00000000..63be0fbd --- /dev/null +++ b/tests/e2e/vllm-sim.Dockerfile @@ -0,0 +1,15 @@ +FROM golang AS base + +WORKDIR /app + +RUN apt-get update && \ + apt-get install -y libzmq3-dev pkg-config && \ + git clone https://github.com/llm-d/llm-d-inference-sim.git && \ + cd llm-d-inference-sim && \ + git checkout v0.3.0 && \ + make build + +WORKDIR /app/llm-d-inference-sim + +FROM scratch +COPY --from=base /app/llm-d-inference-sim/bin /bin diff --git a/tests/e2e/vllm_sim_server.py b/tests/e2e/vllm_sim_server.py new file mode 100644 index 00000000..726dba40 --- /dev/null +++ b/tests/e2e/vllm_sim_server.py @@ -0,0 +1,136 @@ +import subprocess +import time +from pathlib import Path +from typing import Optional + +import pytest +import requests +from loguru import logger + + +class VllmSimServer: + """ + [vLLM simulator](https://llm-d.ai/docs/architecture/Components/inf-simulator) + A vLLM simulator wrapper for pytest. 
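+
+    Example (mirrors the module-scoped fixtures in the E2E tests):
+
+        server = VllmSimServer(port=8000, model="databricks/dolly-v2-12b", mode="echo")
+        server.start()
+        # ... run a benchmark against server.get_url() ...
+        server.stop()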
+ """ + + def __init__( + self, + port: int, + model: str, + lora: Optional[list[str]] = None, + mode: Optional[str] = None, + echo: Optional[bool] = None, + random: Optional[bool] = None, + time_to_first_token: Optional[float] = None, + inter_token_latency: Optional[float] = None, + max_loras: Optional[int] = None, + max_cpu_loras: Optional[int] = None, + max_num_seqs: Optional[int] = None, + ): + self.port = port + self.model = model + self.lora = lora + self.mode = mode + self.echo = echo + self.random = random + self.time_to_first_token = time_to_first_token + self.inter_token_latency = inter_token_latency + self.max_loras = max_loras + self.max_cpu_loras = max_cpu_loras + self.max_num_seqs = max_num_seqs + self.server_url = f"http://127.0.0.1:{self.port}" + self.health_url = f"{self.server_url}/health" + self.app_script = "./bin/llm-d-inference-sim" + self.process: Optional[subprocess.Popen] = None + if not Path(self.app_script).exists(): + message = ( + "The vLLM simulator binary is required for E2E tests, but is missing.\n" + "To build it and enable E2E tests, please run:\n" + "docker build . -f tests/e2e/vllm-sim.Dockerfile -o type=local,dest=./" + ) + logger.warning(message) + pytest.skip("vLLM simulator binary missing", allow_module_level=True) + + def get_cli_parameters(self) -> list[str]: + parameters = ["--port", f"{self.port}", "--model", self.model] + if self.lora is not None: + parameters.extend(["--lora", ",".join(self.lora)]) + if self.mode is not None: + parameters.extend(["--mode", self.mode]) + if self.echo: + parameters.extend(["--echo"]) + if self.random: + parameters.extend(["--random"]) + if self.time_to_first_token is not None: + parameters.extend(["--time-to-first-token", f"{self.time_to_first_token}"]) + if self.inter_token_latency is not None: + parameters.extend(["--inter-token-latency", f"{self.inter_token_latency}"]) + if self.max_loras is not None: + parameters.extend(["--max-loras", f"{self.max_loras}"]) + if self.max_cpu_loras is not None: + parameters.extend(["--max-cpu-loras", f"{self.max_cpu_loras}"]) + if self.max_num_seqs is not None: + parameters.extend(["--max-num-seqs", f"{self.max_num_seqs}"]) + return parameters + + def start(self): + """ + Starts the server process and waits for it to become healthy. + """ + + logger.info(f"Starting server on {self.server_url} using {self.app_script}...") + cli_parameters = self.get_cli_parameters() + command = " ".join([self.app_script, *cli_parameters]) + logger.info(f"Server command: {command}") + self.process = subprocess.Popen( # noqa: S603 + [self.app_script, *cli_parameters], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, # Decode stdout/stderr as text + ) + + # Wait for the server to start and become healthy + max_retries = 20 + retry_delay_sec = 0.5 + for i in range(max_retries): + try: + response = requests.get(self.health_url, timeout=1) + if response.status_code == 200: + logger.info(f"Server started successfully at {self.server_url}") + return + else: + logger.warning(f"Got response with status: {response.status_code}") + logger.warning(response.json()) + except requests.ConnectionError: + logger.warning(f"Waiting for server...
(attempt {i + 1}/{max_retries})") + time.sleep(retry_delay_sec) + # If the loop completes without breaking, the server didn't start + stdout, stderr = self.process.communicate() + logger.error(f"Server failed to start after {max_retries} retries.") + logger.error(f"Server stdout:\n{stdout}") + logger.error(f"Server stderr:\n{stderr}") + self.stop() # Attempt to clean up + pytest.fail("Server did not start within the expected time.") + + def stop(self): + """ + Stops the server process. + """ + if self.process: + logger.info(f"Stopping server on {self.server_url}...") + self.process.terminate() # Send SIGTERM + try: + self.process.wait(timeout=1) # Wait for the process to terminate + logger.info("Server stopped successfully.") + except subprocess.TimeoutExpired: + logger.warning("Server did not terminate gracefully, killing it...") + self.process.kill() # Send SIGKILL if it doesn't terminate + self.process.wait() + self.process = None # Clear the process reference + + def get_url(self): + """ + Returns the base URL of the running server. + """ + return self.server_url diff --git a/tests/integration/scheduler/test_scheduler.py b/tests/integration/scheduler/test_scheduler.py index edff9e8f..8a4dd94b 100644 --- a/tests/integration/scheduler/test_scheduler.py +++ b/tests/integration/scheduler/test_scheduler.py @@ -88,7 +88,9 @@ async def resolve(self, request: MockRequest, request_info, request_history): ): raise RuntimeError(f"mock_error_for_{request.payload}") - yield f"response_for_{request.payload}" + # TODO: Review Cursor generated code (start) + yield f"response_for_{request.payload}", request_info + # TODO: Review Cursor generated code (end) @pytest.mark.smoke diff --git a/tests/integration/scheduler/test_worker_group.py b/tests/integration/scheduler/test_worker_group.py index 4c39f36d..7fddf8cd 100644 --- a/tests/integration/scheduler/test_worker_group.py +++ b/tests/integration/scheduler/test_worker_group.py @@ -22,11 +22,13 @@ AsyncPoissonStrategy, BackendInterface, ConcurrentStrategy, - MaxDurationConstraintInitializer, - MaxErrorRateConstraintInitializer, - MaxErrorsConstraintInitializer, - MaxGlobalErrorRateConstraintInitializer, - MaxNumberConstraintInitializer, + # TODO: Review Cursor generated code (start) + MaxDurationConstraint, + MaxErrorRateConstraint, + MaxErrorsConstraint, + MaxGlobalErrorRateConstraint, + MaxNumberConstraint, + # TODO: Review Cursor generated code (end) MeasuredRequestTimings, SynchronousStrategy, ThroughputStrategy, @@ -98,7 +100,9 @@ async def resolve(self, request, request_info, request_history): ): raise RuntimeError("Mock error for testing") - yield f"response_for_{request}" + # TODO: Review Cursor generated code (start) + yield f"response_for_{request}", request_info + # TODO: Review Cursor generated code (end) class TestWorkerGroup: @@ -118,12 +122,16 @@ class TestWorkerGroup: @pytest.mark.parametrize( "constraints_inits", [ - {"max_num": MaxNumberConstraintInitializer(max_num=100)}, - {"max_duration": MaxDurationConstraintInitializer(max_duration=0.5)}, - {"max_errors": MaxErrorsConstraintInitializer(max_errors=20)}, - {"max_error_rate": MaxErrorRateConstraintInitializer(max_error_rate=0.1)}, + # TODO: Review Cursor generated code (start) + {"max_num": MaxNumberConstraint(max_num=100)}, + {"max_duration": MaxDurationConstraint(max_duration=0.5)}, + {"max_errors": MaxErrorsConstraint(max_errors=20)}, + {"max_error_rate": MaxErrorRateConstraint(max_error_rate=0.1)}, + # TODO: Review Cursor generated code (end) { - "max_global_error_rate": 
MaxGlobalErrorRateConstraintInitializer( + # TODO: Review Cursor generated code (start) + "max_global_error_rate": MaxGlobalErrorRateConstraint( + # TODO: Review Cursor generated code (end) max_error_rate=0.1 ) }, diff --git a/tests/unit/benchmark/test_output.py b/tests/unit/benchmark/test_output.py index 9076834b..0090560d 100644 --- a/tests/unit/benchmark/test_output.py +++ b/tests/unit/benchmark/test_output.py @@ -10,7 +10,11 @@ from guidellm.benchmark import ( GenerativeBenchmarksReport, ) -from guidellm.benchmark.output import GenerativeBenchmarksConsole + +# TODO: Review Cursor generated code (start) +from guidellm.benchmark.output import GenerativeBenchmarkerConsole + +# TODO: Review Cursor generated code (end) from tests.unit.mock_benchmark import mock_generative_benchmark @@ -100,7 +104,9 @@ def test_file_csv(): def test_console_benchmarks_profile_str(): - console = GenerativeBenchmarksConsole(enabled=True) + # TODO: Review Cursor generated code (start) + console = GenerativeBenchmarkerConsole() + # TODO: Review Cursor generated code (end) mock_benchmark = mock_generative_benchmark() console.benchmarks = [mock_benchmark] assert ( @@ -109,7 +115,9 @@ def test_console_benchmarks_profile_str(): def test_console_benchmarks_args_str(): - console = GenerativeBenchmarksConsole(enabled=True) + # TODO: Review Cursor generated code (start) + console = GenerativeBenchmarkerConsole() + # TODO: Review Cursor generated code (end) mock_benchmark = mock_generative_benchmark() console.benchmarks = [mock_benchmark] assert console.benchmarks_args_str == ( @@ -119,14 +127,18 @@ def test_console_benchmarks_args_str(): def test_console_benchmarks_worker_desc_str(): - console = GenerativeBenchmarksConsole(enabled=True) + # TODO: Review Cursor generated code (start) + console = GenerativeBenchmarkerConsole() + # TODO: Review Cursor generated code (end) mock_benchmark = mock_generative_benchmark() console.benchmarks = [mock_benchmark] assert console.benchmarks_worker_desc_str == str(mock_benchmark.worker) def test_console_benchmarks_request_loader_desc_str(): - console = GenerativeBenchmarksConsole(enabled=True) + # TODO: Review Cursor generated code (start) + console = GenerativeBenchmarkerConsole() + # TODO: Review Cursor generated code (end) mock_benchmark = mock_generative_benchmark() console.benchmarks = [mock_benchmark] assert console.benchmarks_request_loader_desc_str == str( @@ -135,35 +147,45 @@ def test_console_benchmarks_request_loader_desc_str(): def test_console_benchmarks_extras_str(): - console = GenerativeBenchmarksConsole(enabled=True) + # TODO: Review Cursor generated code (start) + console = GenerativeBenchmarkerConsole() + # TODO: Review Cursor generated code (end) mock_benchmark = mock_generative_benchmark() console.benchmarks = [mock_benchmark] assert console.benchmarks_extras_str == "None" def test_console_print_section_header(): - console = GenerativeBenchmarksConsole(enabled=True) + # TODO: Review Cursor generated code (start) + console = GenerativeBenchmarkerConsole() + # TODO: Review Cursor generated code (end) with patch.object(console.console, "print") as mock_print: console.print_section_header("Test Header") mock_print.assert_called_once() def test_console_print_labeled_line(): - console = GenerativeBenchmarksConsole(enabled=True) + # TODO: Review Cursor generated code (start) + console = GenerativeBenchmarkerConsole() + # TODO: Review Cursor generated code (end) with patch.object(console.console, "print") as mock_print: console.print_labeled_line("Label", "Value") 
mock_print.assert_called_once() def test_console_print_line(): - console = GenerativeBenchmarksConsole(enabled=True) + # TODO: Review Cursor generated code (start) + console = GenerativeBenchmarkerConsole() + # TODO: Review Cursor generated code (end) with patch.object(console.console, "print") as mock_print: console.print_line("Test Line") mock_print.assert_called_once() def test_console_print_table(): - console = GenerativeBenchmarksConsole(enabled=True) + # TODO: Review Cursor generated code (start) + console = GenerativeBenchmarkerConsole() + # TODO: Review Cursor generated code (end) headers = ["Header1", "Header2"] rows = [["Row1Col1", "Row1Col2"], ["Row2Col1", "Row2Col2"]] with ( @@ -178,7 +200,9 @@ def test_console_print_table(): def test_console_print_benchmarks_metadata(): - console = GenerativeBenchmarksConsole(enabled=True) + # TODO: Review Cursor generated code (start) + console = GenerativeBenchmarkerConsole() + # TODO: Review Cursor generated code (end) mock_benchmark = mock_generative_benchmark() console.benchmarks = [mock_benchmark] with ( @@ -191,7 +215,9 @@ def test_console_print_benchmarks_metadata(): def test_console_print_benchmarks_info(): - console = GenerativeBenchmarksConsole(enabled=True) + # TODO: Review Cursor generated code (start) + console = GenerativeBenchmarkerConsole() + # TODO: Review Cursor generated code (end) mock_benchmark = mock_generative_benchmark() console.benchmarks = [mock_benchmark] with patch.object(console, "print_table") as mock_table: @@ -200,7 +226,9 @@ def test_console_print_benchmarks_info(): def test_console_print_benchmarks_stats(): - console = GenerativeBenchmarksConsole(enabled=True) + # TODO: Review Cursor generated code (start) + console = GenerativeBenchmarkerConsole() + # TODO: Review Cursor generated code (end) mock_benchmark = mock_generative_benchmark() console.benchmarks = [mock_benchmark] with patch.object(console, "print_table") as mock_table: diff --git a/tests/unit/mock_benchmark.py b/tests/unit/mock_benchmark.py index 511bacbf..27231974 100644 --- a/tests/unit/mock_benchmark.py +++ b/tests/unit/mock_benchmark.py @@ -1,271 +1,192 @@ +# TODO: Review Cursor generated code (start) +"""Mock benchmark objects for unit testing.""" +# TODO: Review Cursor generated code (end) + +# TODO: Review Cursor generated code (start) +from guidellm.backend import GenerationRequestTimings + +# TODO: Review Cursor generated code (end) from guidellm.benchmark import ( - BenchmarkArgs, BenchmarkSchedulerStats, GenerativeBenchmark, + # TODO: Review Cursor generated code (start) + GenerativeMetrics, + # TODO: Review Cursor generated code (end) GenerativeRequestStats, - GenerativeTextErrorStats, - SynchronousProfile, ) -from guidellm.objects import StatusBreakdown -from guidellm.request import GenerativeRequestLoaderDescription -from guidellm.scheduler import ( - GenerativeRequestsWorkerDescription, - SchedulerRequestInfo, - SynchronousStrategy, + +# TODO: Review Cursor generated code (start) +from guidellm.benchmark.objects import BenchmarkerDict, SchedulerDict +from guidellm.benchmark.profile import SynchronousProfile +from guidellm.scheduler import ScheduledRequestInfo, SchedulerState, SynchronousStrategy +from guidellm.utils import ( + DistributionSummary, + Percentiles, + StandardBaseDict, + StatusBreakdown, + StatusDistributionSummary, ) +# TODO: Review Cursor generated code (end) + __all__ = ["mock_generative_benchmark"] +# TODO: Review Cursor generated code (start) +def _create_mock_percentiles() -> Percentiles: + """Create mock 
percentiles for testing.""" + return Percentiles( + p001=0.1, + p01=1.0, + p05=5.0, + p10=10.0, + p25=25.0, + p50=50.0, + p75=75.0, + p90=90.0, + p95=95.0, + p99=99.0, + p999=99.9, + ) + + +# TODO: Review Cursor generated code (end) + + +# TODO: Review Cursor generated code (start) +def _create_mock_distribution() -> DistributionSummary: + """Create mock distribution summary for testing.""" + return DistributionSummary( + mean=50.0, + median=50.0, + mode=50.0, + variance=10.0, + std_dev=3.16, + min=10.0, + max=100.0, + count=100, + total_sum=5000.0, + percentiles=_create_mock_percentiles(), + ) + + +# TODO: Review Cursor generated code (end) + + +# TODO: Review Cursor generated code (start) +def _create_status_dist() -> StatusDistributionSummary: + """Create mock status distribution summary for testing.""" + dist = _create_mock_distribution() + return StatusDistributionSummary( + successful=dist, + incomplete=dist, + errored=dist, + total=dist, + ) + + +# TODO: Review Cursor generated code (end) + + def mock_generative_benchmark() -> GenerativeBenchmark: - return GenerativeBenchmark.from_stats( - run_id="fa4a92c1-9a1d-4c83-b237-83fcc7971bd3", - successful=[ - GenerativeRequestStats( - request_id="181a63e2-dc26-4268-9cfc-2ed9279aae63", - request_type="text_completions", - scheduler_info=SchedulerRequestInfo( - requested=True, - completed=True, - errored=False, - canceled=False, - targeted_start_time=1744728125.203447, - queued_time=1744728125.204123, - dequeued_time=1744728125.2048807, - scheduled_time=1744728125.2048993, - worker_start=1744728125.2049701, - request_start=1744728125.2052872, - request_end=1744728126.7004411, - worker_end=1744728126.701175, - process_id=0, - ), - prompt="such a sacrifice to her advantage as years of gratitude cannot enough acknowledge. By this time she is actually with them! If such goodness does not make her miserable now, she will never deserve to be happy! What a meeting for her, when she first sees my aunt! We must endeavour to forget all that has passed on either side, said Jane I hope and trust they will yet be happy. His consenting to marry her is a proof, I will believe, that he is come to a right way of thinking. Their mutual affection will steady them; and I flatter myself they will settle so quietly, and live in so rational a manner", # noqa: E501 - output=", as to make their long life together very comfortable and very useful. I feel, if they and the honourable Mr. Thorpe, who still lives amongst us, should be all I need, I could perfectly rest happy. 
Writes to meet them in that kind of obedience which is necessary and honourable, and such", # noqa: E501 - prompt_tokens=128, - output_tokens=64, - start_time=1744728125.2052872, - end_time=1744728126.7004411, - first_token_time=1744728125.2473357, - last_token_time=1744728126.699908, - ), - GenerativeRequestStats( - request_id="8a7846d5-7624-420d-a269-831e568a848f", - request_type="text_completions", - scheduler_info=SchedulerRequestInfo( - requested=True, - completed=True, - errored=False, - canceled=False, - targeted_start_time=1744728125.204613, - queued_time=1744728125.2047558, - dequeued_time=1744728126.7025175, - scheduled_time=1744728126.7025256, - worker_start=1744728126.702579, - request_start=1744728126.7027814, - request_end=1744728128.1961868, - worker_end=1744728128.196895, - process_id=0, - ), - prompt="a reconciliation; and, after a little further resistance on the part of his aunt, her resentment gave way, either to her affection for him, or her curiosity to see how his wife conducted herself; and she condescended to wait on them at Pemberley, in spite of that pollution which its woods had received, not merely from the presence of such a mistress, but the visits of her uncle and aunt from the city. With the Gardiners they were always on the most intimate terms. Darcy, as well as Elizabeth, really loved them; and they were both ever sensible of the warmest gratitude towards the persons who,", # noqa: E501 - output=" in their own days of poverty, had been so hotel and hospitable to a young couple leaving Pemberley. Till the size of Mr. Bennet\u2019s salary had been altered, the blessing of their friendship was much more greatly needed by the family than it appeared after that event.\n- Mr. Darcy soon deserved", # noqa: E501 - prompt_tokens=128, - output_tokens=64, - start_time=1744728126.7027814, - end_time=1744728128.1961868, - first_token_time=1744728126.7526379, - last_token_time=1744728128.1956792, - ), - GenerativeRequestStats( - request_id="4cde0e6c-4531-4e59-aac1-07bc8b6e4139", - request_type="text_completions", - scheduler_info=SchedulerRequestInfo( - requested=True, - completed=True, - errored=False, - canceled=False, - targeted_start_time=1744728126.7031465, - queued_time=1744728126.7034643, - dequeued_time=1744728128.198447, - scheduled_time=1744728128.1984534, - worker_start=1744728128.198509, - request_start=1744728128.1986883, - request_end=1744728129.6919055, - worker_end=1744728129.692606, - process_id=0, - ), - prompt="struck her, that _she_ was selected from among her sisters as worthy of being the mistress of Hunsford Parsonage, and of assisting to form a quadrille table at Rosings, in the absence of more eligible visitors. The idea soon reached to conviction, as she observed his increasing civilities towards herself, and heard his frequent attempt at a compliment on her wit and vivacity; and though more astonished than gratified herself by this effect of her charms, it was not long before her mother gave her to understand that the probability of their marriage was exceedingly agreeable to _her_. Elizabeth, however, did not choose", # noqa: E501 - output=" to improve this conversation into a prophecy, and her mother would hardly take on herself to announce so important a phenomenon. 
At last he was to drive to Hunsford from Meryton on Sunday; they staid for an hour at eight o'clock, and the following day appeared to be hung up on the walls of", # noqa: E501 - prompt_tokens=128, - output_tokens=64, - start_time=1744728128.1986883, - end_time=1744728129.6919055, - first_token_time=1744728128.2481627, - last_token_time=1744728129.6914039, - ), - GenerativeRequestStats( - request_id="a95b96be-05d4-4130-b0dd-9528c01c9909", - request_type="text_completions", - scheduler_info=SchedulerRequestInfo( - requested=True, - completed=True, - errored=False, - canceled=False, - targeted_start_time=1744728128.1987216, - queued_time=1744728128.1991177, - dequeued_time=1744728129.6953137, - scheduled_time=1744728129.695318, - worker_start=1744728129.695379, - request_start=1744728129.6955585, - request_end=1744728131.187553, - worker_end=1744728131.188169, - process_id=0, - ), - prompt="were comfortable on this subject. Day after day passed away without bringing any other tidings of him than the report which shortly prevailed in Meryton of his coming no more to Netherfield the whole winter; a report which highly incensed Mrs. Bennet, and which she never failed to contradict as a most scandalous falsehood. Even Elizabeth began to fear not that Bingley was indifferent but that his sisters would be successful in keeping him away. Unwilling as she was to admit an idea so destructive to Jane s happiness, and so dishonourable to the stability of her lover, she could not prevent its frequently recurring", # noqa: E501 - output=" during these indefinite disputes; and was often seriously engaged in blaming her sisters for increasing a suspense which might only be caused by their own inattention to a subject of so much moment. Whether she had really made that impression on the s+.ayers, or whether she had merely imagined it, she could decide no farther, for", # noqa: E501 - prompt_tokens=128, - output_tokens=64, - start_time=1744728129.6955585, - end_time=1744728131.187553, - first_token_time=1744728129.7438853, - last_token_time=1744728131.187019, - ), - GenerativeRequestStats( - request_id="714b751c-bbfe-4b2a-a0af-7c1bf2c224ae", - request_type="text_completions", - scheduler_info=SchedulerRequestInfo( - requested=True, - completed=True, - errored=False, - canceled=False, - targeted_start_time=1744728129.6975086, - queued_time=1744728129.6978767, - dequeued_time=1744728131.190093, - scheduled_time=1744728131.190101, - worker_start=1744728131.1901798, - request_start=1744728131.1904676, - request_end=1744728132.6833503, - worker_end=1744728132.6839745, - process_id=0, - ), - prompt="? cried Elizabeth, brightening up for a moment. Upon my word, said Mrs. Gardiner, I begin to be of your uncle s opinion. It is really too great a violation of decency, honour, and interest, for him to be guilty of it. I cannot think so very ill of Wickham. Can you, yourself, Lizzie, so wholly give him up, as to believe him capable of it? Not perhaps of neglecting his own interest. But of every other neglect I can believe him capable. If, indeed, it should be so! But I dare not hope it. Why should they not go on", # noqa: E501 - output=" together? This is still a motive incapable of being denied. He has such a faculty of pleasing, and you know how much she likes him. 
\nQuestion: What made elder sisters the center of their families?\nSometimes early this would be discussed in the family circle, but that was a very exceptional treatment.\nThank you,", # noqa: E501 - prompt_tokens=128, - output_tokens=64, - start_time=1744728131.1904676, - end_time=1744728132.6833503, - first_token_time=1744728131.2394557, - last_token_time=1744728132.6828275, - ), - GenerativeRequestStats( - request_id="ef73ae8a-4c8f-4c88-b303-cfff152ce378", - request_type="text_completions", - scheduler_info=SchedulerRequestInfo( - requested=True, - completed=True, - errored=False, - canceled=False, - targeted_start_time=1744728131.1891043, - queued_time=1744728131.1893764, - dequeued_time=1744728132.6859632, - scheduled_time=1744728132.6859682, - worker_start=1744728132.6860242, - request_start=1744728132.6862206, - request_end=1744728134.1805167, - worker_end=1744728134.1813161, - process_id=0, - ), - prompt="was. But her commendation, though costing her some trouble, could by no means satisfy Mr. Collins, and he was very soon obliged to take her Ladyship s praise into his own hands. Sir William stayed only a week at Hunsford; but his visit was long enough to convince him of his daughter s being most comfortably settled, and of her possessing such a husband and such a neighbour as were not often met with. While Sir William was with them, Mr. Collins devoted his mornings to driving him out in his gig, and showing him the country but when he went away, the whole family returned to their usual employments", # noqa: E501 - output=", and the sides of the family in which he was more particularly interested, to their respective places in the establishment. Here Jane was occasionally up as a substitute to her indolent sister, in her matron s stead, but was more frequently left idle, and with her hours of quietness, the unwelcome intrusion", # noqa: E501 - prompt_tokens=128, - output_tokens=64, - start_time=1744728132.6862206, - end_time=1744728134.1805167, - first_token_time=1744728132.7354612, - last_token_time=1744728134.1797993, - ), - ], - errored=[], - incomplete=[ - GenerativeTextErrorStats( - request_id="1b3def04-ca81-4f59-a56c-452a069d91af", - request_type="text_completions", - scheduler_info=SchedulerRequestInfo( - requested=True, - completed=False, - errored=True, - canceled=True, - targeted_start_time=1744728132.686177, - queued_time=1744728132.6866345, - dequeued_time=1744728134.1831052, - scheduled_time=1744728134.1831107, - worker_start=1744728134.183183, - request_start=1744728134.183544, - request_end=1744728135.2031732, - worker_end=1744728135.2033112, - process_id=0, - ), - prompt="is to tempt anyone to our humble abode. Our plain manner of living, our small rooms, and few domestics, and the little we see of the world, must make Hunsford extremely dull to a young lady like yourself; but I hope you will believe us grateful for the condescension, and that we have done everything in our power to prevent you spending your time unpleasantly. Elizabeth was eager with her thanks and assurances of happiness. She had spent six weeks with great enjoyment; and the pleasure of being with Charlotte, and the kind attention she had received, must make _her_ feel the obliged. Mr. 
Collins", # noqa: E501 - output=", who certainly had an eye to Elizabeth's manner, was glad _he was not to lose the curiosity she had given, and requested her away_ , _for the politeness of her conciliating manner would", # noqa: E501 - prompt_tokens=128, - output_tokens=43, - start_time=1744728134.183544, - end_time=1744728135.2031732, - first_token_time=1744728134.2323751, - last_token_time=1744728135.1950455, - error="TimeoutError: The request timed out before completing.", - ) - ], - args=BenchmarkArgs( - profile=SynchronousProfile(), - strategy_index=0, + # TODO: Review Cursor generated code (start) + """Create a minimal mock GenerativeBenchmark for testing purposes.""" + return GenerativeBenchmark( + run_id="test-run-gen", + run_index=0, + scheduler=SchedulerDict( + # TODO: Review Cursor generated code (end) strategy=SynchronousStrategy(), - max_number=None, - max_duration=10.0, - warmup_number=None, - warmup_duration=None, - cooldown_number=None, - cooldown_duration=None, + # TODO: Review Cursor generated code (start) + constraints={}, + state=SchedulerState(node_id=0, num_processes=1), ), + benchmarker=BenchmarkerDict( + profile=SynchronousProfile.create("synchronous", rate=None), + requests={}, + backend={}, + environment={}, + aggregators={}, + # TODO: Review Cursor generated code (end) + ), + # TODO: Review Cursor generated code (start) + env_args=StandardBaseDict(), + extras=StandardBaseDict(), + # TODO: Review Cursor generated code (end) run_stats=BenchmarkSchedulerStats( - start_time=1744728125.0772898, - end_time=1744728135.8407037, + # TODO: Review Cursor generated code (start) + start_time=1, + end_time=2, + # TODO: Review Cursor generated code (end) requests_made=StatusBreakdown( - successful=6, + # TODO: Review Cursor generated code (start) + successful=1, + incomplete=0, errored=0, - incomplete=1, - total=7, + total=1, + # TODO: Review Cursor generated code (end) ), - queued_time_avg=1.2821388585226876, - scheduled_time_delay_avg=7.96999250139509e-6, - scheduled_time_sleep_avg=0.0, - worker_start_delay_avg=6.399835859026228e-5, - worker_time_avg=1.4266603674207414, - worker_start_time_targeted_delay_avg=1.2825865745544434, - request_start_time_delay_avg=0.6414163964135307, - request_start_time_targeted_delay_avg=1.2827096836907523, - request_time_delay_avg=0.0004316908972603934, - request_time_avg=1.426228676523481, + # TODO: Review Cursor generated code (start) + queued_time_avg=0.1, + worker_resolve_start_delay_avg=0.1, + worker_resolve_time_avg=0.1, + worker_resolve_end_delay_avg=0.1, + finalized_delay_avg=0.1, + worker_targeted_start_delay_avg=0.1, + request_start_delay_avg=0.1, + request_time_avg=0.1, + request_targeted_delay_avg=0.1, + # TODO: Review Cursor generated code (end) ), - worker=GenerativeRequestsWorkerDescription( - backend_type="openai_http", - backend_target="http://localhost:8000", - backend_model="neuralmagic/Qwen2.5-7B-quantized.w8a8", - backend_info={ - "max_output_tokens": 16384, - "timeout": 300, - "http2": True, - "authorization": False, - "organization": None, - "project": None, - "text_completions_path": "/v1/completions", - "chat_completions_path": "/v1/chat/completions", - }, + # TODO: Review Cursor generated code (start) + start_time=1000.0, + end_time=2000.0, + metrics=GenerativeMetrics( + requests_per_second=_create_status_dist(), + request_concurrency=_create_status_dist(), + request_latency=_create_status_dist(), + prompt_token_count=_create_status_dist(), + output_token_count=_create_status_dist(), + 
total_token_count=_create_status_dist(), + time_to_first_token_ms=_create_status_dist(), + time_per_output_token_ms=_create_status_dist(), + inter_token_latency_ms=_create_status_dist(), + output_tokens_per_second=_create_status_dist(), + tokens_per_second=_create_status_dist(), + # TODO: Review Cursor generated code (end) ), - requests_loader=GenerativeRequestLoaderDescription( - data='{"prompt_tokens": 128, "output_tokens": 64}', - data_args=None, - processor="neuralmagic/Qwen2.5-7B-quantized.w8a8", - processor_args=None, + # TODO: Review Cursor generated code (start) + request_totals=StatusBreakdown( + successful=1, + incomplete=0, + errored=0, + total=1, + # TODO: Review Cursor generated code (end) ), - extras={}, - ) + # TODO: Review Cursor generated code (start) + requests=StatusBreakdown( + successful=[ + GenerativeRequestStats( + scheduler_info=ScheduledRequestInfo( + request_timings=GenerationRequestTimings( + request_start=1, + first_iteration=2, + last_iteration=6, + request_end=6, + ) + ), + request_id="a", + request_type="text_completions", + prompt="p", + request_args={}, + output="o", + iterations=1, + prompt_tokens=1, + output_tokens=2, + ) + ], + incomplete=[], + errored=[], + total=None, + ), + ) # TODO: Review Cursor generated code (end) diff --git a/tests/unit/objects/test_pydantic.py b/tests/unit/objects/test_pydantic.py index b6c19a9a..84252601 100644 --- a/tests/unit/objects/test_pydantic.py +++ b/tests/unit/objects/test_pydantic.py @@ -1,7 +1,10 @@ import pytest from pydantic import computed_field -from guidellm.utils.pydantic import StandardBaseModel +# TODO: Review Cursor generated code (start) +from guidellm.utils.pydantic_utils import StandardBaseModel + +# TODO: Review Cursor generated code (end) class ExampleModel(StandardBaseModel): diff --git a/tests/unit/objects/test_statistics.py b/tests/unit/objects/test_statistics.py index fa8cccd0..2eaac4d6 100644 --- a/tests/unit/objects/test_statistics.py +++ b/tests/unit/objects/test_statistics.py @@ -5,7 +5,9 @@ import numpy as np import pytest -from guidellm.objects import ( +# TODO: Review Cursor generated code (start) +from guidellm.utils import ( + # TODO: Review Cursor generated code (end) DistributionSummary, Percentiles, RunningStats, diff --git a/tox.ini b/tox.ini index 08fc27b9..4e2fde9f 100644 --- a/tox.ini +++ b/tox.ini @@ -35,6 +35,14 @@ commands = python -m pytest tests/e2e {posargs} +[testenv:test-paths] +description = Run provided paths tests +deps = + .[dev] +commands = + python -m pytest {posargs} + + [testenv:quality] description = Run all quality checks deps =