@@ -250,55 +250,83 @@ class ShapeRecommendationReport(BaseModel):
250
250
@classmethod
251
251
def from_deployment_config (cls , deployment_config : AquaDeploymentConfig , model_name : str , valid_shapes : List [ComputeShapeSummary ]) -> "ShapeRecommendationReport" :
252
252
"""
253
- For service models, pre-set deployment configurations (AquaDeploymentConfig) are available.
254
- Derives ShapeRecommendationReport from AquaDeploymentConfig (if service model & available)
253
+ Creates a ShapeRecommendationReport from an AquaDeploymentConfig, extracting recommended
254
+ model configurations for each valid compute shape.
255
+
256
+ Parameters
257
+ ----------
258
+ deployment_config : AquaDeploymentConfig
259
+ The object containing per-shape deployment configurations.
260
+ model_name : str
261
+ The name of the model for which to generate recommendations.
262
+ valid_shapes : list of ComputeShapeSummary
263
+ List of compute shapes to evaluate and recommend deployment configurations for.
264
+
265
+ Returns
266
+ -------
267
+ ShapeRecommendationReport
268
+ Report containing recommendations for each valid compute shape.
269
+
270
+ Notes
271
+ -----
272
+ For service models, this method interprets pre-set deployment configurations to derive
273
+ recommendations for each allowed compute shape, including environment variables, quantization,
274
+ and maximum model length parameters.
255
275
"""
256
276
257
277
recs = []
258
- # may need to sort?
259
278
for shape in valid_shapes :
260
279
current_config = deployment_config .configuration .get (shape .name )
261
- if current_config :
262
- quantization = None
263
- max_model_len = None
264
- recommendation = ""
265
- current_params = current_config .parameters .get (VLLM_PARAMS_KEY )
266
- current_env = current_config .env .get (VLLM_ENV_KEY )
280
+ if not current_config :
281
+ continue
267
282
268
- if current_params :
269
- param_list = current_params .split ()
283
+ quantization = None
284
+ max_model_len = None
285
+ recommendation = ""
286
+ current_params = current_config .parameters .get (VLLM_PARAMS_KEY )
287
+ current_env = current_config .env .get (VLLM_ENV_KEY )
270
288
271
- if QUANT_FLAG in param_list and (idx := param_list .index (QUANT_FLAG )) + 1 < len (param_list ):
289
+ if current_params :
290
+ param_list = current_params .split ()
291
+
292
+ if QUANT_FLAG in param_list :
293
+ idx = param_list .index (QUANT_FLAG )
294
+ if idx + 1 < len (param_list ):
272
295
quantization = param_list [idx + 1 ]
273
296
274
- if MAX_MODEL_LEN_FLAG in param_list and (idx := param_list .index (MAX_MODEL_LEN_FLAG )) + 1 < len (param_list ):
275
- max_model_len = param_list [idx + 1 ]
276
- max_model_len = int (max_model_len )
297
+ if MAX_MODEL_LEN_FLAG in param_list :
298
+ idx = param_list .index (MAX_MODEL_LEN_FLAG )
299
+ if idx + 1 < len (param_list ):
300
+ try :
301
+ max_model_len = int (param_list [idx + 1 ])
302
+ except ValueError :
303
+ max_model_len = None
277
304
278
- if current_env :
279
- recommendation += f"ENV: { json .dumps (current_env )} \n \n "
305
+ if current_env :
306
+ recommendation += f"ENV: { json .dumps (current_env )} \n \n "
280
307
281
- recommendation += "Model fits well within the allowed compute shape."
308
+ if not current_params and not current_env : # model works with default params and no extra env variables
309
+ recommendation += "No override PARAMS and ENV variables needed. \n \n "
282
310
283
- deployment_params = DeploymentParams (
284
- quantization = quantization if quantization else DEFAULT_WEIGHT_SIZE ,
285
- max_model_len = max_model_len ,
286
- params = current_params if current_params else "" ,
287
- )
311
+ recommendation += "Model fits well within the allowed compute shape."
288
312
289
- # TODO: calculate memory footprint based on params??
290
- # TODO: add --env vars not just params, current_config.env
291
- # are there multiple configurations in the SMM configs per shape??
292
- configuration = [ModelConfig (
293
- deployment_params = deployment_params ,
294
- recommendation = recommendation ,
295
- )]
296
-
297
- recs .append (ShapeReport (
298
- shape_details = shape ,
299
- configurations = configuration
300
- )
301
- )
313
+ deployment_params = DeploymentParams (
314
+ quantization = quantization if quantization else DEFAULT_WEIGHT_SIZE ,
315
+ max_model_len = max_model_len ,
316
+ params = current_params if current_params else "" ,
317
+ )
318
+
319
+ # need to adjust for multiple configs per shape
320
+ configuration = [ModelConfig (
321
+ deployment_params = deployment_params ,
322
+ recommendation = recommendation ,
323
+ )]
324
+
325
+ recs .append (ShapeReport (
326
+ shape_details = shape ,
327
+ configurations = configuration
328
+ )
329
+ )
302
330
303
331
return ShapeRecommendationReport (
304
332
display_name = model_name ,
0 commit comments