@@ -114,16 +114,26 @@ def __init__(
         if config:
             self.config = config
         else:
+            # Handle Anthropic API limitation: don't set top_p for Claude when temperature > 0
+            config_kwargs = kwargs.copy()
+            if "claude" in model.lower() and temperature > 0:
+                # Remove top_p from kwargs to avoid the Anthropic API error
+                config_kwargs.pop("top_p", None)
+
             self.config = LiteLLMConfig(
                 model=model,
                 api_key=api_key,
                 api_base=api_base,
                 temperature=temperature,
                 max_tokens=max_tokens,
                 fallbacks=fallbacks,
-                **kwargs,
+                **config_kwargs,
             )
 
+        # Set top_p to None for Claude models with temperature > 0
+        if "claude" in model.lower() and temperature > 0:
+            self.config.top_p = None
+
         # Set up API keys from environment if not provided
         self._setup_api_keys()
 
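
Pulled out of the diff, the constructor-side guard reduces to the sketch below. build_config() is a hypothetical stand-in for the LiteLLMConfig construction above; only the model check and pop() mirror the added lines. The effect: top_p never reaches the config when a Claude model is sampled with temperature > 0, since (per the comments in this commit) the Anthropic API rejects requests that set both.

    def build_config(model: str, temperature: float, **kwargs):
        # Hypothetical stand-in for the LiteLLMConfig construction above.
        config_kwargs = kwargs.copy()
        if "claude" in model.lower() and temperature > 0:
            # Anthropic rejects requests setting both temperature and top_p
            config_kwargs.pop("top_p", None)
        return {"model": model, "temperature": temperature, **config_kwargs}

    # top_p is dropped for a sampled Claude call, kept for other providers:
    assert "top_p" not in build_config("claude-3-opus", 0.7, top_p=0.9)
    assert build_config("gpt-4o", 0.7, top_p=0.9)["top_p"] == 0.9
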
@@ -217,16 +227,19 @@ def complete(self, prompt: str, **kwargs: Any) -> LLMResponse:
             "messages": messages,
             "temperature": kwargs.get("temperature", self.config.temperature),
             "max_tokens": kwargs.get("max_tokens", self.config.max_tokens),
-            "top_p": kwargs.get("top_p", self.config.top_p),
             "timeout": kwargs.get("timeout", self.config.timeout),
             "num_retries": kwargs.get("num_retries", self.config.max_retries),
             "drop_params": True,  # Automatically drop unsupported parameters
         }
 
-        # Work around LiteLLM bug: explicitly drop top_p for Claude when temperature is set
-        # This can be removed once LiteLLM properly handles this with drop_params
+        # Only add top_p if it's not None (to avoid Anthropic API limitation)
+        top_p_value = kwargs.get("top_p", self.config.top_p)
+        if top_p_value is not None:
+            call_params["top_p"] = top_p_value
+
+        # Work around Anthropic API limitation: cannot specify both temperature and top_p
         if "claude" in self.config.model.lower() and call_params["temperature"] > 0:
-            call_params["additional_drop_params"] = ["top_p"]
+            call_params.pop("top_p", None)
 
         # Add API key if available
         if self.config.api_key:
@@ -236,11 +249,17 @@ def complete(self, prompt: str, **kwargs: Any) -> LLMResponse:
 
         # Filter out ACE-specific parameters and add remaining kwargs
         ace_specific_params = {"refinement_round", "max_refinement_rounds", "stream_thinking"}
+
+        # Also exclude top_p for Claude models with temperature > 0
+        excluded_params = ace_specific_params.copy()
+        if "claude" in self.config.model.lower() and call_params["temperature"] > 0:
+            excluded_params.add("top_p")
+
         call_params.update(
             {
                 k: v
                 for k, v in kwargs.items()
-                if k not in call_params and k not in ace_specific_params
+                if k not in call_params and k not in excluded_params
             }
         )
 
@@ -292,16 +311,19 @@ async def acomplete(self, prompt: str, **kwargs: Any) -> LLMResponse:
             "messages": messages,
             "temperature": kwargs.get("temperature", self.config.temperature),
             "max_tokens": kwargs.get("max_tokens", self.config.max_tokens),
-            "top_p": kwargs.get("top_p", self.config.top_p),
             "timeout": kwargs.get("timeout", self.config.timeout),
             "num_retries": kwargs.get("num_retries", self.config.max_retries),
             "drop_params": True,  # Automatically drop unsupported parameters
         }
 
-        # Work around LiteLLM bug: explicitly drop top_p for Claude when temperature is set
-        # This can be removed once LiteLLM properly handles this with drop_params
+        # Only add top_p if it's not None (to avoid Anthropic API limitation)
+        top_p_value = kwargs.get("top_p", self.config.top_p)
+        if top_p_value is not None:
+            call_params["top_p"] = top_p_value
+
+        # Work around Anthropic API limitation: cannot specify both temperature and top_p
         if "claude" in self.config.model.lower() and call_params["temperature"] > 0:
-            call_params["additional_drop_params"] = ["top_p"]
+            call_params.pop("top_p", None)
 
         # Add API key if available
         if self.config.api_key:
@@ -311,11 +333,17 @@ async def acomplete(self, prompt: str, **kwargs: Any) -> LLMResponse:
 
         # Filter out ACE-specific parameters and add remaining kwargs
        ace_specific_params = {"refinement_round", "max_refinement_rounds", "stream_thinking"}
+
+        # Also exclude top_p for Claude models with temperature > 0
+        excluded_params = ace_specific_params.copy()
+        if "claude" in self.config.model.lower() and call_params["temperature"] > 0:
+            excluded_params.add("top_p")
+
         call_params.update(
             {
                 k: v
                 for k, v in kwargs.items()
-                if k not in call_params and k not in ace_specific_params
+                if k not in call_params and k not in excluded_params
             }
         )
 