Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ print(pred.completion_text) # "2 + 2 equals 4."
pred = lm.predict(LmPrompt(
"What is 2+6?",
max_tokens=10,
temperature=0, # Set this to 0 for deterministic completions
temperature=0, # Set this to 0 for deterministic completions.
# Note: GPT-5 models do not support setting temperature.
))
print(pred.completion_text) # "2 + 6 equals 8."

Expand Down
7 changes: 6 additions & 1 deletion lmwrapper/claude_wrapper/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,9 @@ def _predict_maybe_cached(
),
temperature=prompt.temperature,
messages=messages,
top_p=prompt.top_p,
)
if prompt.top_p is not None:
response["top_p"] = prompt.top_p
if system_message is not None:
response["system"] = system_message['content']

Expand Down Expand Up @@ -237,6 +238,10 @@ class ClaudeModelNames(metaclass=_ModelNamesMeta):
"claude-sonnet-4-20250514", 200_000)
claude_4_opus = ClaudeModelInfo(
"claude-opus-4-20250514", 200_000)
claude_4_5_sonnet = ClaudeModelInfo(
"claude-sonnet-4-5-20250929", 1_000_000)
claude_4_5_haiku = ClaudeModelInfo(
"claude-haiku-4-5-20251001", 200_000)

@classmethod
def name_to_info(cls, name: str) -> ClaudeModelInfo | None:
Expand Down
2 changes: 1 addition & 1 deletion lmwrapper/huggingface_wrapper/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def _verify_initial_prompt(self, prompt: LmPrompt):
raise NotImplementedError(
msg,
)
if prompt.logprobs and prompt.top_p != 1.0:
if prompt.logprobs and prompt.top_p is not None and prompt.top_p != 1.0:
logging.warning("Logprobs may not be correct if top_p != 1.0")

if prompt.presence_penalty != 0.0:
Expand Down
2 changes: 1 addition & 1 deletion lmwrapper/openai_wrapper/batching.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,7 @@ def _prompt_to_arg_dict_for_batch(
prompt,
engine_name=lm.model_name(),
chat_model=lm.is_chat_model,
o1_model=lm.is_o1_model,
reasoning_style=lm.reasoning_style,
)
request = {
"body": args,
Expand Down
108 changes: 61 additions & 47 deletions lmwrapper/openai_wrapper/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@
)
from lmwrapper.structs import LmPrediction, LmPrompt

# Aliases for shorter lines
NEVER_THINK = LmReasoningStyle.NEVER_THINK
ALWAYS_THINK = LmReasoningStyle.ALWAYS_THINK
CONFIGURABLE_THINKING = LmReasoningStyle.CONFIGURABLE_THINKING

PRINT_ON_PREDICT = False

MAX_LOG_PROB_PARM = 5
Expand All @@ -43,27 +48,31 @@ def __new__(
cls,
name: str,
is_chat_model: bool,
is_o1_model: bool,
reasoning_style: LmReasoningStyle,
token_limit_input: int,
):
instance = super().__new__(cls, name)
instance._is_chat_model = is_chat_model
instance._is_o1_model = is_o1_model
instance._reasoning_style = reasoning_style
instance._token_limit_input = token_limit_input
return instance

@property
def is_chat_model(self):
return self._is_chat_model

@property
def reasoning_style(self):
return self._reasoning_style

@property
def token_limit(self):
return self._token_limit_input

def __reduce__(self):
return (
self.__class__,
(str(self), self._is_chat_model, self._is_o1_model, self._token_limit_input),
(str(self), self._is_chat_model, self._reasoning_style, self._token_limit_input),
)


Expand All @@ -73,26 +82,26 @@ class OpenAiModelNames(metaclass=_ModelNamesMeta):
documentation on OpenAI's website at the time.
"""

gpt_3_5_turbo = OpenAiModelInfo("gpt-3.5-turbo", True, False, 4096)
gpt_3_5_turbo = OpenAiModelInfo("gpt-3.5-turbo", True, NEVER_THINK, 4096)
"""Most capable GPT-3.5 model and optimized for chat at 1/10th the cost of text-davinci-003.
Will be updated with our latest model iteration 2 weeks after it is released."""
gpt_3_5_turbo_16k = OpenAiModelInfo("gpt-3.5-turbo-16k", True, False, 16384)
gpt_3_5_turbo_16k = OpenAiModelInfo("gpt-3.5-turbo-16k", True, NEVER_THINK, 16384)
"""Same capabilities as the standard gpt-3.5-turbo model but with 4 times the context."""
gpt_3_5_turbo_instruct = OpenAiModelInfo(
"gpt-3.5-turbo-instruct", False, False, 4096
"gpt-3.5-turbo-instruct", False, NEVER_THINK, 4096
)
"""A GPT-3.5 version but for completion"""
code_davinci_002 = OpenAiModelInfo("code-davinci-002", False, False, 4097)
code_davinci_002 = OpenAiModelInfo("code-davinci-002", False, NEVER_THINK, 4097)
"""Can do any language task with better quality, longer output, and consistent instruction-following
than the curie, babbage, or ada models.
Also supports some additional features such as inserting text."""
gpt_4 = OpenAiModelInfo("gpt-4", True, False, 8192)
gpt_4 = OpenAiModelInfo("gpt-4", True, NEVER_THINK, 8192)
"""More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat.
Will be updated with our latest model iteration 2 weeks after it is released."""
gpt_4_32k = OpenAiModelInfo("gpt-4-32k", True, False, 32768)
gpt_4_32k = OpenAiModelInfo("gpt-4-32k", True, NEVER_THINK, 32768)
"""Same capabilities as the base gpt-4 mode but with 4x the context length.
Will be updated with our latest model iteration."""
gpt_4_turbo = OpenAiModelInfo("gpt-4-1106-preview", True, False, 128_000)
gpt_4_turbo = OpenAiModelInfo("gpt-4-1106-preview", True, NEVER_THINK, 128_000)
"""GPT-4 model with improved instruction following, JSON mode,
reproducible outputs, parallel function calling, and more.
Returns a maximum of 4,096 output tokens. This preview model is
Expand All @@ -103,9 +112,9 @@ class OpenAiModelNames(metaclass=_ModelNamesMeta):

see: https://help.openai.com/en/articles/8555510-gpt-4-turbo
"""
gpt_4o = OpenAiModelInfo("gpt-4o", True, False, 128_000)
gpt_4o = OpenAiModelInfo("gpt-4o", True, NEVER_THINK, 128_000)
"""
GPT-4o (“o” for omni) is our most advanced model.
GPT-4o ("o" for "omni") is our most advanced model.
It is multimodal (accepting text or image inputs and outputting text),
and it has the same high intelligence as GPT-4 Turbo but
is much more efficient—it generates text 2x faster and is 50% cheaper.
Expand All @@ -115,38 +124,43 @@ class OpenAiModelNames(metaclass=_ModelNamesMeta):

Point can change
"""
gpt_4o_2024_11_20 = OpenAiModelInfo("gpt-4o-2024-11-20", True, False, 128_000)
gpt_4o_2024_05_13 = OpenAiModelInfo("gpt-4o-2024-05-13", True, False, 128_000)
gpt_4o_mini = OpenAiModelInfo("gpt-4o-mini", True, False, 128_000)
gpt_4o_2024_11_20 = OpenAiModelInfo("gpt-4o-2024-11-20", True, NEVER_THINK, 128_000)
gpt_4o_2024_05_13 = OpenAiModelInfo("gpt-4o-2024-05-13", True, NEVER_THINK, 128_000)
gpt_4o_mini = OpenAiModelInfo("gpt-4o-mini", True, NEVER_THINK, 128_000)
"""Our affordable and intelligent small model for fast, lightweight tasks.
GPT-4o mini is cheaper and more capable than GPT-3.5 Turbo. """
gpt_4o_mini_2024_07_18 = OpenAiModelInfo(
"gpt-4o-mini-2024-07-18", True, False, 128_000
"gpt-4o-mini-2024-07-18", True, NEVER_THINK, 128_000
)

o1 = OpenAiModelInfo("o1", True, True, 200_000)
o1 = OpenAiModelInfo("o1", True, ALWAYS_THINK, 200_000)
"""
The o1 series of large language models are trained with reinforcement learning to perform complex reasoning. o1 models think before they answer, producing a long internal chain of thought before responding to the user.
"""
o1_preview = OpenAiModelInfo("o1-preview", True, True, 128_000)
o1_preview = OpenAiModelInfo("o1-preview", True, ALWAYS_THINK, 128_000)
"""o1-preview: reasoning model designed to solve hard problems across domains."""
o1_preview_2024_09_12 = OpenAiModelInfo(
"o1-preview-2024-09-12", True, True, 128_000
"o1-preview-2024-09-12", True, ALWAYS_THINK, 128_000
)
"""o1-mini: faster and cheaper reasoning model particularly good at coding, math, and science."""
o1_mini = OpenAiModelInfo("o1-mini", True, True, 128_000)
o1_mini_2024_09_12 = OpenAiModelInfo("o1-mini-2024-09-12", True, True, 128_000)
o1_mini = OpenAiModelInfo("o1-mini", True, ALWAYS_THINK, 128_000)
o1_mini_2024_09_12 = OpenAiModelInfo("o1-mini-2024-09-12", True, ALWAYS_THINK, 128_000)

o3_mini = OpenAiModelInfo("o3-mini", True, True, 200_000)
o4_mini = OpenAiModelInfo("o4-mini", True, True, 200_000)
o4_mini_2025_04_16 = OpenAiModelInfo("o4-mini-2025-04-16", True, True, 200_000)
o3_mini = OpenAiModelInfo("o3-mini", True, ALWAYS_THINK, 200_000)
o4_mini = OpenAiModelInfo("o4-mini", True, ALWAYS_THINK, 200_000)
o4_mini_2025_04_16 = OpenAiModelInfo("o4-mini-2025-04-16", True, ALWAYS_THINK, 200_000)

o3 = OpenAiModelInfo("o3", True, True, 200_000)
o3 = OpenAiModelInfo("o3", True, ALWAYS_THINK, 200_000)

gpt_4_1 = OpenAiModelInfo("gpt-4.1", True, False, 1_047_576)
gpt_4_1_mini = OpenAiModelInfo("gpt-4.1-mini", True, False, 1_047_576)
gpt_4_1_nano = OpenAiModelInfo("gpt-4.1-nano", True, False, 1_047_576)
gpt_4_1 = OpenAiModelInfo("gpt-4.1", True, NEVER_THINK, 1_047_576)
gpt_4_1_mini = OpenAiModelInfo("gpt-4.1-mini", True, NEVER_THINK, 1_047_576)
gpt_4_1_nano = OpenAiModelInfo("gpt-4.1-nano", True, NEVER_THINK, 1_047_576)

gpt_5 = OpenAiModelInfo("gpt-5", True, ALWAYS_THINK, 400_000)
gpt_5_mini = OpenAiModelInfo("gpt-5-mini", True, ALWAYS_THINK, 400_000)
gpt_5_nano = OpenAiModelInfo("gpt-5-nano", True, ALWAYS_THINK, 400_000)

gpt_5_1 = OpenAiModelInfo("gpt-5.1", True, CONFIGURABLE_THINKING, 400_000)

@classmethod
def name_to_info(cls, name: str) -> OpenAiModelInfo | None:
Expand Down Expand Up @@ -348,7 +362,7 @@ def __init__(
api: OpenAI,
engine_name: str,
chat_mode: bool | None = None,
o1_mode: bool | None = None,
reasoning_style: LmReasoningStyle | None = None,
cache_outputs_default: bool = False,
retry_on_rate_limit: bool = False,
):
Expand All @@ -368,7 +382,7 @@ def __init__(
self._retry_on_rate_limit = retry_on_rate_limit
info = OpenAiModelNames.name_to_info(engine_name)
self._chat_mode = info.is_chat_model if chat_mode is None else chat_mode
self._o1_mode = info._is_o1_model if o1_mode is None else o1_mode
self._reasoning_style = info.reasoning_style if reasoning_style is None else reasoning_style
if self._chat_mode is None:
msg = (
"`chat_mode` is not provided as a parameter and cannot be inferred from"
Expand Down Expand Up @@ -402,9 +416,7 @@ def supports_token_operations(self) -> bool:

@property
def reasoning_style(self) -> LmReasoningStyle:
if self.is_o1_model:
return LmReasoningStyle.ALWAYS_THINK
return LmReasoningStyle.NEVER_THINK
return self._reasoning_style

def _validate_prompt(self, prompt: LmPrompt, raise_on_invalid: bool = True) -> tuple[bool, LmPrompt | None]:
is_valid = True
Expand All @@ -420,7 +432,7 @@ def _validate_prompt(self, prompt: LmPrompt, raise_on_invalid: bool = True) -> t
else:
warnings.warn(message)
return False
if self._o1_mode:
if self._reasoning_style == ALWAYS_THINK:
# if prompt.max_tokens:
# message = f"o1 type models use `max_completion_tokens` instead of `max_tokens` but you have set max tokens to f{prompt.max_tokens}. Instead, `max_completion_tokens` will be used. `max_completion_tokens` is currently set to: {prompt.max_completion_tokens}"
# if raise_on_invalid:
Expand Down Expand Up @@ -465,10 +477,6 @@ def list_engines(self):
@property
def is_chat_model(self):
return self._chat_mode

@property
def is_o1_model(self):
return self._o1_mode

@property
def token_limit(self):
Expand Down Expand Up @@ -526,7 +534,7 @@ def run_func():
prompt,
self._engine_name,
self._chat_mode,
self._o1_mode,
self._reasoning_style,
default_tokens_generated=self.default_tokens_generated,
)

Expand Down Expand Up @@ -702,26 +710,28 @@ def prompt_to_openai_args_dict(
prompt: LmPrompt,
engine_name: str,
chat_model: bool,
o1_model: bool,
reasoning_style: LmReasoningStyle,
default_tokens_generated: int | None = 20,
) -> dict[str, Any]:
if prompt.max_tokens is not None:
max_tokens = prompt.max_tokens
else:
max_tokens = default_tokens_generated
if o1_model:
return dict(
if reasoning_style == ALWAYS_THINK:
args = dict(
messages=prompt.get_text_as_chat().as_dicts(),
model=engine_name,
max_completion_tokens=max_tokens,
n=prompt.num_completions or 1,
presence_penalty=prompt.presence_penalty,
top_logprobs=None,
top_p=prompt.top_p,
)
if prompt.top_p is not None:
args["top_p"] = prompt.top_p
return args
# all reasoning (ALWAYS_THINK) models are chat models
elif chat_model:
return dict(
args = dict(
messages=prompt.get_text_as_chat().as_dicts(),
model=engine_name,
logprobs=prompt.logprobs > 0,
Expand All @@ -732,19 +742,23 @@ def prompt_to_openai_args_dict(
temperature=prompt.temperature,
# top_logprobs accepts ints 0 to 20, logprobs must be a boolean true
top_logprobs=prompt.logprobs if prompt.logprobs > 0 else None,
top_p=prompt.top_p,
)
if prompt.top_p is not None:
args["top_p"] = prompt.top_p
return args
else:
return dict(
args = dict(
model=engine_name,
prompt=prompt.get_text_as_string_default_form(),
max_tokens=max_tokens,
stop=prompt.stop,
stream=False,
logprobs=prompt.logprobs,
temperature=prompt.temperature,
top_p=prompt.top_p,
presence_penalty=prompt.presence_penalty,
n=prompt.num_completions or 1,
echo=prompt.echo,
)
if prompt.top_p is not None:
args["top_p"] = prompt.top_p
return args
15 changes: 8 additions & 7 deletions lmwrapper/structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,13 @@ class LmPrompt(Generic[T]):
"""What sampling temperature to use, between 0 and 2.
Higher values like 0.8 will make the output more random, while lower values
like 0.2 will make it more focused and deterministic."""
top_p: float = 1.0
top_p: float | None = None
"""An alternative to sampling with temperature, called nucleus sampling, where the
model considers the results of the tokens with top_p probability mass. So 0.1 means
only the tokens comprising the top 10% probability mass are considered.
If set to float < 1, only the smallest set of most probable tokens with
probabilities that add up to top_p or higher are kept for generation."""
probabilities that add up to top_p or higher are kept for generation.
If None, the parameter is omitted from the API request so the model's default is used."""
presence_penalty: float = 0.0
"""Number between -2.0 and 2.0. Positive values penalize new tokens based on whether
they appear in the text so far, increasing the model's likelihood
Expand Down Expand Up @@ -148,7 +149,7 @@ def __init__(
stop_mode: StopMode = StopMode.AUTO,
logprobs: int = 1,
temperature: float = 1.0,
top_p: float = 1.0,
top_p: float | None = None,
presence_penalty: float = 0.0,
frequency_penalty: float = 0.0,
num_completions: int | None = None,
Expand Down Expand Up @@ -205,9 +206,9 @@ def __init__(
msg = "The temperature parameter should be a positive float."
raise ValueError(msg)

# Validate top_p
if not isinstance(top_p, float):
msg = "The top_p parameter should be a float."
# Validate top_p (None means the parameter is omitted and the model default applies)
if top_p is not None and not isinstance(top_p, float):
msg = "The top_p parameter should be a float or None."
raise ValueError(msg)

# Validate presence_penalty
Expand Down Expand Up @@ -253,7 +254,7 @@ def __init__(
object.__setattr__(self, "metadata", metadata)

def is_deterministic_sampling(self) -> bool:
return (self.temperature < 1e-4) or (self.top_p < 1e-4)
return (self.temperature < 1e-4) or (self.top_p is not None and self.top_p < 1e-4)

def replace(self, **kwargs) -> "LmPrompt":
"""Returns a new prompt with the given parameters replaced."""
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ include = ["lmwrapper*"]

[project]
name = "lmwrapper"
version = "0.17.1.0"
version = "0.18.0.0"

authors = [
{ name = "David Gros" },
Expand Down
2 changes: 1 addition & 1 deletion run_tests.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
pip install --upgrade -e '.[hf,anthropic,docs,dev]' || exit 1
pytest "$@"
python -m pytest "$@"
Loading