Add OpenAI v1-compatible endpoint for chat completions #54

Merged: 8 commits, Jul 8, 2024
83 changes: 82 additions & 1 deletion guardrails_api/blueprints/guards.py
@@ -16,7 +16,7 @@
from guardrails_api.clients.postgres_client import postgres_is_enabled
from guardrails_api.utils.handle_error import handle_error
from guardrails_api.utils.get_llm_callable import get_llm_callable

from guardrails_api.utils.openai import outcome_to_chat_completion, outcome_to_stream_response

guards_bp = Blueprint("guards", __name__, url_prefix="/guards")

@@ -151,6 +151,87 @@ def collect_telemetry(
validate_span.set_attribute("num_of_reasks", num_of_reasks)


@guards_bp.route("/<guard_name>/openai/v1/chat/completions", methods=["POST"])
@handle_error
def openai_v1_chat_completions(guard_name: str):
# This endpoint implements the OpenAI Chat Completions API.
# It is meant to be fully compatible with it;
# the only difference is that it uses the Guard API under the hood
# instead of the OpenAI API, and it supports guardrails API error handling.
# To use this with the OpenAI SDK you can use the following code:
# import openai
# openai.base_url = "http://localhost:8000/guards/<guard_name>/openai/v1/"
# response = openai.chat.completions.create(
# model="gpt-3.5-turbo-0125",
# messages=[
# {"role": "user", "content": "Hello, how are you?"},
# ],
# stream=True,
# )
# print(response)
# Guardrails error handling can also be configured on the server side.

payload = request.json
decoded_guard_name = unquote_plus(guard_name)
guard_struct = guard_client.get_guard(decoded_guard_name)
guard = guard_struct
if not isinstance(guard_struct, Guard):
guard: Guard = Guard.from_dict(guard_struct.to_dict())
stream = payload.get("stream", False)
has_tool_gd_tool_call = False

try:
tools = payload.get("tools", [])
gd_response_tools = [
tool for tool in tools if tool["function"]["name"] == "gd_response_tool"
]
has_tool_gd_tool_call = len(gd_response_tools) > 0
except (KeyError, AttributeError):
pass

if not stream:
try:
validation_outcome: ValidationOutcome = guard(
# TODO: make this come from the guard struct?
# currently we don't support .configure
num_reasks=0,
**payload,
)
llm_response = guard.history[-1].iterations[-1].outputs.llm_response_info
result = outcome_to_chat_completion(
validation_outcome=validation_outcome,
llm_response=llm_response,
has_tool_gd_tool_call=has_tool_gd_tool_call,
)
return result
except Exception as e:
raise HttpError(
status=400,
message="BadRequest",
cause=(str(e)),
)

else:
# Need to return validated chunks that look identical to OpenAI's.
# Each chunk should look something like:
# data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_44709d6fcb","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]}
# ...
# data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_44709d6fcb","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]}
def openai_streamer():
guard_stream = guard(
num_reasks=0,
**payload,
)
for result in guard_stream:
chunk_string = f"data: {json.dumps(outcome_to_stream_response(validation_outcome=result))}\n\n"
yield chunk_string.encode("utf-8")
# close the stream
yield b"\n"

return Response(
stream_with_context(openai_streamer()),
)


@guards_bp.route("/<guard_name>/validate", methods=["POST"])
@handle_error
def validate(guard_name: str):
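For context, here is a minimal client-side sketch of how the new route can be exercised with the OpenAI Python SDK (v1.x). The server address, guard name, and API key below are placeholders rather than part of this diff, and whether the key is used at all depends on how the guard's LLM is configured.

from openai import OpenAI

# Point the SDK at the guardrails-api server instead of api.openai.com.
client = OpenAI(
    base_url="http://localhost:8000/guards/my-guard/openai/v1/",
    api_key="sk-placeholder",
)

response = client.chat.completions.create(
    model="gpt-3.5-turbo-0125",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
)

# Standard OpenAI fields are present; the endpoint additionally attaches a
# "guardrails" object (reask / validation_passed / error) built by
# outcome_to_chat_completion.
print(response.choices[0].message.content)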
2 changes: 1 addition & 1 deletion guardrails_api/start-dev.sh
@@ -1 +1 @@
gunicorn --bind 0.0.0.0:8000 --timeout=5 --threads=10 "guardrails_api.app:create_app()" --reload
gunicorn --bind 0.0.0.0:8000 --timeout=5 --threads=10 "guardrails_api.app:create_app()" --reload --capture-output --enable-stdio-inheritance
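For reference, the two added flags are standard gunicorn options: --capture-output redirects worker stdout/stderr into the error log, and --enable-stdio-inheritance allows the stdio file descriptors to be inherited, so application output (for example, prints and logs from the new endpoint) remains visible when running the dev script.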
61 changes: 61 additions & 0 deletions guardrails_api/utils/openai.py
@@ -0,0 +1,61 @@
from guardrails.classes import ValidationOutcome

def outcome_to_stream_response(validation_outcome: ValidationOutcome):
stream_chunk_template = {
"choices": [
{
"delta": {
"content": validation_outcome.validated_output,
},
}
],
"guardrails": {
"reask": validation_outcome.reask or None,
"validation_passed": validation_outcome.validation_passed,
"error": validation_outcome.error or None,
},
}
# Does this even make sense with a stream? We'd need each chunk as they're emitted.
stream_chunk = stream_chunk_template
stream_chunk["choices"][0]["delta"]["content"] = validation_outcome.validated_output
return stream_chunk


def outcome_to_chat_completion(
validation_outcome: ValidationOutcome,
llm_response,
has_tool_gd_tool_call=False,
):
completion_template = (
{"choices": [{"message": {"content": ""}}]}
if not has_tool_gd_tool_call
else {
"choices": [{"message": {"tool_calls": [{"function": {"arguments": ""}}]}}]
}
)
completion = getattr(llm_response, "full_raw_llm_output", completion_template)
completion["guardrails"] = {
"reask": validation_outcome.reask or None,
"validation_passed": validation_outcome.validation_passed,
"error": validation_outcome.error or None,
}

# string completion
try:
completion["choices"][0]["message"]["content"] = (
validation_outcome.validated_output
)
except KeyError:
pass

# tool completion
try:
choice = completion["choices"][0]
# if this is accessible it means a tool was called so set our validated output to that
choice["message"]["tool_calls"][-1]["function"]["arguments"] = (
validation_outcome.validated_output
)
except KeyError:
pass

return completion
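As a rough illustration of what these helpers return, here is a sketch using a hand-built ValidationOutcome; the field values are made up for the example, and the expected dict mirrors the template above.

from guardrails.classes import ValidationOutcome
from guardrails_api.utils.openai import outcome_to_stream_response

outcome = ValidationOutcome(
    call_id="example-call-id",
    raw_llm_output="Hello world!",
    validated_output="Hello world!",
    validation_passed=True,
)

chunk = outcome_to_stream_response(validation_outcome=outcome)
# chunk == {
#     "choices": [{"delta": {"content": "Hello world!"}}],
#     "guardrails": {"reask": None, "validation_passed": True, "error": None},
# }

The non-streaming helper, outcome_to_chat_completion, is exercised end-to-end in the test added below.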
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -10,7 +10,7 @@ readme = "README.md"
keywords = ["Guardrails", "Guardrails AI", "Guardrails API", "Guardrails API"]
requires-python = ">= 3.8.1"
dependencies = [
"guardrails-ai>=0.5.0a2",
"guardrails-ai>=0.5.0a11",
"flask>=3.0.3,<4",
"Flask-SQLAlchemy>=3.1.1,<4",
"Flask-Caching>=2.3.0,<3",
79 changes: 77 additions & 2 deletions tests/blueprints/test_guards.py
@@ -9,7 +9,7 @@
from tests.mocks.mock_request import MockRequest
from guardrails.classes import ValidationOutcome
from guardrails.classes.generic import Stack
from guardrails.classes.history import Call
from guardrails.classes.history import Call, Iteration
from guardrails_api.app import register_config

# TODO: Should we mock this somehow?
@@ -44,10 +44,11 @@ def test_route_setup(mocker):

from guardrails_api.blueprints.guards import guards_bp

assert guards_bp.route_call_count == 4
assert guards_bp.route_call_count == 5
assert guards_bp.routes == [
"/",
"/<guard_name>",
"/<guard_name>/openai/v1/chat/completions",
"/<guard_name>/validate",
"/<guard_name>/history/<call_id>",
]
@@ -546,3 +547,77 @@ def test_validate__call(mocker):
}

del os.environ["PGHOST"]

def test_openai_v1_chat_completions__call(mocker):
from guardrails_api.blueprints.guards import openai_v1_chat_completions
os.environ["PGHOST"] = "localhost"
mock_guard = MockGuardStruct()
mock_outcome = ValidationOutcome(
call_id="mock-call-id",
raw_llm_output="Hello world!",
validated_output="Hello world!",
validation_passed=False,
)

mock___call__ = mocker.patch.object(MockGuardStruct, "__call__")
mock___call__.return_value = mock_outcome

mock_from_dict = mocker.patch("guardrails_api.blueprints.guards.Guard.from_dict")
mock_from_dict.return_value = mock_guard

mock_request = MockRequest(
"POST",
json={
"messages": [{"role":"user", "content":"Hello world!"}],
},
headers={"x-openai-api-key": "mock-key"},
)

mocker.patch("flask.Blueprint", new=MockBlueprint)
mocker.patch("guardrails_api.blueprints.guards.request", mock_request)
mock_get_guard = mocker.patch(
"guardrails_api.blueprints.guards.guard_client.get_guard",
return_value=mock_guard,
)
mocker.patch(
"guardrails_api.blueprints.guards.get_llm_callable",
return_value="openai.Completion.create",
)

mocker.patch("guardrails_api.blueprints.guards.CacheClient.set")

mock_status = mocker.patch(
"guardrails.classes.history.call.Call.status", new_callable=PropertyMock
)
mock_status.return_value = "fail"
mock_call = Call()
mock_call.iterations = Stack(Iteration("some-id", 1))
mock_guard.history = Stack(mock_call)

response = openai_v1_chat_completions("My%20Guard's%20Name")

mock_get_guard.assert_called_once_with("My Guard's Name")

assert mock___call__.call_count == 1

mock___call__.assert_called_once_with(
num_reasks=0,
messages=[{"role":"user", "content":"Hello world!"}],
)

assert response == {
"choices": [
{
"message": {
"content": "Hello world!",
},
}
],
"guardrails": {
"reask": None,
"validation_passed": False,
"error": None,
},
}

del os.environ["PGHOST"]
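The streaming branch is not covered by this test; purely as an illustration, a client could consume the SSE output along these lines (the server address and guard name are placeholders, and the requests library is assumed to be available):

import json
import requests

resp = requests.post(
    "http://localhost:8000/guards/my-guard/openai/v1/chat/completions",
    json={
        "model": "gpt-3.5-turbo-0125",
        "messages": [{"role": "user", "content": "Hello, how are you?"}],
        "stream": True,
    },
    stream=True,
)

# Each validated chunk arrives as a "data: {...}" line produced by openai_streamer.
for line in resp.iter_lines():
    if not line or not line.startswith(b"data: "):
        continue
    chunk = json.loads(line[len(b"data: "):])
    print(chunk["choices"][0]["delta"]["content"], end="")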