
Commit

add openai v1 compatible endpoint for chat completions
dtam committed Jul 2, 2024
1 parent c0bf38a commit 66fc0a8
Showing 5 changed files with 150 additions and 6 deletions.
83 changes: 80 additions & 3 deletions guardrails_api/blueprints/guards.py
@@ -2,8 +2,8 @@
import os
from guardrails.hub import * # noqa
from string import Template
from typing import Any, Dict, cast
from flask import Blueprint, Response, request, stream_with_context
from typing import Any, Dict, cast, Iterator
from flask import Blueprint, Response, request, stream_with_context, jsonify, abort
from urllib.parse import unquote_plus
from guardrails import Guard
from guardrails.classes import ValidationOutcome
@@ -15,7 +15,7 @@
from guardrails_api.clients.postgres_client import postgres_is_enabled
from guardrails_api.utils.handle_error import handle_error
from guardrails_api.utils.get_llm_callable import get_llm_callable

from guardrails_api.utils.openai import outcome_to_chat_completion, outcome_to_stream_response

guards_bp = Blueprint("guards", __name__, url_prefix="/guards")

@@ -148,6 +148,83 @@ def collect_telemetry(
validate_span.set_attribute("num_of_reasks", num_of_reasks)


@guards_bp.route("/<guard_name>/openai/v1/chat/completions", methods=["POST"])
@handle_error
def chat_completions(guard_name: str):
    # This endpoint implements the OpenAI Chat Completions API.
    # It is meant to be fully compatible with it.
    # The only difference is that it uses the Guard API under the hood
    # instead of the OpenAI API and supports guardrails API error handling.
    # To use this with the OpenAI SDK you can use the following code:
    #   import openai
    #   openai.base_url = "http://localhost:8000/guards/<guard_name>/openai/v1/"
    #   response = openai.chat.completions.create(
    #       model="gpt-3.5-turbo-0125",
    #       messages=[
    #           {"role": "user", "content": "Hello, how are you?"},
    #       ],
    #       stream=True,
    #   )
    #   print(response)
    # To configure guardrails error handling from the server side you can use the following code:
    #

    payload = request.json
    decoded_guard_name = unquote_plus(guard_name)
    guard_struct = guard_client.get_guard(decoded_guard_name)
    guard = guard_struct
    if not isinstance(guard_struct, Guard):
        guard: Guard = Guard.from_dict(guard_struct.to_dict())
    stream = payload.get("stream", False)
    has_tool_gd_tool_call = False

    try:
        tools = payload.get("tools", [])
        tools = [tool for tool in tools if tool["function"]["name"] == "gd_response_tool"]
        has_tool_gd_tool_call = len(tools) > 0
    except (KeyError, TypeError):
        pass

    if not stream:
        try:
            validation_outcome: ValidationOutcome = guard(
                # TODO: make this come from the guard struct;
                # currently we don't support .configure
                num_reasks=0,
                **payload,
            )
            result = outcome_to_chat_completion(
                validation_outcome=validation_outcome,
                has_tool_gd_tool_call=has_tool_gd_tool_call,
            )
            return result
        except Exception as e:
            raise HttpError(
                status=400,
                message="BadRequest",
                cause=str(e),
            )

    else:
        # We need to return validated chunks that look identical to OpenAI's,
        # i.e. something like:
        # data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-3.5-turbo-0125", "system_fingerprint": "fp_44709d6fcb", "choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]}
        # ....
        # data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-3.5-turbo-0125", "system_fingerprint": "fp_44709d6fcb", "choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]}
        def openai_streamer():
            guard_stream = guard(
                num_reasks=0,
                **payload,
            )
            for result in guard_stream:
                chunk_string = f"data: {json.dumps(outcome_to_stream_response(validation_outcome=result))}\n\n"
                yield chunk_string.encode("utf-8")
            # close the stream
            yield b"\n"

        return Response(
            stream_with_context(openai_streamer()),
        )

@guards_bp.route("/<guard_name>/validate", methods=["POST"])
@handle_error
def validate(guard_name: str):
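For reference, here is a short client-side sketch of how the new chat completions endpoint can be exercised once the dev server is running (a minimal sketch, assuming the server from start-dev.sh on localhost:8000 with LLM credentials configured, and the "no_guards" guard registered in config.py; requests is used only for illustration and is not a dependency added by this commit):

import json
import requests

base = "http://localhost:8000/guards/no_guards/openai/v1"

# Non-streaming: the JSON body also carries the extra "guardrails" key added by outcome_to_chat_completion
resp = requests.post(
    f"{base}/chat/completions",
    json={
        "model": "gpt-3.5-turbo-0125",
        "messages": [{"role": "user", "content": "Hello, how are you?"}],
    },
)
body = resp.json()
print(body["choices"][0]["message"]["content"])
print(body["guardrails"]["validation_passed"])

# Streaming: each "data: ..." line is an SSE chunk produced by outcome_to_stream_response
with requests.post(
    f"{base}/chat/completions",
    json={
        "model": "gpt-3.5-turbo-0125",
        "messages": [{"role": "user", "content": "Hello, how are you?"}],
        "stream": True,
    },
    stream=True,
) as stream_resp:
    for line in stream_resp.iter_lines():
        if line.startswith(b"data: "):
            chunk = json.loads(line[len(b"data: "):])
            print(chunk["choices"][0]["delta"]["content"], end="")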
16 changes: 15 additions & 1 deletion guardrails_api/config.py
@@ -7,5 +7,19 @@
and guards will be persisted into postgres. In that case,
these guards will not be initialized.
"""
from typing import List
from guardrails import Guard
from guardrails.hub import (
    RegexMatch,
)

from guardrails import Guard # noqa

regexG = Guard().use(
    RegexMatch(regex="^[A-Z][a-z]*$", on_fail='exception')
)

regexG.name = "regex"
regexG.configure(num_reasks=0)

no_guards = Guard()
no_guards.name = "no_guards"
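As a quick illustration of what the "regex" guard defined above enforces, a minimal sketch (assuming guardrails-ai 0.5.x with the RegexMatch validator installed from the hub; this snippet is illustrative and not part of the commit):

from guardrails import Guard
from guardrails.hub import RegexMatch

guard = Guard().use(RegexMatch(regex="^[A-Z][a-z]*$", on_fail="exception"))

outcome = guard.validate("Hello")      # single capitalized word matches the pattern
print(outcome.validation_passed)       # True

try:
    guard.validate("hello world")      # lowercase start and a space, so on_fail="exception" raises
except Exception as err:
    print(f"validation failed: {err}")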
4 changes: 3 additions & 1 deletion guardrails_api/start-dev.sh
@@ -5,5 +5,7 @@ export GUARDRAILS_LOG_LEVEL="INFO"
export GUARDRAILS_PROCESS_COUNT=1
export SELF_ENDPOINT=http://localhost:8000
export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
export OPENAI_API_KEY=sk-proj-91HVDNsWNRI8siElbHozT3BlbkFJSERfkuukX7u02hdQvwnI
export GUARDRAILS_OPENAI_ENDPONTS_ERROR_HANDLING=include

gunicorn --bind 0.0.0.0:8000 --timeout=5 --threads=10 "guardrails_api.app:create_app()" --reload
gunicorn --bind 0.0.0.0:8000 --timeout=5 --threads=10 "guardrails_api.app:create_app()" --reload --capture-output --enable-stdio-inheritance
50 changes: 50 additions & 0 deletions guardrails_api/utils/openai.py
@@ -0,0 +1,50 @@
from typing import List, Dict, Any
from guardrails.classes import ValidationOutcome


def outcome_to_stream_response(validation_outcome: ValidationOutcome):
    stream_chunk_template = {
        "choices": [
            {
                "delta": {
                    "content": validation_outcome.validated_output,
                },
            }
        ],
        "guardrails": {
            "reask": validation_outcome.reask or None,
            "validation_passed": validation_outcome.validation_passed,
            "error": validation_outcome.error or None,
        },
    }
    # reuse the raw LLM chunk when it is available, otherwise fall back to the template
    stream_chunk = getattr(validation_outcome, "full_raw_llm_output", stream_chunk_template)
    stream_chunk["choices"][0]["delta"]["content"] = validation_outcome.validated_output
    return stream_chunk


def outcome_to_chat_completion(
    validation_outcome: ValidationOutcome,
    has_tool_gd_tool_call=False,
):
    completion_template = (
        {"choices": [{"message": {"content": ""}}]}
        if not has_tool_gd_tool_call
        else {"choices": [{"message": {"tool_calls": [{"function": {"arguments": ""}}]}}]}
    )
    completion = getattr(validation_outcome, "full_raw_llm_output", completion_template)
    completion["guardrails"] = {
        "reask": validation_outcome.reask or None,
        "validation_passed": validation_outcome.validation_passed,
        "error": validation_outcome.error or None,
    }

    # string completion
    try:
        completion["choices"][0]["message"]["content"] = validation_outcome.validated_output
    except (KeyError, IndexError, TypeError):
        pass

    # tool completion
    try:
        choice = completion["choices"][0]
        # if this is accessible it means a tool was called, so set our validated output to that
        choice["message"]["tool_calls"][-1]["function"]["arguments"] = validation_outcome.validated_output
    except (KeyError, IndexError, TypeError):
        pass

    return completion
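To make the shape of these payloads concrete, here is a small sketch of the helpers in use (the fake_outcome stand-in below is hypothetical and only mimics the attributes the helpers read; it is not the real ValidationOutcome class):

import json
from types import SimpleNamespace

from guardrails_api.utils.openai import outcome_to_chat_completion, outcome_to_stream_response

# hypothetical stand-in carrying only the fields the helpers access
fake_outcome = SimpleNamespace(
    validated_output="Hello! I am doing well.",
    reask=None,
    validation_passed=True,
    error=None,
)

print(json.dumps(outcome_to_stream_response(validation_outcome=fake_outcome)))
# {"choices": [{"delta": {"content": "Hello! I am doing well."}}],
#  "guardrails": {"reask": null, "validation_passed": true, "error": null}}

completion = outcome_to_chat_completion(validation_outcome=fake_outcome)
print(completion["choices"][0]["message"]["content"])
# Hello! I am doing well.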
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -10,9 +10,10 @@ readme = "README.md"
keywords = ["Guardrails", "Guardrails AI", "Guardrails API", "Guardrails API"]
requires-python = ">= 3.8.1"
dependencies = [
"guardrails-ai@git+https://github.com/guardrails-ai/guardrails.git@core-schema-impl",
"guardrails-ai>=0.5.0a1",
"flask>=3.0.3,<4",
"Flask-SQLAlchemy>=3.1.1,<4",
"Flask-Caching>=2.3.0,<3",
"Werkzeug>=3.0.3,<4",
"jsonschema>=4.22.0,<5",
"referencing>=0.35.1,<1",
