
Commit

add openai v1 compatible endpoint for chat completions
dtam committed Jul 2, 2024
1 parent c0bf38a commit 66fc0a8
Showing 5 changed files with 150 additions and 6 deletions.
83 changes: 80 additions & 3 deletions guardrails_api/blueprints/guards.py
@@ -2,8 +2,8 @@
import os
from guardrails.hub import * # noqa
from string import Template
from typing import Any, Dict, cast
from flask import Blueprint, Response, request, stream_with_context
from typing import Any, Dict, cast, Iterator
from flask import Blueprint, Response, request, stream_with_context, jsonify, abort
from urllib.parse import unquote_plus
from guardrails import Guard
from guardrails.classes import ValidationOutcome
@@ -15,7 +15,7 @@
from guardrails_api.clients.postgres_client import postgres_is_enabled
from guardrails_api.utils.handle_error import handle_error
from guardrails_api.utils.get_llm_callable import get_llm_callable

from guardrails_api.utils.openai import outcome_to_chat_completion, outcome_to_stream_response

guards_bp = Blueprint("guards", __name__, url_prefix="/guards")

@@ -148,6 +148,83 @@ def collect_telemetry(
validate_span.set_attribute("num_of_reasks", num_of_reasks)


@guards_bp.route("/<guard_name>/openai/v1/chat/completions", methods=["POST"])
@handle_error
def chat_completions(guard_name: str):
    # This endpoint implements the OpenAI Chat Completions API.
    # It is meant to be fully compatible with it.
    # The only difference is that it uses the Guard API under the hood
    # instead of the OpenAI API and supports guardrails API error handling.
    # To use this with the OpenAI SDK you can use the following code:
    #   import openai
    #   openai.base_url = "http://localhost:8000/guards/<guard_name>/openai/v1/"
    #   response = openai.chat.completions.create(
    #       model="gpt-3.5-turbo-0125",
    #       messages=[
    #           {"role": "user", "content": "Hello, how are you?"},
    #       ],
    #       stream=True,
    #   )
    #   print(response)
    # To configure guardrails error handling from the server side you can use the following code:
    #

    payload = request.json
    decoded_guard_name = unquote_plus(guard_name)
    guard_struct = guard_client.get_guard(decoded_guard_name)
    guard = guard_struct
    if not isinstance(guard_struct, Guard):
        guard: Guard = Guard.from_dict(guard_struct.to_dict())
    stream = payload.get("stream", False)
    has_tool_gd_tool_call = False

    try:
        tools = payload.get("tools", [])
        tools = [tool for tool in tools if tool["function"]["name"] == "gd_response_tool"]
        has_tool_gd_tool_call = len(tools) > 0
    except (KeyError, TypeError):
        pass

    if not stream:
        try:
            validation_outcome: ValidationOutcome = guard(
                # TODO: make this come from the guard struct;
                # currently we don't support .configure
                num_reasks=0,
                **payload,
            )
            result = outcome_to_chat_completion(
                validation_outcome=validation_outcome,
                has_tool_gd_tool_call=has_tool_gd_tool_call,
            )
            return result
        except Exception as e:
            raise HttpError(
                status=400,
                message="BadRequest",
                cause=str(e),
            )

    else:
        # We need to return validated chunks that look identical to OpenAI's,
        # i.e. something like:
        # data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-3.5-turbo-0125", "system_fingerprint": "fp_44709d6fcb", "choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]}
        # ....
        # data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-3.5-turbo-0125", "system_fingerprint": "fp_44709d6fcb", "choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]}
        def openai_streamer():
            guard_stream = guard(
                num_reasks=0,
                **payload,
            )
            for result in guard_stream:
                chunk_string = f"data: {json.dumps(outcome_to_stream_response(validation_outcome=result))}\n\n"
                yield chunk_string.encode("utf-8")
            # close the stream
            yield b"\n"

        return Response(
            stream_with_context(openai_streamer()),
        )

@guards_bp.route("/<guard_name>/validate", methods=["POST"])
@handle_error
def validate(guard_name: str):
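For reference, here is a short client-side sketch of how the new chat completions endpoint can be exercised once the dev server is running (a minimal sketch, assuming the server from start-dev.sh on localhost:8000 with LLM credentials configured, and the "no_guards" guard registered in config.py; requests is used only for illustration and is not a dependency added by this commit):

import json
import requests

base = "http://localhost:8000/guards/no_guards/openai/v1"

# Non-streaming: the JSON body also carries the extra "guardrails" key added by outcome_to_chat_completion
resp = requests.post(
    f"{base}/chat/completions",
    json={
        "model": "gpt-3.5-turbo-0125",
        "messages": [{"role": "user", "content": "Hello, how are you?"}],
    },
)
body = resp.json()
print(body["choices"][0]["message"]["content"])
print(body["guardrails"]["validation_passed"])

# Streaming: each "data: ..." line is an SSE chunk produced by outcome_to_stream_response
with requests.post(
    f"{base}/chat/completions",
    json={
        "model": "gpt-3.5-turbo-0125",
        "messages": [{"role": "user", "content": "Hello, how are you?"}],
        "stream": True,
    },
    stream=True,
) as stream_resp:
    for line in stream_resp.iter_lines():
        if line.startswith(b"data: "):
            chunk = json.loads(line[len(b"data: "):])
            print(chunk["choices"][0]["delta"]["content"], end="")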
16 changes: 15 additions & 1 deletion guardrails_api/config.py
@@ -7,5 +7,19 @@
and guards will be persisted into postgres. In that case,
these guards will not be initialized.
"""
from typing import List
from guardrails import Guard
from guardrails.hub import (
    RegexMatch,
)

from guardrails import Guard # noqa

regexG = Guard().use(
    RegexMatch(regex="^[A-Z][a-z]*$", on_fail='exception')
)

regexG.name = "regex"
regexG.configure(num_reasks=0)

no_guards = Guard()
no_guards.name = "no_guards"
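As a quick illustration of what the "regex" guard defined above enforces, a minimal sketch (assuming guardrails-ai 0.5.x with the RegexMatch validator installed from the hub; this snippet is illustrative and not part of the commit):

from guardrails import Guard
from guardrails.hub import RegexMatch

guard = Guard().use(RegexMatch(regex="^[A-Z][a-z]*$", on_fail="exception"))

outcome = guard.validate("Hello")      # single capitalized word matches the pattern
print(outcome.validation_passed)       # True

try:
    guard.validate("hello world")      # lowercase start and a space, so on_fail="exception" raises
except Exception as err:
    print(f"validation failed: {err}")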
4 changes: 3 additions & 1 deletion guardrails_api/start-dev.sh
@@ -5,5 +5,7 @@ export GUARDRAILS_LOG_LEVEL="INFO"
export GUARDRAILS_PROCESS_COUNT=1
export SELF_ENDPOINT=http://localhost:8000
export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
export OPENAI_API_KEY=sk-proj-91HVDNsWNRI8siElbHozT3BlbkFJSERfkuukX7u02hdQvwnI
export GUARDRAILS_OPENAI_ENDPONTS_ERROR_HANDLING=include

gunicorn --bind 0.0.0.0:8000 --timeout=5 --threads=10 "guardrails_api.app:create_app()" --reload
gunicorn --bind 0.0.0.0:8000 --timeout=5 --threads=10 "guardrails_api.app:create_app()" --reload --capture-output --enable-stdio-inheritance
50 changes: 50 additions & 0 deletions guardrails_api/utils/openai.py
@@ -0,0 +1,50 @@
from typing import List, Dict, Any
from guardrails.classes import ValidationOutcome


def outcome_to_stream_response(validation_outcome: ValidationOutcome):
    stream_chunk_template = {
        "choices": [
            {
                "delta": {
                    "content": validation_outcome.validated_output,
                },
            }
        ],
        "guardrails": {
            "reask": validation_outcome.reask or None,
            "validation_passed": validation_outcome.validation_passed,
            "error": validation_outcome.error or None,
        },
    }
    # reuse the raw LLM chunk when it is available, otherwise fall back to the template
    stream_chunk = getattr(validation_outcome, "full_raw_llm_output", stream_chunk_template)
    stream_chunk["choices"][0]["delta"]["content"] = validation_outcome.validated_output
    return stream_chunk


def outcome_to_chat_completion(
    validation_outcome: ValidationOutcome,
    has_tool_gd_tool_call=False,
):
    completion_template = (
        {"choices": [{"message": {"content": ""}}]}
        if not has_tool_gd_tool_call
        else {"choices": [{"message": {"tool_calls": [{"function": {"arguments": ""}}]}}]}
    )
    completion = getattr(validation_outcome, "full_raw_llm_output", completion_template)
    completion["guardrails"] = {
        "reask": validation_outcome.reask or None,
        "validation_passed": validation_outcome.validation_passed,
        "error": validation_outcome.error or None,
    }

    # string completion
    try:
        completion["choices"][0]["message"]["content"] = validation_outcome.validated_output
    except (KeyError, IndexError, TypeError):
        pass

    # tool completion
    try:
        choice = completion["choices"][0]
        # if this is accessible it means a tool was called, so set our validated output to that
        choice["message"]["tool_calls"][-1]["function"]["arguments"] = validation_outcome.validated_output
    except (KeyError, IndexError, TypeError):
        pass

    return completion
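To make the shape of these payloads concrete, here is a small sketch of the helpers in use (the fake_outcome stand-in below is hypothetical and only mimics the attributes the helpers read; it is not the real ValidationOutcome class):

import json
from types import SimpleNamespace

from guardrails_api.utils.openai import outcome_to_chat_completion, outcome_to_stream_response

# hypothetical stand-in carrying only the fields the helpers access
fake_outcome = SimpleNamespace(
    validated_output="Hello! I am doing well.",
    reask=None,
    validation_passed=True,
    error=None,
)

print(json.dumps(outcome_to_stream_response(validation_outcome=fake_outcome)))
# {"choices": [{"delta": {"content": "Hello! I am doing well."}}],
#  "guardrails": {"reask": null, "validation_passed": true, "error": null}}

completion = outcome_to_chat_completion(validation_outcome=fake_outcome)
print(completion["choices"][0]["message"]["content"])
# Hello! I am doing well.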
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -10,9 +10,10 @@ readme = "README.md"
keywords = ["Guardrails", "Guardrails AI", "Guardrails API", "Guardrails API"]
requires-python = ">= 3.8.1"
dependencies = [
"guardrails-ai@git+https://github.com/guardrails-ai/guardrails.git@core-schema-impl",
"guardrails-ai>=0.5.0a1",
"flask>=3.0.3,<4",
"Flask-SQLAlchemy>=3.1.1,<4",
"Flask-Caching>=2.3.0,<3",
"Werkzeug>=3.0.3,<4",
"jsonschema>=4.22.0,<5",
"referencing>=0.35.1,<1",
