Add OpenAI v1-compatible endpoint for chat completions #54

Merged: 8 commits, Jul 8, 2024
83 changes: 82 additions & 1 deletion guardrails_api/blueprints/guards.py
@@ -16,7 +16,7 @@
from guardrails_api.clients.postgres_client import postgres_is_enabled
from guardrails_api.utils.handle_error import handle_error
from guardrails_api.utils.get_llm_callable import get_llm_callable

from guardrails_api.utils.openai import outcome_to_chat_completion, outcome_to_stream_response

guards_bp = Blueprint("guards", __name__, url_prefix="/guards")

@@ -151,6 +151,87 @@ def collect_telemetry(
validate_span.set_attribute("num_of_reasks", num_of_reasks)


@guards_bp.route("/<guard_name>/openai/v1/chat/completions", methods=["POST"])
@handle_error
def openai_v1_chat_completions(guard_name: str):
# This endpoint implements the OpenAI Chat Completions API.
# It is meant to be fully compatible with it;
# the only difference is that it uses the Guard API under the hood
# instead of the OpenAI API, and it supports guardrails API error handling.
# To use this with the OpenAI SDK you can use the following code:
# import openai
# openai.base_url = "http://localhost:8000/guards/<guard_name>/openai/v1/"
# response = openai.chat.completions.create(
# model="gpt-3.5-turbo-0125",
# messages=[
# {"role": "user", "content": "Hello, how are you?"},
# ],
# stream=True,
# )
# print(response)
# Guardrails error handling can also be configured on the server side.

payload = request.json
decoded_guard_name = unquote_plus(guard_name)
guard_struct = guard_client.get_guard(decoded_guard_name)
guard = guard_struct
if not isinstance(guard_struct, Guard):
guard: Guard = Guard.from_dict(guard_struct.to_dict())
stream = payload.get("stream", False)
has_tool_gd_tool_call = False

try:
tools = payload.get("tools", [])
gd_response_tools = [
tool for tool in tools if tool["function"]["name"] == "gd_response_tool"
]
has_tool_gd_tool_call = len(gd_response_tools) > 0
except (KeyError, AttributeError):
pass

if not stream:
try:
validation_outcome: ValidationOutcome = guard(
# TODO: make this come from the guard struct?
# currently we don't support .configure
num_reasks=0,
**payload,
)
llm_response = guard.history[-1].iterations[-1].outputs.llm_response_info
result = outcome_to_chat_completion(
validation_outcome=validation_outcome,
llm_response=llm_response,
has_tool_gd_tool_call=has_tool_gd_tool_call,
)
return result
except Exception as e:
raise HttpError(
status=400,
message="BadRequest",
cause=(str(e)),
)

else:
# Need to return validated chunks that look identical to OpenAI's.
# Each chunk should look something like:
# data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_44709d6fcb","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]}
# ...
# data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-3.5-turbo-0125","system_fingerprint":"fp_44709d6fcb","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]}
def openai_streamer():
guard_stream = guard(
num_reasks=0,
**payload,
)
for result in guard_stream:
chunk_string = f"data: {json.dumps(outcome_to_stream_response(validation_outcome=result))}\n\n"
yield chunk_string.encode("utf-8")
# close the stream
yield b"\n"

return Response(
stream_with_context(openai_streamer()),
)


@guards_bp.route("/<guard_name>/validate", methods=["POST"])
@handle_error
def validate(guard_name: str):
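For context, here is a minimal client-side sketch of how the new route can be exercised with the OpenAI Python SDK (v1.x). The server address, guard name, and API key below are placeholders rather than part of this diff, and whether the key is used at all depends on how the guard's LLM is configured.

from openai import OpenAI

# Point the SDK at the guardrails-api server instead of api.openai.com.
client = OpenAI(
    base_url="http://localhost:8000/guards/my-guard/openai/v1/",
    api_key="sk-placeholder",
)

response = client.chat.completions.create(
    model="gpt-3.5-turbo-0125",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
)

# Standard OpenAI fields are present; the endpoint additionally attaches a
# "guardrails" object (reask / validation_passed / error) built by
# outcome_to_chat_completion.
print(response.choices[0].message.content)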
2 changes: 1 addition & 1 deletion guardrails_api/start-dev.sh
@@ -1 +1 @@
gunicorn --bind 0.0.0.0:8000 --timeout=5 --threads=10 "guardrails_api.app:create_app()" --reload
gunicorn --bind 0.0.0.0:8000 --timeout=5 --threads=10 "guardrails_api.app:create_app()" --reload --capture-output --enable-stdio-inheritance
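For reference, the two added flags are standard gunicorn options: --capture-output redirects worker stdout/stderr into the error log, and --enable-stdio-inheritance allows the stdio file descriptors to be inherited, so application output (for example, prints and logs from the new endpoint) remains visible when running the dev script.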
61 changes: 61 additions & 0 deletions guardrails_api/utils/openai.py
@@ -0,0 +1,61 @@
from guardrails.classes import ValidationOutcome

def outcome_to_stream_response(validation_outcome: ValidationOutcome):
stream_chunk_template = {
"choices": [
{
"delta": {
"content": validation_outcome.validated_output,
},
}
],
"guardrails": {
"reask": validation_outcome.reask or None,
"validation_passed": validation_outcome.validation_passed,
"error": validation_outcome.error or None,
},
}
# Does this even make sense with a stream? We'd need each chunk as they're emitted.
stream_chunk = stream_chunk_template
stream_chunk["choices"][0]["delta"]["content"] = validation_outcome.validated_output
return stream_chunk


def outcome_to_chat_completion(
validation_outcome: ValidationOutcome,
llm_response,
has_tool_gd_tool_call=False,
):
completion_template = (
{"choices": [{"message": {"content": ""}}]}
if not has_tool_gd_tool_call
else {
"choices": [{"message": {"tool_calls": [{"function": {"arguments": ""}}]}}]
}
)
completion = getattr(llm_response, "full_raw_llm_output", completion_template)
completion["guardrails"] = {
"reask": validation_outcome.reask or None,
"validation_passed": validation_outcome.validation_passed,
"error": validation_outcome.error or None,
}

# string completion
try:
completion["choices"][0]["message"]["content"] = (
validation_outcome.validated_output
)
except KeyError:
pass

# tool completion
try:
choice = completion["choices"][0]
# if this is accessible it means a tool was called so set our validated output to that
choice["message"]["tool_calls"][-1]["function"]["arguments"] = (
validation_outcome.validated_output
)
except KeyError:
pass

return completion
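As a rough illustration of what these helpers return, here is a sketch using a hand-built ValidationOutcome; the field values are made up for the example, and the expected dict mirrors the template above.

from guardrails.classes import ValidationOutcome
from guardrails_api.utils.openai import outcome_to_stream_response

outcome = ValidationOutcome(
    call_id="example-call-id",
    raw_llm_output="Hello world!",
    validated_output="Hello world!",
    validation_passed=True,
)

chunk = outcome_to_stream_response(validation_outcome=outcome)
# chunk == {
#     "choices": [{"delta": {"content": "Hello world!"}}],
#     "guardrails": {"reask": None, "validation_passed": True, "error": None},
# }

The non-streaming helper, outcome_to_chat_completion, is exercised end-to-end in the test added below.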
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -10,7 +10,7 @@ readme = "README.md"
keywords = ["Guardrails", "Guardrails AI", "Guardrails API", "Guardrails API"]
requires-python = ">= 3.8.1"
dependencies = [
"guardrails-ai>=0.5.0a2",
"guardrails-ai>=0.5.0a11",
"flask>=3.0.3,<4",
"Flask-SQLAlchemy>=3.1.1,<4",
"Flask-Caching>=2.3.0,<3",
79 changes: 77 additions & 2 deletions tests/blueprints/test_guards.py
@@ -9,7 +9,7 @@
from tests.mocks.mock_request import MockRequest
from guardrails.classes import ValidationOutcome
from guardrails.classes.generic import Stack
from guardrails.classes.history import Call
from guardrails.classes.history import Call, Iteration
from guardrails_api.app import register_config

# TODO: Should we mock this somehow?
@@ -44,10 +44,11 @@ def test_route_setup(mocker):

from guardrails_api.blueprints.guards import guards_bp

assert guards_bp.route_call_count == 4
assert guards_bp.route_call_count == 5
assert guards_bp.routes == [
"/",
"/<guard_name>",
"/<guard_name>/openai/v1/chat/completions",
"/<guard_name>/validate",
"/<guard_name>/history/<call_id>",
]
@@ -546,3 +547,77 @@ def test_validate__call(mocker):
}

del os.environ["PGHOST"]

def test_openai_v1_chat_completions__call(mocker):
from guardrails_api.blueprints.guards import openai_v1_chat_completions
os.environ["PGHOST"] = "localhost"
mock_guard = MockGuardStruct()
mock_outcome = ValidationOutcome(
call_id="mock-call-id",
raw_llm_output="Hello world!",
validated_output="Hello world!",
validation_passed=False,
)

mock___call__ = mocker.patch.object(MockGuardStruct, "__call__")
mock___call__.return_value = mock_outcome

mock_from_dict = mocker.patch("guardrails_api.blueprints.guards.Guard.from_dict")
mock_from_dict.return_value = mock_guard

mock_request = MockRequest(
"POST",
json={
"messages": [{"role":"user", "content":"Hello world!"}],
},
headers={"x-openai-api-key": "mock-key"},
)

mocker.patch("flask.Blueprint", new=MockBlueprint)
mocker.patch("guardrails_api.blueprints.guards.request", mock_request)
mock_get_guard = mocker.patch(
"guardrails_api.blueprints.guards.guard_client.get_guard",
return_value=mock_guard,
)
mocker.patch(
"guardrails_api.blueprints.guards.get_llm_callable",
return_value="openai.Completion.create",
)

mocker.patch("guardrails_api.blueprints.guards.CacheClient.set")

mock_status = mocker.patch(
"guardrails.classes.history.call.Call.status", new_callable=PropertyMock
)
mock_status.return_value = "fail"
mock_call = Call()
mock_call.iterations = Stack(Iteration("some-id", 1))
mock_guard.history = Stack(mock_call)

response = openai_v1_chat_completions("My%20Guard's%20Name")

mock_get_guard.assert_called_once_with("My Guard's Name")

assert mock___call__.call_count == 1

mock___call__.assert_called_once_with(
num_reasks=0,
messages=[{"role":"user", "content":"Hello world!"}],
)

assert response == {
"choices": [
{
"message": {
"content": "Hello world!",
},
}
],
"guardrails": {
"reask": None,
"validation_passed": False,
"error": None,
},
}

del os.environ["PGHOST"]
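The streaming branch is not covered by this test; purely as an illustration, a client could consume the SSE output along these lines (the server address and guard name are placeholders, and the requests library is assumed to be available):

import json
import requests

resp = requests.post(
    "http://localhost:8000/guards/my-guard/openai/v1/chat/completions",
    json={
        "model": "gpt-3.5-turbo-0125",
        "messages": [{"role": "user", "content": "Hello, how are you?"}],
        "stream": True,
    },
    stream=True,
)

# Each validated chunk arrives as a "data: {...}" line produced by openai_streamer.
for line in resp.iter_lines():
    if not line or not line.startswith(b"data: "):
        continue
    chunk = json.loads(line[len(b"data: "):])
    print(chunk["choices"][0]["delta"]["content"], end="")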