Skip to content

Commit e44f70d

Browse files
Python: bug fixes from graphrag insights (#10270)
### Motivation and Context <!-- Thank you for your contribution to the semantic-kernel repo! Please help reviewers and future users, providing the following information: 1. Why is this change required? 2. What problem does it solve? 3. What scenario does it contribute to? 4. If it fixes an open issue, please link to the issue here. --> Based on insights from the graphrag team, several bug fixes related to data and model validators. ### Description <!-- Describe your changes, the overall approach, the underlying design. These notes will help understanding how your code works. Thanks! --> For all cases of model_validator in before mode, there is no guarantee that the data passed is a dict, especially when a model is used as a field in another model, then a string is passed to the before validator. So in those cases we check before doing logic, otherwise just pass it on and let Pydantic handle the invalid data. Some small logic fixes in the vector store model decorator: - updated docstring with the right exceptions - additional checks on attributes - additional checks on VectorFields with tests ### Contribution Checklist <!-- Before submitting this PR, please make sure: --> - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone 😄
1 parent 0cd59f1 commit e44f70d

File tree

20 files changed

+240
-131
lines changed

20 files changed

+240
-131
lines changed

python/.vscode/tasks.json

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,14 @@
135135
{
136136
"label": "Python: Tests - Code Coverage",
137137
"type": "shell",
138-
"command": "uv run pytest --cov=semantic_kernel --cov-report=term-missing:skip-covered tests/unit/",
138+
"command": "uv",
139+
"args": [
140+
"run",
141+
"pytest",
142+
"--cov=semantic_kernel",
143+
"--cov-report=term-missing:skip-covered",
144+
"tests/unit/"
145+
],
139146
"group": "test",
140147
"presentation": {
141148
"reveal": "always",

python/semantic_kernel/agents/group_chat/broadcast_queue.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import asyncio
55
from collections import deque
66
from dataclasses import dataclass, field
7+
from typing import Any
78

89
from pydantic import Field, SkipValidation, ValidationError, model_validator
910

@@ -28,11 +29,12 @@ def is_empty(self):
2829
return len(self.queue) == 0
2930

3031
@model_validator(mode="before")
31-
def validate_receive_task(cls, values):
32+
def validate_receive_task(cls, values: Any):
3233
"""Validate the receive task."""
33-
receive_task = values.get("receive_task")
34-
if receive_task is not None and not isinstance(receive_task, asyncio.Task):
35-
raise ValidationError("receive_task must be an instance of asyncio.Task or None")
34+
if isinstance(values, dict):
35+
receive_task = values.get("receive_task")
36+
if receive_task is not None and not isinstance(receive_task, asyncio.Task):
37+
raise ValidationError("receive_task must be an instance of asyncio.Task or None")
3638
return values
3739

3840

python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,10 @@ def validate_function_call(cls, v: str | list[dict[str, Any]] | None = None):
121121
return v
122122

123123
@model_validator(mode="before")
124-
def validate_response_format_and_set_flag(cls, values) -> Any:
124+
def validate_response_format_and_set_flag(cls, values: Any) -> Any:
125125
"""Validate the response_format and set structured_json_response accordingly."""
126+
if not isinstance(values, dict):
127+
return values
126128
response_format = values.get("response_format", None)
127129

128130
if response_format is None:
@@ -153,7 +155,7 @@ def validate_response_format_and_set_flag(cls, values) -> Any:
153155

154156
@model_validator(mode="before")
155157
@classmethod
156-
def validate_function_calling_behaviors(cls, data) -> Any:
158+
def validate_function_calling_behaviors(cls, data: Any) -> Any:
157159
"""Check if function_call_behavior is set and if so, move to use function_choice_behavior instead."""
158160
# In an attempt to phase out the use of `function_call_behavior` in favor of `function_choice_behavior`,
159161
# we are syncing the `function_call_behavior` with `function_choice_behavior` if the former is set.

python/semantic_kernel/connectors/ai/prompt_execution_settings.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,15 @@ class PromptExecutionSettings(KernelBaseModel):
3838

3939
@model_validator(mode="before")
4040
@classmethod
41-
def parse_function_choice_behavior(cls: type[_T], data: dict[str, Any]) -> dict[str, Any]:
41+
def parse_function_choice_behavior(cls: type[_T], data: Any) -> dict[str, Any]:
4242
"""Parse the function choice behavior data."""
43-
function_choice_behavior_data = data.get("function_choice_behavior")
44-
if function_choice_behavior_data:
45-
if isinstance(function_choice_behavior_data, str):
46-
data["function_choice_behavior"] = FunctionChoiceBehavior.from_string(function_choice_behavior_data)
47-
elif isinstance(function_choice_behavior_data, dict):
48-
data["function_choice_behavior"] = FunctionChoiceBehavior.from_dict(function_choice_behavior_data)
43+
if isinstance(data, dict):
44+
function_choice_behavior_data = data.get("function_choice_behavior")
45+
if function_choice_behavior_data:
46+
if isinstance(function_choice_behavior_data, str):
47+
data["function_choice_behavior"] = FunctionChoiceBehavior.from_string(function_choice_behavior_data)
48+
elif isinstance(function_choice_behavior_data, dict):
49+
data["function_choice_behavior"] = FunctionChoiceBehavior.from_dict(function_choice_behavior_data)
4950
return data
5051

5152
def __init__(self, service_id: str | None = None, **kwargs: Any):

python/semantic_kernel/connectors/memory/qdrant/qdrant_settings.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,15 @@ class QdrantSettings(KernelBaseSettings):
2626
prefer_grpc: bool = False
2727

2828
@model_validator(mode="before")
29-
def validate_settings(cls, values):
29+
def validate_settings(cls, values: dict):
3030
"""Validate the settings."""
31-
if "url" not in values and "host" not in values and "path" not in values and "location" not in values:
31+
if (
32+
isinstance(values, dict)
33+
and "url" not in values
34+
and "host" not in values
35+
and "path" not in values
36+
and "location" not in values
37+
):
3238
values["location"] = IN_MEMORY_STRING
3339
return values
3440

python/semantic_kernel/connectors/memory/weaviate/weaviate_settings.py

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -39,24 +39,25 @@ class WeaviateSettings(KernelBaseSettings):
3939

4040
@model_validator(mode="before")
4141
@classmethod
42-
def validate_settings(cls, data: dict[str, Any]) -> dict[str, Any]:
42+
def validate_settings(cls, data: Any) -> dict[str, Any]:
4343
"""Validate Weaviate settings."""
44-
enabled = sum([
45-
cls.is_using_weaviate_cloud(data),
46-
cls.is_using_local_weaviate(data),
47-
cls.is_using_client_embedding(data),
48-
])
49-
50-
if enabled == 0:
51-
raise ServiceInvalidExecutionSettingsError(
52-
"Weaviate settings must specify either a ",
53-
"Weaviate Cloud instance, a local Weaviate instance, or the client embedding options.",
54-
)
55-
if enabled > 1:
56-
raise ServiceInvalidExecutionSettingsError(
57-
"Weaviate settings must specify only one of the following: ",
58-
"Weaviate Cloud instance, a local Weaviate instance, or the client embedding options.",
59-
)
44+
if isinstance(data, dict):
45+
enabled = sum([
46+
cls.is_using_weaviate_cloud(data),
47+
cls.is_using_local_weaviate(data),
48+
cls.is_using_client_embedding(data),
49+
])
50+
51+
if enabled == 0:
52+
raise ServiceInvalidExecutionSettingsError(
53+
"Weaviate settings must specify either a ",
54+
"Weaviate Cloud instance, a local Weaviate instance, or the client embedding options.",
55+
)
56+
if enabled > 1:
57+
raise ServiceInvalidExecutionSettingsError(
58+
"Weaviate settings must specify only one of the following: ",
59+
"Weaviate Cloud instance, a local Weaviate instance, or the client embedding options.",
60+
)
6061

6162
return data
6263

python/semantic_kernel/contents/utils/data_uri.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@ def update_data(self, value: str | bytes):
4141

4242
@model_validator(mode="before")
4343
@classmethod
44-
def _validate_data(cls, values: dict[str, Any]) -> dict[str, Any]:
44+
def _validate_data(cls, values: Any) -> dict[str, Any]:
4545
"""Validate the data."""
46-
if not values.get("data_bytes") and not values.get("data_str"):
46+
if isinstance(values, dict) and not values.get("data_bytes") and not values.get("data_str"):
4747
raise ContentInitializationError("Either data_bytes or data_str must be provided.")
4848
return values
4949

python/semantic_kernel/data/filter_clauses/any_tags_equal_to_filter_clause.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Copyright (c) Microsoft. All rights reserved.
22

33

4-
from pydantic import Field
4+
from typing import ClassVar
55

66
from semantic_kernel.data.filter_clauses.filter_clause_base import FilterClauseBase
77
from semantic_kernel.utils.experimental_decorator import experimental_class
@@ -16,4 +16,4 @@ class AnyTagsEqualTo(FilterClauseBase):
1616
value: The value to compare against the list of tags.
1717
"""
1818

19-
filter_clause_type: str = Field("any_tags_equal_to", init=False) # type: ignore
19+
filter_clause_type: ClassVar[str] = "any_tags_equal_to"

python/semantic_kernel/data/filter_clauses/equal_to_filter_clause.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Copyright (c) Microsoft. All rights reserved.
22

3-
from pydantic import Field
3+
from typing import ClassVar
44

55
from semantic_kernel.data.filter_clauses.filter_clause_base import FilterClauseBase
66
from semantic_kernel.utils.experimental_decorator import experimental_class
@@ -16,4 +16,4 @@ class EqualTo(FilterClauseBase):
1616
1717
"""
1818

19-
filter_clause_type: str = Field("equal_to", init=False) # type: ignore
19+
filter_clause_type: ClassVar[str] = "equal_to"

python/semantic_kernel/data/filter_clauses/filter_clause_base.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,7 @@
22

33

44
from abc import ABC
5-
from typing import Any
6-
7-
from pydantic import Field
5+
from typing import Any, ClassVar
86

97
from semantic_kernel.kernel_pydantic import KernelBaseModel
108
from semantic_kernel.utils.experimental_decorator import experimental_class
@@ -14,6 +12,10 @@
1412
class FilterClauseBase(ABC, KernelBaseModel):
1513
"""A base for all filter clauses."""
1614

17-
filter_clause_type: str = Field("FilterClauseBase", init=False) # type: ignore
15+
filter_clause_type: ClassVar[str] = "FilterClauseBase"
1816
field_name: str
1917
value: Any
18+
19+
def __str__(self) -> str:
20+
"""Return a string representation of the filter clause."""
21+
return f"filter_clause_type='{self.filter_clause_type}' field_name='{self.field_name}' value='{self.value}'"

0 commit comments

Comments
 (0)