Skip to content

Commit 7a3d781

Browse files
authored
fix: correct outputSchema type mismatches for get_entity, get_lineage, get_data_product (#72)
go-sdk validates structured output against outputSchema via applySchema before serializing to structuredContent. Three schemas declared incorrect types for fields that are objects (not strings) in the actual Go types, causing schema validation to fail and return a JSON-RPC error — displayed as "Error occurred during tool execution" in MCP hosts. Fixes: schemaGetEntity: - owners.items: string → object (types.Owner has urn/name/type fields) - tags.items: string → object (types.Tag has urn/name/description fields) - domain: string → object|null (types.Domain has urn/name/description fields) - deprecated: boolean → deprecation object|null (field is types.Deprecation, not a bool) - Add query_availability and query_examples properties for with-query-provider path - Add platform field present in types.Entity schemaGetLineage.execution_context: - Remove additionalProperties: {type: object} — ExecutionContext has connections ([]string) and source (string) which are not objects; use plain {type: object} instead schemaGetDataProduct: - domain: string → object|null (types.Domain) - owners.items: string → object (types.Owner) - assets.items: object → string ([]string URNs, not typed objects) entity.go (handleGetEntity with query provider): - Stringify query_table via table.String() instead of returning the TableIdentifier struct; schema declares query_table as {type: string} matching the fully-qualified table path Tests (integration_test.go): - TestToolsViaServer_SchemaValidation: exercises all three tools with rich mock data (populated owners, tags, domain fields) through the real MCP server. Covers both the no-query-provider and with-query-provider code paths including non-nil ResolveTable, GetTableAvailability, GetQueryExamples, and GetExecutionContext returns. A non-nil CallTool error means go-sdk schema validation failed. Fixes #72
1 parent 2854166 commit 7a3d781

3 files changed

Lines changed: 207 additions & 13 deletions

File tree

pkg/tools/entity.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,9 @@ func (t *Toolkit) handleGetEntity(ctx context.Context, _ *mcp.CallToolRequest, i
6363
"entity": entity,
6464
}
6565

66-
// Add table resolution
66+
// Add table resolution as a fully-qualified string (matches outputSchema type: string).
6767
if table, tableErr := t.queryProvider.ResolveTable(ctx, input.URN); tableErr == nil && table != nil {
68-
response["query_table"] = table
68+
response["query_table"] = table.String()
6969
}
7070

7171
// Add query examples

pkg/tools/integration_test.go

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"github.com/modelcontextprotocol/go-sdk/mcp"
88

99
"github.com/txn2/mcp-datahub/pkg/client"
10+
"github.com/txn2/mcp-datahub/pkg/integration"
1011
"github.com/txn2/mcp-datahub/pkg/types"
1112
)
1213

@@ -133,6 +134,151 @@ func TestToolsViaServer(t *testing.T) {
133134
}
134135
}
135136

137+
// richQueryProvider is a query provider that returns non-nil values for all methods,
138+
// exercising the with-query-provider code paths in entity, lineage, and schema handlers.
139+
type richQueryProvider struct{}
140+
141+
func (r *richQueryProvider) Name() string { return "rich-mock" }
142+
func (r *richQueryProvider) ResolveTable(_ context.Context, urn string) (*integration.TableIdentifier, error) {
143+
return &integration.TableIdentifier{Catalog: "cat", Schema: "sch", Table: "tbl"}, nil
144+
}
145+
func (r *richQueryProvider) GetTableAvailability(_ context.Context, _ string) (*integration.TableAvailability, error) {
146+
avail := true
147+
_ = avail
148+
return &integration.TableAvailability{Available: true, Connection: "default"}, nil
149+
}
150+
func (r *richQueryProvider) GetQueryExamples(_ context.Context, _ string) ([]integration.QueryExample, error) {
151+
return []integration.QueryExample{{Name: "sample", SQL: "SELECT 1"}}, nil
152+
}
153+
func (r *richQueryProvider) GetExecutionContext(_ context.Context, _ []string) (*integration.ExecutionContext, error) {
154+
return &integration.ExecutionContext{
155+
Source: "trino",
156+
Connections: []string{"default"},
157+
Tables: map[string]*integration.TableIdentifier{"urn:li:dataset:test": {Catalog: "cat", Schema: "sch", Table: "tbl"}},
158+
}, nil
159+
}
160+
func (r *richQueryProvider) Close() error { return nil }
161+
162+
// TestToolsViaServer_SchemaValidation exercises the three tools whose outputSchema was
163+
// previously mismatched against their actual return types. It uses rich mock data
164+
// (owners, tags, domain as objects; assets as strings) so that go-sdk's applySchema
165+
// call exercises every type constraint in the schema. A non-nil CallTool error
166+
// means schema validation failed inside go-sdk.
167+
func TestToolsViaServer_SchemaValidation(t *testing.T) {
168+
domain := &types.Domain{URN: "urn:li:domain:finance", Name: "Finance"}
169+
owner := types.Owner{URN: "urn:li:corpuser:alice", Name: "Alice"}
170+
171+
mock := &mockClient{
172+
getEntityFunc: func(_ context.Context, urn string) (*types.Entity, error) {
173+
return &types.Entity{
174+
URN: urn,
175+
Name: "Rich Entity",
176+
Type: "dataset",
177+
Owners: []types.Owner{owner},
178+
Tags: []types.Tag{{URN: "urn:li:tag:PII", Name: "PII"}},
179+
Domain: domain,
180+
}, nil
181+
},
182+
getLineageFunc: func(_ context.Context, urn string, _ ...client.LineageOption) (*types.LineageResult, error) {
183+
return &types.LineageResult{
184+
Start: urn,
185+
Direction: "DOWNSTREAM",
186+
Nodes: []types.LineageNode{{URN: "urn:li:dataset:up", Name: "upstream", Type: "dataset"}},
187+
}, nil
188+
},
189+
getDataProductFunc: func(_ context.Context, urn string) (*types.DataProduct, error) {
190+
return &types.DataProduct{
191+
URN: urn,
192+
Name: "Rich Product",
193+
Domain: domain,
194+
Owners: []types.Owner{owner},
195+
Assets: []string{"urn:li:dataset:a", "urn:li:dataset:b"},
196+
}, nil
197+
},
198+
getSchemaFunc: func(_ context.Context, _ string) (*types.SchemaMetadata, error) {
199+
return &types.SchemaMetadata{Name: "schema", Fields: []types.SchemaField{{FieldPath: "id", Type: "string"}}}, nil
200+
},
201+
}
202+
203+
// Test without query provider (exercises direct-return code paths).
204+
t.Run("without_query_provider", func(t *testing.T) {
205+
toolkit := NewToolkit(mock, DefaultConfig())
206+
impl := &mcp.Implementation{Name: "test-server", Version: "1.0.0"}
207+
server := mcp.NewServer(impl, nil)
208+
toolkit.RegisterAll(server)
209+
210+
serverTransport, clientTransport := mcp.NewInMemoryTransports()
211+
mcpClient := mcp.NewClient(&mcp.Implementation{Name: "test-client", Version: "1.0.0"}, nil)
212+
go func() { _ = server.Run(context.Background(), serverTransport) }()
213+
session, err := mcpClient.Connect(context.Background(), clientTransport, nil)
214+
if err != nil {
215+
t.Fatalf("connect: %v", err)
216+
}
217+
218+
for _, tc := range []struct {
219+
name string
220+
tool ToolName
221+
args map[string]any
222+
}{
223+
{"get_entity", ToolGetEntity, map[string]any{"urn": "urn:li:dataset:test"}},
224+
{"get_lineage", ToolGetLineage, map[string]any{"urn": "urn:li:dataset:test"}},
225+
{"get_data_product", ToolGetDataProduct, map[string]any{"urn": "urn:li:dataProduct:test"}},
226+
} {
227+
t.Run(tc.name, func(t *testing.T) {
228+
result, callErr := session.CallTool(context.Background(), &mcp.CallToolParams{
229+
Name: string(tc.tool),
230+
Arguments: tc.args,
231+
})
232+
if callErr != nil {
233+
t.Errorf("CallTool(%s) schema validation error: %v", tc.name, callErr)
234+
}
235+
if result == nil {
236+
t.Errorf("CallTool(%s) returned nil result", tc.name)
237+
}
238+
})
239+
}
240+
})
241+
242+
// Test with query provider (exercises enrichment + query_table stringify code paths).
243+
t.Run("with_query_provider", func(t *testing.T) {
244+
toolkit := NewToolkit(mock, DefaultConfig(), WithQueryProvider(&richQueryProvider{}))
245+
impl := &mcp.Implementation{Name: "test-server", Version: "1.0.0"}
246+
server := mcp.NewServer(impl, nil)
247+
toolkit.RegisterAll(server)
248+
249+
serverTransport, clientTransport := mcp.NewInMemoryTransports()
250+
mcpClient := mcp.NewClient(&mcp.Implementation{Name: "test-client", Version: "1.0.0"}, nil)
251+
go func() { _ = server.Run(context.Background(), serverTransport) }()
252+
session, err := mcpClient.Connect(context.Background(), clientTransport, nil)
253+
if err != nil {
254+
t.Fatalf("connect: %v", err)
255+
}
256+
257+
for _, tc := range []struct {
258+
name string
259+
tool ToolName
260+
args map[string]any
261+
}{
262+
{"get_entity_enriched", ToolGetEntity, map[string]any{"urn": "urn:li:dataset:test"}},
263+
{"get_lineage_enriched", ToolGetLineage, map[string]any{"urn": "urn:li:dataset:test"}},
264+
{"get_data_product_enriched", ToolGetDataProduct, map[string]any{"urn": "urn:li:dataProduct:test"}},
265+
} {
266+
t.Run(tc.name, func(t *testing.T) {
267+
result, callErr := session.CallTool(context.Background(), &mcp.CallToolParams{
268+
Name: string(tc.tool),
269+
Arguments: tc.args,
270+
})
271+
if callErr != nil {
272+
t.Errorf("CallTool(%s) schema validation error: %v", tc.name, callErr)
273+
}
274+
if result == nil {
275+
t.Errorf("CallTool(%s) returned nil result", tc.name)
276+
}
277+
})
278+
}
279+
})
280+
}
281+
136282
// TestWriteToolsViaServer tests write tools through the MCP server.
137283
// This covers the register*Tool closures including type assertions.
138284
func TestWriteToolsViaServer(t *testing.T) {

pkg/tools/output_schemas.go

Lines changed: 59 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -94,11 +94,49 @@ var schemaGetEntity = json.RawMessage(`{
9494
"name": {"type": "string"},
9595
"type": {"type": "string"},
9696
"description": {"type": "string"},
97-
"owners": {"type": "array", "items": {"type": "string"}},
98-
"tags": {"type": "array", "items": {"type": "string"}},
99-
"domain": {"type": "string"},
100-
"deprecated": {"type": "boolean"},
101-
"query_table": {"type": "string", "description": "Optional: resolved query engine table path"}
97+
"platform": {"type": "string"},
98+
"owners": {
99+
"type": ["array", "null"],
100+
"items": {
101+
"type": "object",
102+
"properties": {
103+
"urn": {"type": "string"},
104+
"name": {"type": "string"},
105+
"type": {"type": "string"}
106+
}
107+
}
108+
},
109+
"tags": {
110+
"type": ["array", "null"],
111+
"items": {
112+
"type": "object",
113+
"properties": {
114+
"urn": {"type": "string"},
115+
"name": {"type": "string"},
116+
"description": {"type": "string"}
117+
}
118+
}
119+
},
120+
"domain": {
121+
"type": ["object", "null"],
122+
"properties": {
123+
"urn": {"type": "string"},
124+
"name": {"type": "string"},
125+
"description": {"type": "string"}
126+
}
127+
},
128+
"deprecation": {
129+
"type": ["object", "null"],
130+
"properties": {
131+
"deprecated": {"type": "boolean"},
132+
"note": {"type": "string"},
133+
"actor": {"type": "string"},
134+
"decommission_time": {"type": "integer"}
135+
}
136+
},
137+
"query_table": {"type": "string", "description": "Optional: fully-qualified query engine table path"},
138+
"query_availability": {"type": "object", "description": "Optional: query engine availability details"},
139+
"query_examples": {"type": "array", "description": "Optional: example SQL queries"}
102140
}
103141
}`)
104142

@@ -140,8 +178,7 @@ var schemaGetLineage = json.RawMessage(`{
140178
},
141179
"execution_context": {
142180
"type": "object",
143-
"description": "Optional: query engine context per entity URN",
144-
"additionalProperties": {"type": "object"}
181+
"description": "Optional: query engine execution context for lineage bridging"
145182
}
146183
}
147184
}`)
@@ -269,10 +306,16 @@ var schemaGetDataProduct = json.RawMessage(`{
269306
"urn": {"type": "string"},
270307
"name": {"type": "string"},
271308
"description": {"type": "string"},
272-
"domain": {"type": "string"},
273-
"owners": {"type": "array", "items": {"type": "string"}},
274-
"assets": {
275-
"type": "array",
309+
"domain": {
310+
"type": ["object", "null"],
311+
"properties": {
312+
"urn": {"type": "string"},
313+
"name": {"type": "string"},
314+
"description": {"type": "string"}
315+
}
316+
},
317+
"owners": {
318+
"type": ["array", "null"],
276319
"items": {
277320
"type": "object",
278321
"properties": {
@@ -281,6 +324,11 @@ var schemaGetDataProduct = json.RawMessage(`{
281324
"type": {"type": "string"}
282325
}
283326
}
327+
},
328+
"assets": {
329+
"type": ["array", "null"],
330+
"description": "URNs of constituent datasets",
331+
"items": {"type": "string"}
284332
}
285333
}
286334
}`)

0 commit comments

Comments
 (0)