Skip to content

Commit a10ab5e

Browse files
committed
[Store] Add Retriever as the opposite of Indexer
The Retriever is a higher-level abstraction for searching documents in a vector store. While the Indexer loads, transforms, vectorizes and stores documents, the Retriever vectorizes a query and retrieves similar documents from the store. - Add RetrieverInterface and Retriever class - Add AI Bundle configuration for retrievers - Add documentation for retrieving documents - Add examples for basic retriever usage
1 parent 56689f0 commit a10ab5e

File tree

10 files changed

+523
-2
lines changed

10 files changed

+523
-2
lines changed

docs/components/store.rst

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,78 @@ used vector store::
3333
$document = new TextDocument('This is a sample document.');
3434
$indexer->index($document);
3535

36-
You can find more advanced usage in combination with an Agent using the store for RAG in the examples folder:
36+
You can find more advanced usage in combination with an Agent using the store for RAG in the examples folder.
37+
38+
Retrieving
39+
----------
40+
41+
The opposite of indexing is retrieving. The :class:`Symfony\\AI\\Store\\Retriever` is a higher-level feature that allows you to
42+
search for documents in a store based on a query string. It vectorizes the query and retrieves similar documents from the store::
43+
44+
use Symfony\AI\Store\Retriever;
45+
46+
$retriever = new Retriever($vectorizer, $store);
47+
$documents = $retriever->retrieve('What is the capital of France?');
48+
49+
foreach ($documents as $document) {
50+
echo $document->metadata->get('source');
51+
}
52+
53+
The retriever accepts an optional parameter to customize the retrieval:
54+
55+
* ``$options``: An array of options to pass to the underlying store query (e.g., limit, filters)
56+
57+
Symfony Integration
58+
~~~~~~~~~~~~~~~~~~~
59+
60+
When using the ``AiBundle``, you can configure retrievers in your configuration:
61+
62+
.. code-block:: yaml
63+
64+
# config/packages/ai.yaml
65+
ai:
66+
# First configure a vectorizer
67+
vectorizer:
68+
default:
69+
platform: Symfony\AI\Platform\PlatformInterface
70+
model: 'text-embedding-3-small'
71+
72+
# Then configure a retriever using the vectorizer and store
73+
retriever:
74+
default:
75+
vectorizer: ai.vectorizer.default
76+
store: Symfony\AI\Store\StoreInterface
77+
78+
The retriever can then be injected into your services using the ``RetrieverInterface``::
79+
80+
use Symfony\AI\Store\RetrieverInterface;
81+
82+
/**
 * Example service that gets the configured retriever injected.
 */
class MyService
{
    public function __construct(
        private RetrieverInterface $retriever,
    ) {
    }

    /**
     * Collects every document the retriever yields for the query into a list.
     */
    public function search(string $query): array
    {
        $results = $this->retriever->retrieve($query);

        $matches = [];
        foreach ($results as $match) {
            $matches[] = $match;
        }

        return $matches;
    }
}
99+
100+
Example Usage
101+
~~~~~~~~~~~~~
102+
103+
* `Basic Retriever Example`_
104+
* `Retriever with Options Example`_
105+
106+
Similarity Search Examples
107+
~~~~~~~~~~~~~~~~~~~~~~~~~~
37108

38109
* `Similarity Search with Cloudflare (RAG)`_
39110
* `Similarity Search with Manticore (RAG)`_
@@ -129,6 +200,8 @@ This leads to a store implementing two methods::
129200
}
130201

131202
.. _`Retrieval Augmented Generation`: https://en.wikipedia.org/wiki/Retrieval-augmented_generation
203+
.. _`Basic Retriever Example`: https://github.com/symfony/ai/blob/main/examples/retriever/basic.php
204+
.. _`Retriever with Options Example`: https://github.com/symfony/ai/blob/main/examples/retriever/with-options.php
132205
.. _`Similarity Search with Cloudflare (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/cloudflare.php
133206
.. _`Similarity Search with Manticore (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/manticore.php
134207
.. _`Similarity Search with MariaDB (RAG)`: https://github.com/symfony/ai/blob/main/examples/rag/mariadb-gemini.php

examples/retriever/basic.php

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
13+
use Symfony\AI\Store\Bridge\Local\InMemoryStore;
14+
use Symfony\AI\Store\Document\Loader\TextFileLoader;
15+
use Symfony\AI\Store\Document\Transformer\TextSplitTransformer;
16+
use Symfony\AI\Store\Document\Vectorizer;
17+
use Symfony\AI\Store\Indexer;
18+
use Symfony\AI\Store\Retriever;
19+
20+
require_once dirname(__DIR__).'/bootstrap.php';

// the in-memory store is shared by the indexer (writes) and the retriever (reads)
$store = new InMemoryStore();

// the same vectorizer is used for the documents and, later, for the query
$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
$vectorizer = new Vectorizer($platform, 'text-embedding-3-small');

// movie fixtures that get loaded, split, vectorized and stored
$moviesDir = dirname(__DIR__, 2).'/fixtures/movies';
$sources = [
    $moviesDir.'/gladiator.md',
    $moviesDir.'/inception.md',
    $moviesDir.'/jurassic-park.md',
];
$splitter = new TextSplitTransformer(chunkSize: 500, overlap: 100);

// indexer - loads, transforms, vectorizes and stores the documents
$indexer = new Indexer(
    loader: new TextFileLoader(),
    vectorizer: $vectorizer,
    store: $store,
    source: $sources,
    transformers: [$splitter],
);
$indexer->index();

// retriever - the counterpart of the indexer:
// vectorizes a query and fetches similar documents from the store
$retriever = new Retriever(vectorizer: $vectorizer, store: $store);

// run the similarity search and print one entry per hit
echo "Searching for: 'Roman gladiator revenge'\n\n";
$results = $retriever->retrieve('Roman gladiator revenge');

foreach ($results as $i => $document) {
    $score = $document->score ?? 'n/a';
    $source = $document->metadata->get('source') ?? 'unknown';

    printf("%d. Document ID: %s\n", $i + 1, $document->id);
    printf(" Score: %s\n", $score);
    printf(" Source: %s\n\n", $source);
}
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Fixtures\Movies;
13+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
14+
use Symfony\AI\Store\Bridge\Local\InMemoryStore;
15+
use Symfony\AI\Store\Document\Loader\InMemoryLoader;
16+
use Symfony\AI\Store\Document\Metadata;
17+
use Symfony\AI\Store\Document\TextDocument;
18+
use Symfony\AI\Store\Document\Vectorizer;
19+
use Symfony\AI\Store\Indexer;
20+
use Symfony\AI\Store\Retriever;
21+
use Symfony\Component\Uid\Uuid;
22+
23+
require_once dirname(__DIR__).'/bootstrap.php';

// initialize the store
$store = new InMemoryStore();

// create one text document per movie; the raw movie array is kept as metadata
// so that title and director can be printed for each hit below
$documents = [];
foreach (Movies::all() as $movie) {
    $documents[] = new TextDocument(
        id: Uuid::v4(),
        content: 'Title: '.$movie['title'].PHP_EOL.'Director: '.$movie['director'].PHP_EOL.'Description: '.$movie['description'],
        metadata: new Metadata($movie),
    );
}

// create platform and vectorizer
$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
$vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger());

// index documents
$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger());
$indexer->index();

// create retriever
$retriever = new Retriever($vectorizer, $store, logger());

// retrieve documents with options - the options array is handed to the underlying
// store query and is therefore store-specific
// NOTE(review): 'maxItems' is assumed to be the result-limit option understood by
// InMemoryStore - confirm the key against InMemoryStore::query() before merging
echo "Searching for movies about 'crime family mafia'\n";
echo "================================================\n\n";

$results = $retriever->retrieve('crime family mafia', ['maxItems' => 3]);

foreach ($results as $i => $document) {
    $title = $document->metadata->get('title');
    $director = $document->metadata->get('director');
    $score = $document->score;

    printf("%d. %s (Director: %s)\n", $i + 1, $title, $director);
    // a missing score is printed as 0.0000
    printf(" Score: %.4f\n\n", $score ?? 0);
}

src/ai-bundle/config/options.php

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,6 +1089,22 @@
10891089
->end()
10901090
->end()
10911091
->end()
1092+
->arrayNode('retriever')
1093+
->info('Retrievers for fetching documents from a vector store based on a query')
1094+
->useAttributeAsKey('name')
1095+
->arrayPrototype()
1096+
->children()
1097+
->scalarNode('vectorizer')
1098+
->info('Service name of vectorizer')
1099+
->defaultValue(VectorizerInterface::class)
1100+
->end()
1101+
->stringNode('store')
1102+
->info('Service name of store')
1103+
->defaultValue(StoreInterface::class)
1104+
->end()
1105+
->end()
1106+
->end()
1107+
->end()
10921108
->end()
10931109
->validate()
10941110
->ifTrue(function ($v) {

src/ai-bundle/src/AiBundle.php

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@
104104
use Symfony\AI\Store\Indexer;
105105
use Symfony\AI\Store\IndexerInterface;
106106
use Symfony\AI\Store\ManagedStoreInterface;
107+
use Symfony\AI\Store\Retriever;
108+
use Symfony\AI\Store\RetrieverInterface;
107109
use Symfony\AI\Store\StoreInterface;
108110
use Symfony\Component\Clock\ClockInterface;
109111
use Symfony\Component\Config\Definition\Configurator\DefinitionConfigurator;
@@ -262,6 +264,13 @@ public function loadExtension(array $config, ContainerConfigurator $container, C
262264
$builder->setAlias(IndexerInterface::class, 'ai.indexer.'.$indexerName);
263265
}
264266

267+
foreach ($config['retriever'] ?? [] as $retrieverName => $retriever) {
268+
$this->processRetrieverConfig($retrieverName, $retriever, $builder);
269+
}
270+
if (1 === \count($config['retriever'] ?? []) && isset($retrieverName)) {
271+
$builder->setAlias(RetrieverInterface::class, 'ai.retriever.'.$retrieverName);
272+
}
273+
265274
$builder->registerAttributeForAutoconfiguration(AsTool::class, static function (ChildDefinition $definition, AsTool $attribute): void {
266275
$definition->addTag('ai.tool', [
267276
'name' => $attribute->name,
@@ -1866,6 +1875,23 @@ private function processIndexerConfig(int|string $name, array $config, Container
18661875
$container->registerAliasForArgument($serviceId, IndexerInterface::class, (new Target((string) $name))->getParsedName());
18671876
}
18681877

1878+
/**
 * Registers the "ai.retriever.<name>" service for one configured retriever.
 *
 * The service is a Retriever built from the configured vectorizer and store
 * service ids, tagged with "ai.retriever", and exposed through a named
 * RetrieverInterface alias derived from the retriever name.
 *
 * @param array<string, mixed> $config
 */
private function processRetrieverConfig(int|string $name, array $config, ContainerBuilder $container): void
{
    $serviceId = 'ai.retriever.'.$name;

    $retriever = (new Definition(Retriever::class))
        ->setArguments([
            new Reference($config['vectorizer']),
            new Reference($config['store']),
            // logger is optional: the reference is ignored when the service does not exist
            new Reference('logger', ContainerInterface::IGNORE_ON_INVALID_REFERENCE),
        ])
        ->addTag('ai.retriever', ['name' => $name]);

    $container->setDefinition($serviceId, $retriever);

    $argumentName = (new Target((string) $name))->getParsedName();
    $container->registerAliasForArgument($serviceId, RetrieverInterface::class, $argumentName);
}
1894+
18691895
/**
18701896
* @param array<string, mixed> $config
18711897
*/

src/ai-bundle/tests/DependencyInjection/AiBundleTest.php

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
use Symfony\AI\Store\Document\VectorizerInterface;
4949
use Symfony\AI\Store\IndexerInterface;
5050
use Symfony\AI\Store\ManagedStoreInterface;
51+
use Symfony\AI\Store\RetrieverInterface;
5152
use Symfony\AI\Store\StoreInterface;
5253
use Symfony\Component\Clock\ClockInterface;
5354
use Symfony\Component\Config\Definition\Exception\InvalidConfigurationException;
@@ -3293,6 +3294,94 @@ public function testInjectionIndexerAliasIsRegistered()
32933294
$this->assertTrue($container->hasAlias(IndexerInterface::class.' $another'));
32943295
}
32953296

3297+
/**
 * A retriever wired to a configured vectorizer and store must reference both
 * services, followed by the logger, as constructor arguments.
 */
public function testRetrieverWithConfiguredVectorizer()
{
    $config = [
        'store' => [
            'memory' => ['my_store' => []],
        ],
        'vectorizer' => [
            'my_vectorizer' => [
                'platform' => 'my_platform_service_id',
                'model' => 'text-embedding-3-small',
            ],
        ],
        'retriever' => [
            'my_retriever' => [
                'vectorizer' => 'ai.vectorizer.my_vectorizer',
                'store' => 'ai.store.memory.my_store',
            ],
        ],
    ];

    $container = $this->buildContainer(['ai' => $config]);

    $this->assertTrue($container->hasDefinition('ai.retriever.my_retriever'));
    $this->assertTrue($container->hasDefinition('ai.vectorizer.my_vectorizer'));

    $arguments = $container->getDefinition('ai.retriever.my_retriever')->getArguments();

    // expected service id per constructor argument position
    $expectedReferences = [
        0 => 'ai.vectorizer.my_vectorizer',
        1 => 'ai.store.memory.my_store',
        2 => 'logger',
    ];

    foreach ($expectedReferences as $position => $expectedId) {
        $this->assertInstanceOf(Reference::class, $arguments[$position]);
        $this->assertSame($expectedId, (string) $arguments[$position]);
    }
}
3336+
3337+
/**
 * Every configured retriever must get a named RetrieverInterface alias
 * whose argument name is derived from the retriever name.
 */
public function testRetrieverAliasIsRegistered()
{
    // both retrievers share the same wiring; only their names differ
    $retrieverConfig = [
        'vectorizer' => 'my_vectorizer_service',
        'store' => 'ai.store.memory.my_store',
    ];

    $container = $this->buildContainer([
        'ai' => [
            'store' => [
                'memory' => ['my_store' => []],
            ],
            'retriever' => [
                'my_retriever' => $retrieverConfig,
                'another' => $retrieverConfig,
            ],
        ],
    ]);

    foreach (['$myRetriever', '$another'] as $argumentName) {
        $this->assertTrue($container->hasAlias(RetrieverInterface::class.' '.$argumentName));
    }
}
3362+
3363+
/**
 * When exactly one retriever is configured, the plain RetrieverInterface
 * alias must exist and point at that retriever's service.
 */
public function testSingleRetrieverCreatesDefaultAlias()
{
    $container = $this->buildContainer([
        'ai' => [
            'store' => [
                'memory' => [
                    'my_store' => [],
                ],
            ],
            'retriever' => [
                'default' => [
                    'vectorizer' => 'my_vectorizer_service',
                    'store' => 'ai.store.memory.my_store',
                ],
            ],
        ],
    ]);

    $this->assertTrue($container->hasDefinition('ai.retriever.default'));
    $this->assertTrue($container->hasAlias(RetrieverInterface::class));
    // existence alone is not enough: the alias has to target the single configured retriever
    $this->assertSame('ai.retriever.default', (string) $container->getAlias(RetrieverInterface::class));
}
3384+
32963385
public function testValidMultiAgentConfiguration()
32973386
{
32983387
$container = $this->buildContainer([

0 commit comments

Comments
 (0)