Skip to content

Commit 7978d23

Browse files
Thoughtseize1 authored and riathakkar committed
refactor: (GenAI) Reorganized Rag Samples (Group C) (GoogleCloudPlatform#12612)
* New Samples for Rag folder
* Try to run test with created test_file.txt
* Returned pytest.skip mark
* Added type annotations for Python 3.8
1 parent 1e21c50 commit 7978d23

14 files changed

+946
-0
lines changed
+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import os
15+
16+
from typing import Optional
17+
18+
from google.cloud.aiplatform_v1beta1 import RagCorpus
19+
20+
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
21+
22+
23+
def create_corpus(
    display_name: Optional[str] = None,
    description: Optional[str] = None,
) -> RagCorpus:
    """Create a RAG corpus, print it, and return it.

    Initializes the Vertex AI SDK for ``PROJECT_ID`` in ``us-central1`` and
    calls ``rag.create_corpus`` with an embedding model configuration that
    points at the ``text-embedding-004`` publisher model.

    Args:
        display_name: Display name forwarded to ``rag.create_corpus``.
        description: Description forwarded to ``rag.create_corpus``.

    Returns:
        The object returned by ``rag.create_corpus``.
    """
    # [START generativeaionvertexai_rag_create_corpus]

    from vertexai.preview import rag
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # PROJECT_ID = "your-project-id"
    # display_name = "test_corpus"
    # description = "Corpus Description"

    # Initialize Vertex AI API once per session
    vertexai.init(project=PROJECT_ID, location="us-central1")

    # Configure embedding model and create the corpus in a single call
    rag_corpus = rag.create_corpus(
        display_name=display_name,
        description=description,
        embedding_model_config=rag.EmbeddingModelConfig(
            publisher_model="publishers/google/models/text-embedding-004"
        ),
    )
    print(rag_corpus)
    # Example response:
    # RagCorpus(name='projects/1234567890/locations/us-central1/ragCorpora/1234567890',
    # display_name='test_corpus', description='Corpus Description', embedding_model_config=...
    # ...

    # [END generativeaionvertexai_rag_create_corpus]
    return rag_corpus
58+
59+
60+
if __name__ == "__main__":
    # Run the sample with placeholder corpus metadata.
    create_corpus(
        display_name="test_corpus",
        description="Corpus Description",
    )
+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
18+
19+
20+
def delete_corpus(corpus_name: str) -> None:
    """Delete the RAG corpus identified by ``corpus_name``.

    Initializes the Vertex AI SDK for ``PROJECT_ID`` in ``us-central1``,
    calls ``rag.delete_corpus`` and prints a confirmation line.

    Args:
        corpus_name: Resource name of the corpus, e.g.
            ``projects/{PROJECT_ID}/locations/us-central1/ragCorpora/{rag_corpus_id}``.
    """
    # [START generativeaionvertexai_rag_delete_corpus]

    from vertexai.preview import rag
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # PROJECT_ID = "your-project-id"
    # corpus_name = "projects/{PROJECT_ID}/locations/us-central1/ragCorpora/{rag_corpus_id}"

    # Initialize Vertex AI API once per session
    vertexai.init(project=PROJECT_ID, location="us-central1")

    rag.delete_corpus(name=corpus_name)

    confirmation = f"Corpus {corpus_name} deleted."
    print(confirmation)
    # Example response:
    # Successfully deleted the RagCorpus.
    # Corpus projects/[PROJECT_ID]/locations/us-central1/ragCorpora/123456789012345 deleted.

    # [END generativeaionvertexai_rag_delete_corpus]
40+
41+
42+
if __name__ == "__main__":
    # Placeholder resource name: this is a plain (non-f) string, so the
    # braces are passed through literally and must be replaced by hand.
    delete_corpus(
        corpus_name="projects/{PROJECT_ID}/locations/us-central1/ragCorpora/{rag_corpus_id}"
    )
+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
18+
19+
20+
def delete_file(file_name: str) -> None:
    """Delete the RAG file identified by ``file_name``.

    Initializes the Vertex AI SDK for ``PROJECT_ID`` in ``us-central1``,
    calls ``rag.delete_file`` and prints a confirmation line.

    Args:
        file_name: Resource name of the file, e.g.
            ``projects/{PROJECT_ID}/locations/us-central1/ragCorpora/{rag_corpus_id}/ragFiles/{rag_file_id}``.
    """
    # [START generativeaionvertexai_rag_delete_file]

    from vertexai.preview import rag
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # PROJECT_ID = "your-project-id"
    # file_name = "projects/{PROJECT_ID}/locations/us-central1/ragCorpora/{rag_corpus_id}/ragFiles/{rag_file_id}"

    # Initialize Vertex AI API once per session
    vertexai.init(project=PROJECT_ID, location="us-central1")

    rag.delete_file(name=file_name)

    confirmation = f"File {file_name} deleted."
    print(confirmation)
    # Example response:
    # Successfully deleted the RagFile.
    # File projects/1234567890/locations/us-central1/ragCorpora/1111111111/ragFiles/2222222222 deleted.

    # [END generativeaionvertexai_rag_delete_file]
40+
41+
42+
if __name__ == "__main__":
    # Placeholder resource name: this is a plain (non-f) string, so the
    # braces are passed through literally and must be replaced by hand.
    delete_file(
        file_name="projects/{PROJECT_ID}/locations/us-central1/ragCorpora/{rag_corpus_id}/ragFiles/{rag_file_id}"
    )
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
from vertexai.generative_models import GenerationResponse
18+
19+
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
20+
21+
22+
def generate_content_with_rag(
    corpus_name: str,
) -> GenerationResponse:
    """Generate content with a model that uses a RAG corpus as a retrieval tool.

    Initializes the Vertex AI SDK for ``PROJECT_ID`` in ``us-central1``,
    builds a retrieval ``Tool`` backed by ``corpus_name``, attaches it to a
    ``gemini-1.5-flash-001`` model, sends a fixed prompt, prints the
    response text and returns the response.

    Args:
        corpus_name: Resource name of the corpus, e.g.
            ``projects/{PROJECT_ID}/locations/us-central1/ragCorpora/{rag_corpus_id}``.

    Returns:
        The object returned by ``GenerativeModel.generate_content``.
    """
    # [START generativeaionvertexai_rag_generate_content]

    from vertexai.preview import rag
    from vertexai.preview.generative_models import GenerativeModel, Tool
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # PROJECT_ID = "your-project-id"
    # corpus_name = "projects/{PROJECT_ID}/locations/us-central1/ragCorpora/{rag_corpus_id}"

    # Initialize Vertex AI API once per session
    vertexai.init(project=PROJECT_ID, location="us-central1")

    # Build the retrieval tool step by step: resource -> store -> tool
    rag_resource = rag.RagResource(
        rag_corpus=corpus_name,
        # Optional: supply IDs from `rag.list_files()`.
        # rag_file_ids=["rag-file-1", "rag-file-2", ...],
    )
    rag_store = rag.VertexRagStore(
        rag_resources=[rag_resource],
        similarity_top_k=3,  # Optional
        vector_distance_threshold=0.5,  # Optional
    )
    rag_retrieval_tool = Tool.from_retrieval(
        retrieval=rag.Retrieval(source=rag_store)
    )

    rag_model = GenerativeModel(
        model_name="gemini-1.5-flash-001", tools=[rag_retrieval_tool]
    )
    response = rag_model.generate_content("Why is the sky blue?")
    print(response.text)
    # Example response:
    #   The sky appears blue due to a phenomenon called Rayleigh scattering.
    #   Sunlight, which contains all colors of the rainbow, is scattered
    #   by the tiny particles in the Earth's atmosphere....
    #   ...

    # [END generativeaionvertexai_rag_generate_content]

    return response
68+
69+
70+
if __name__ == "__main__":
    # Placeholder resource name: this is a plain (non-f) string, so the
    # braces are passed through literally and must be replaced by hand.
    generate_content_with_rag(
        corpus_name="projects/{PROJECT_ID}/locations/us-central1/ragCorpora/{rag_corpus_id}"
    )
+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
from google.cloud.aiplatform_v1beta1 import RagCorpus
18+
19+
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
20+
21+
22+
def get_corpus(corpus_name: str) -> RagCorpus:
    """Fetch a RAG corpus by resource name, print it, and return it.

    Initializes the Vertex AI SDK for ``PROJECT_ID`` in ``us-central1``
    before calling ``rag.get_corpus``.

    Args:
        corpus_name: Resource name of the corpus, e.g.
            ``projects/{PROJECT_ID}/locations/us-central1/ragCorpora/{rag_corpus_id}``.

    Returns:
        The object returned by ``rag.get_corpus``.
    """
    # [START generativeaionvertexai_rag_get_corpus]

    from vertexai.preview import rag
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # PROJECT_ID = "your-project-id"
    # corpus_name = "projects/{PROJECT_ID}/locations/us-central1/ragCorpora/{rag_corpus_id}"

    # Initialize Vertex AI API once per session
    vertexai.init(project=PROJECT_ID, location="us-central1")

    rag_corpus = rag.get_corpus(name=corpus_name)
    print(rag_corpus)
    # Example response:
    # RagCorpus(name='projects/[PROJECT_ID]/locations/us-central1/ragCorpora/1234567890',
    # display_name='test_corpus', description='Corpus Description',
    # ...

    # [END generativeaionvertexai_rag_get_corpus]
    return rag_corpus
44+
45+
46+
if __name__ == "__main__":
    # Placeholder resource name; replace the project and corpus IDs by hand.
    get_corpus(
        "projects/your-project-id/locations/us-central1/ragCorpora/[rag_corpus_id]"
    )

generative_ai/rag/get_file_example.py

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
from google.cloud.aiplatform_v1beta1 import RagFile
18+
19+
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
20+
21+
22+
def get_file(file_name: str) -> RagFile:
    """Fetch a RAG file by resource name, print it, and return it.

    Initializes the Vertex AI SDK for ``PROJECT_ID`` in ``us-central1``
    before calling ``rag.get_file``.

    Args:
        file_name: Resource name of the file, e.g.
            ``projects/{PROJECT_ID}/locations/us-central1/ragCorpora/{rag_corpus_id}/ragFiles/{rag_file_id}``.

    Returns:
        The object returned by ``rag.get_file``.
    """
    # [START generativeaionvertexai_rag_get_file]

    from vertexai.preview import rag
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # PROJECT_ID = "your-project-id"
    # file_name = "projects/{PROJECT_ID}/locations/us-central1/ragCorpora/{rag_corpus_id}/ragFiles/{rag_file_id}"

    # Initialize Vertex AI API once per session
    vertexai.init(project=PROJECT_ID, location="us-central1")

    fetched_file = rag.get_file(name=file_name)
    print(fetched_file)
    # Example response:
    # RagFile(name='projects/1234567890/locations/us-central1/ragCorpora/11111111111/ragFiles/22222222222',
    # display_name='file_display_name', description='file description')

    # [END generativeaionvertexai_rag_get_file]

    return fetched_file
44+
45+
46+
if __name__ == "__main__":
    # Placeholder resource name: this is a plain (non-f) string, so the
    # braces are passed through literally and must be replaced by hand.
    get_file(
        file_name="projects/{PROJECT_ID}/locations/us-central1/ragCorpora/{rag_corpus_id}/ragFiles/{rag_file_id}"
    )
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
from typing import List
18+
19+
from google.cloud.aiplatform_v1beta1 import ImportRagFilesResponse
20+
21+
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
22+
23+
24+
async def import_files_async(
    corpus_name: str,
    paths: List[str],
) -> ImportRagFilesResponse:
    """Import files into a RAG corpus asynchronously and return the result.

    Initializes the Vertex AI SDK for ``PROJECT_ID`` in ``us-central1``,
    awaits ``rag.import_files_async``, then awaits ``result()`` on the
    returned object and prints how many files were imported.

    Args:
        corpus_name: Resource name of the corpus, e.g.
            ``projects/{PROJECT_ID}/locations/us-central1/ragCorpora/{rag_corpus_id}``.
        paths: Source locations to import (Google Cloud Storage or Google
            Drive links, per the sample comment below).

    Returns:
        The value produced by awaiting ``result()``; the sample reads its
        ``imported_rag_files_count`` attribute.
    """
    # [START generativeaionvertexai_rag_import_files_async]

    from vertexai.preview import rag
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # PROJECT_ID = "your-project-id"
    # corpus_name = "projects/{PROJECT_ID}/locations/us-central1/ragCorpora/{rag_corpus_id}"

    # Supports Google Cloud Storage and Google Drive Links
    # paths = ["https://drive.google.com/file/d/123", "gs://my_bucket/my_files_dir"]

    # Initialize Vertex AI API once per session
    vertexai.init(project=PROJECT_ID, location="us-central1")

    operation = await rag.import_files_async(
        corpus_name=corpus_name,
        paths=paths,
        chunk_size=512,  # Optional
        chunk_overlap=100,  # Optional
        max_embedding_requests_per_min=900,  # Optional
    )

    import_result = await operation.result()
    print(f"Imported {import_result.imported_rag_files_count} files.")
    # Example response:
    # Imported 2 files.

    # [END generativeaionvertexai_rag_import_files_async]
    return import_result
58+
59+
60+
if __name__ == "__main__":
    import asyncio

    # Placeholder inputs; replace them with real resources before running.
    # The Drive link uses the "/file/d/<id>" form, matching the example in
    # the `paths` comment inside import_files_async.
    gdrive_path = "https://drive.google.com/file/d/1234567890"
    gcloud_path = "gs://your-bucket-name/file.txt"
    asyncio.run(
        import_files_async(
            corpus_name="projects/{PROJECT_ID}/locations/us-central1/ragCorpora/{rag_corpus_id}",
            paths=[gdrive_path, gcloud_path],
        )
    )

0 commit comments

Comments
 (0)