Skip to content

Commit 6de0008

Browse files
authored
Upstream changes for v0.5.0 release (#71)
1 parent 478cf96 commit 6de0008

File tree

299 files changed

+36776
-2402
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

299 files changed

+36776
-2402
lines changed

.gitattributes

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
notebooks/dataset.zip filter=lfs diff=lfs merge=lfs -text

.github/workflows/docs-build.yaml

+153
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
name: docs-build
2+
3+
on:
4+
pull_request:
5+
branches: [ main, release-* ]
6+
types: [ opened, synchronize ]
7+
8+
push:
9+
branches: [ main ]
10+
tags:
11+
- v*
12+
workflow_dispatch:
13+
14+
concurrency:
15+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
16+
cancel-in-progress: true
17+
18+
defaults:
19+
run:
20+
shell: bash
21+
22+
jobs:
23+
build-docs:
24+
runs-on: ubuntu-latest
25+
steps:
26+
- name: Checkout
27+
uses: actions/checkout@v4
28+
- name: Build image
29+
run: |
30+
docker build --pull --tag docs-builder:latest --file docs/Dockerfile .
31+
- name: Build docs
32+
run: |
33+
docker run -v $(pwd):/work -w /work docs-builder:latest sphinx-build -b html -d /tmp docs docs/_build/output
34+
- name: Delete unnecessary files
35+
run: |
36+
sudo rm -rf docs/_build/jupyter_execute
37+
sudo rm -rf docs/_build/.buildinfo
38+
- name: Upload HTML
39+
uses: actions/upload-artifact@v4
40+
with:
41+
name: html-build-artifact
42+
path: docs/_build/
43+
if-no-files-found: error
44+
retention-days: 1
45+
- name: Store PR information
46+
if: ${{ github.event_name == 'pull_request' }}
47+
run: |
48+
mkdir ./pr
49+
echo ${{ github.event.number }} > ./pr/pr.txt
50+
echo ${{ github.event.pull_request.merged }} > ./pr/merged.txt
51+
echo ${{ github.event.action }} > ./pr/action.txt
52+
- name: Upload PR information
53+
if: ${{ github.event_name == 'pull_request' }}
54+
uses: actions/upload-artifact@v4
55+
with:
56+
name: pr
57+
path: pr/
58+
59+
store-html:
60+
needs: [ build-docs ]
61+
if: ${{ github.event_name == 'push' }}
62+
runs-on: ubuntu-latest
63+
steps:
64+
- uses: actions/checkout@v4
65+
with:
66+
ref: "gh-pages"
67+
- name: Initialize Git configuration
68+
run: |
69+
git config user.name docs-build
70+
git config user.email [email protected]
71+
- name: Download artifacts
72+
uses: actions/download-artifact@v4
73+
with:
74+
name: html-build-artifact
75+
- name: Copy HTML directories
76+
run: |
77+
ls -asl
78+
- name: Store bleeding edge docs from main
79+
if: ${{ github.ref == 'refs/heads/main' }}
80+
run: |
81+
mkdir main || true
82+
rsync -av --progress --delete output/ main/
83+
git add main
84+
- name: Store docs for a release tag
85+
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
86+
env:
87+
LATEST: ${{ contains(github.event.head_commit.message, '/not-latest') && 'not-true' || 'true' }}
88+
run: |
89+
printenv LATEST
90+
if [[ "${GITHUB_REF}" =~ "-rc" ]]; then
91+
echo "Not saving documents for release candidates."
92+
exit 0
93+
fi
94+
if [[ "${GITHUB_REF}" =~ v([0-9]+\.[0-9]+\.[0-9]+) ]]; then
95+
TAG="${BASH_REMATCH[1]}"
96+
mkdir "${TAG}" || true
97+
rsync -av --progress --delete output/ "${TAG}/"
98+
git add "${TAG}/"
99+
if [[ "${LATEST}" == 'true' ]]; then
100+
mkdir latest || true
101+
rsync -av --progress --delete output/ latest/
102+
cp output/versions.json .
103+
git add latest
104+
git add versions.json
105+
fi
106+
fi
107+
- name: Check or create dot-no-jekyll file
108+
run: |
109+
if [ -f ".nojekyll" ]; then
110+
echo "The dot-no-jekyll file already exists."
111+
exit 0
112+
fi
113+
touch .nojekyll
114+
git add .nojekyll
115+
- name: Check or create redirect page
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    # Purpose: make sure the root of the Pages branch has an index.html that
    # redirects visitors to latest/index.html.
    # Skip regeneration when index.html already carries a meta-refresh tag.
    # The pattern here must match the tag written further down.
    resp=$(grep 'http-equiv="refresh"' index.html 2>/dev/null) || true
    if [ -n "${resp}" ]; then
      echo "The redirect file already exists."
      exit 0
    fi
    # If any of these commands fail, fail the build.
    html_url=$(gh api "repos/${GITHUB_REPOSITORY}/pages" --jq ".html_url")
    # Beware ugly quotation mark avoidance in the following lines: each echo
    # closes the single-quoted string, expands ${html_url}, then reopens it.
    echo '<!DOCTYPE html>' > index.html
    echo '<html>' >> index.html
    echo ' <head>' >> index.html
    echo ' <title>Redirect to documentation</title>' >> index.html
    echo ' <meta charset="utf-8">' >> index.html
    # The attribute name is http-equiv (hyphen), matching the grep above.
    echo ' <meta http-equiv="refresh" content="3; URL='${html_url}'/latest/index.html">' >> index.html
    echo ' <link rel="canonical" href="'${html_url}'/latest/index.html">' >> index.html
    echo ' <script language="javascript">' >> index.html
    echo ' function redirect() {' >> index.html
    echo ' window.location.assign("'${html_url}'/latest/index.html")' >> index.html
    echo ' }' >> index.html
    echo ' </script>' >> index.html
    echo ' </head>' >> index.html
    echo ' <body onload="redirect()">' >> index.html
    echo ' <p>Please follow the link to the <a href="'${html_url}'/latest/index.html">' >> index.html
    echo 'latest</a> documentation.</p>' >> index.html
    echo ' </body>' >> index.html
    echo '</html>' >> index.html
    git add index.html
146+
- name: Commit changes to the GitHub Pages branch
147+
run: |
148+
git status
149+
if git commit -m 'Pushing changes to GitHub Pages.'; then
150+
git push -f
151+
else
152+
echo "Nothing changed."
153+
fi
+117
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
name: docs-preview-pr
2+
3+
on:
4+
workflow_run:
5+
workflows: [docs-build]
6+
types: [completed]
7+
8+
env:
9+
WF_ID: ${{ github.event.workflow_run.id }}
10+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
11+
12+
jobs:
13+
# Always determine if GitHub Pages are configured for this repo.
14+
get-gh-pages-url:
15+
if:
16+
github.event.workflow_run.event == 'pull_request' &&
17+
github.event.workflow_run.conclusion == 'success'
18+
runs-on: ubuntu-latest
19+
outputs:
20+
url: ${{ steps.api-resp.outputs.html_url || '' }}
21+
branch: ${{ steps.api-resp.outputs.branch || '' }}
22+
steps:
23+
- name: Check for GitHub Pages
  id: api-resp
  run: |
    # Purpose: publish the Pages site URL and source branch as step outputs
    # (html_url, branch), or leave both unset when Pages is not enabled.
    has_pages=$(gh api "repos/${GITHUB_REPOSITORY}" -q '.has_pages')
    if [ "true" != "${has_pages}" ]; then
      echo "GitHub pages is not active for the repository. Quitting."
      # `return` is only valid inside a function or a sourced script; at the
      # top level of a run script it is an error and fails the step. Exit
      # successfully instead so html_url and branch are simply never written.
      exit 0
    fi

    url=$(gh api "repos/${GITHUB_REPOSITORY}/pages" -q '.html_url')
    branch=$(gh api "repos/${GITHUB_REPOSITORY}/pages" -q '.source.branch')

    # Expose both values as step outputs.
    echo "html_url=${url}" >> $GITHUB_OUTPUT
    echo "branch=${branch}" >> $GITHUB_OUTPUT
37+
38+
# Identify the dir for the HTML.
39+
store-html:
40+
runs-on: ubuntu-latest
41+
needs: [get-gh-pages-url]
42+
if: needs.get-gh-pages-url.outputs.url != ''
43+
steps:
44+
- uses: actions/checkout@v4
45+
with:
46+
ref: ${{ needs.get-gh-pages-url.outputs.branch }}
47+
- name: Initialize Git configuration
48+
run: |
49+
git config user.name docs-preview
50+
git config user.email [email protected]
51+
- name: Download artifacts
  run: |
    # Purpose: fetch every artifact of the triggering run and export the PR
    # metadata stored in ./pr/*.txt as environment variables for later steps.
    gh run view "${WF_ID}"
    gh run download "${WF_ID}"
    PR=$(cat ./pr/pr.txt)
    MERGED=$(cat ./pr/merged.txt)
    ACTION=$(cat ./pr/action.txt)

    # The pr artifact is produced by the triggering run, so treat its
    # contents as untrusted. Validate each value before writing it to
    # GITHUB_ENV: later steps expand these values into shell commands, and
    # an embedded newline could inject additional environment variables.
    if ! [[ "${PR}" =~ ^[0-9]+$ ]]; then
      echo "Unexpected PR number in the pr artifact. Quitting."
      exit 1
    fi
    if [[ "${MERGED}" != "true" && "${MERGED}" != "false" ]]; then
      echo "Unexpected merge status in the pr artifact. Quitting."
      exit 1
    fi
    if ! [[ "${ACTION}" =~ ^[a-z_]+$ ]]; then
      echo "Unexpected PR action in the pr artifact. Quitting."
      exit 1
    fi

    echo "PR_NO=${PR}" >> $GITHUB_ENV
    echo "MERGE_STATUS=${MERGED}" >> $GITHUB_ENV
    echo "PR_ACTION=${ACTION}" >> $GITHUB_ENV
    echo "REVIEW_DIR=review/" >> $GITHUB_ENV
    echo "PR_REVIEW_DIR=review/pr-${PR}" >> $GITHUB_ENV

    # Remove the pr artifact directory so that it does not
    # appear in listings or confuse git with untracked files.
    rm -rf ./pr
67+
68+
# Permutations:
69+
# - PR was updated, PR_ACTION is !closed, need to delete review directory and update it.
70+
# - PR was closed (regardless of merge), PR_ACTION is closed, need to delete review directory.
71+
72+
# If this PR is still open, store HTML in a review directory.
73+
- name: Handle HTML review directory for open PRs and updates to PRs
  if: env.MERGE_STATUS == 'false' && env.PR_ACTION != 'closed'
  run: |
    # Purpose: replace the PR's review directory with the freshly built HTML
    # and stage it for commit.
    # Drop any previous copy first so stale pages do not linger.
    rm -rf "${{ env.PR_REVIEW_DIR }}" 2>/dev/null || true
    if [ ! -d "${{ env.REVIEW_DIR }}" ]; then
      mkdir "${{ env.REVIEW_DIR }}"
    fi
    # The docs-build workflow runs sphinx-build into docs/_build/output and
    # uploads docs/_build/ as html-build-artifact, so the rendered site is
    # in the artifact's output/ directory.
    mv ./html-build-artifact/output/ "${{ env.PR_REVIEW_DIR }}"
    git add "${{ env.PR_REVIEW_DIR }}"
82+
# If the PR was closed, merged or not, delete review directory.
83+
- name: Delete HTML review directory for closed PRs
84+
if: env.PR_ACTION == 'closed'
85+
run: |
86+
if [ -d ./html-build-artifact/ ]; then
87+
rm -rf ./html-build-artifact/ 2>/dev/null
88+
fi
89+
if [ -d "${{ env.PR_REVIEW_DIR }}" ]; then
90+
git rm -rf "${{ env.PR_REVIEW_DIR }}"
91+
fi
92+
- name: Commit changes to the GitHub Pages branch
93+
run: |
94+
git status
95+
if git commit -m 'Pushing changes to GitHub Pages.'; then
96+
git push -f
97+
else
98+
echo "Nothing changed."
99+
fi
100+
- name: Check for existing documentation review comment
101+
run: |
102+
result=$(gh pr view ${{ env.PR_NO }} --json comments -q 'any(.comments[].body; contains("Documentation preview"))')
103+
echo "COMMENT_EXISTS=${result}" >> $GITHUB_ENV
104+
env:
105+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
106+
- name: Add HTML review URL comment to a newly opened PR
107+
if: env.MERGE_STATUS == 'false' && env.COMMENT_EXISTS == 'false'
108+
env:
109+
URL: ${{ needs.get-gh-pages-url.outputs.url }}
110+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
111+
shell: bash
112+
run: |
113+
echo -e "## Documentation preview" > body
114+
echo -e "" >> body
115+
echo -e "<${{ env.URL }}${{ env.PR_REVIEW_DIR }}>" >> body
116+
cat body
117+
gh pr comment ${{ env.PR_NO }} --body-file body
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
name: docs-remove-stale-reviews
2+
3+
on:
4+
schedule:
5+
# 42 minutes after 0:00 UTC on Sundays
6+
- cron: "42 0 * * 0"
7+
workflow_dispatch:
8+
9+
jobs:
10+
remove:
11+
uses: nvidia-merlin/.github/.github/workflows/docs-remove-stale-reviews-common.yaml@main

.gitignore

+12-1
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,15 @@ deploy/*.txt
1111

1212
# Docker Compose exclusions
1313
volumes/
14-
uploaded_files/
14+
uploaded_files/
15+
16+
# Visual Studio Code
17+
.vscode
18+
19+
# Node modules
20+
**/node_modules
21+
22+
# File from docs builds
23+
docs/_*
24+
docs/notebooks
25+
docs/experimental

CHANGELOG.md

+43-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,48 @@ All notable changes to this project will be documented in this file.
33

44
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
55

6-
## [0.4.0] - 2024-02-22
6+
## [0.5.0] - 2024-03-19
7+
8+
This release adds new dedicated RAG examples showcasing state-of-the-art use cases, switches to the latest [API catalog endpoints from NVIDIA](https://build.nvidia.com/explore/discover), and refactors the API interface of chain-server. This release also improves the developer experience by adding GitHub Pages based documentation and streamlining the example deployment flow using dedicated compose files.
9+
10+
### Added
11+
12+
- GitHub Pages based documentation.
13+
- New examples showcasing
14+
- [Multi-turn RAG](./RetrievalAugmentedGeneration/examples/multi_turn_rag/)
15+
- [Multi-modal RAG](./RetrievalAugmentedGeneration/examples/multimodal_rag/)
16+
- [Structured data CSV RAG](./RetrievalAugmentedGeneration/examples/csv_rag/)
17+
- Support for [delete and list APIs](./docs/api_reference/openapi_schema.json) in chain-server component
18+
- Streamlined RAG example deployment
19+
- Dedicated new [docker compose files](./deploy/compose/) for every example.
20+
- Dedicated [docker compose files](./deploy/compose/docker-compose-vectordb.yaml) for launching vector DB solutions.
21+
- New configurations to control top k and confidence score of retrieval pipeline.
22+
- Added [a notebook](./models/NeMo/slm/README.md) which covers how to train SLMs with various techniques using NeMo Framework.
23+
- Added more [experimental examples](./experimental/README.md) showcasing new use cases.
24+
- [NVIDIA ORAN chatbot multimodal Assistant](./experimental/oran-chatbot-multimodal/)
25+
- [NVIDIA Retrieval Customization](./experimental/synthetic-data-retriever-customization/)
26+
- [NVIDIA RAG Streaming Document Ingestion Pipeline](./experimental/streaming_ingest_rag/)
27+
- [NVIDIA Live FM Radio ASR RAG](./experimental/fm-asr-streaming-rag/)
28+
- [New dedicated notebook](./notebooks/10_RAG_for_HTML_docs_with_Langchain_NVIDIA_AI_Endpoints.ipynb) showcasing a RAG pipeline using web pages.
29+
30+
31+
### Changed
32+
33+
- Switched from NVIDIA AI Foundation to [NVIDIA API Catalog endpoints](https://build.nvidia.com/explore/discover) for accessing cloud hosted LLM models.
34+
- Refactored [API schema of chain-server component](./docs/api_reference/openapi_schema.json) to support runtime allocation of llm parameters like temperature, max tokens, chat history etc.
35+
- Renamed `llm-playground` service in compose files to `rag-playground`.
36+
- Switched base containers for all components to ubuntu instead of pytorch and optimized container build time as well as container size.
37+
- Deprecated yaml based configuration to avoid confusion, all configurations are now environment variable based.
38+
- Removed requirement of hardcoding `NVIDIA_API_KEY` in `compose.env` file.
39+
- Upgraded all python dependencies for chain-server and rag-playground services.
40+
41+
### Fixed
42+
43+
- Fixed a bug causing hallucinated answers when the retriever fails to return any documents.
44+
- Fixed some accuracy issues for all the examples.
45+
46+
47+
## [0.4.0] - 2024-02-23
748

849
### Added
950

@@ -75,4 +116,4 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
75116
### Fixed
76117

77118
- [Fixed issue #13](https://github.com/NVIDIA/GenerativeAIExamples/issues/13) of pipeline not able to answer questions unrelated to knowledge base
78-
- [Fixed issue #12](https://github.com/NVIDIA/GenerativeAIExamples/issues/12) typechecking while uploading PDF files
119+
- [Fixed issue #12](https://github.com/NVIDIA/GenerativeAIExamples/issues/12) typechecking while uploading PDF files

0 commit comments

Comments
 (0)