Skip to content

Commit 2de0c4d

Browse files
committed
add original starter kit code
0 parents  commit 2de0c4d

33 files changed

+96384
-0
lines changed

.dockerignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
models/**

.gitattributes

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
example_data/qa.json filter=lfs diff=lfs merge=lfs -text
2+
example_data/web.json filter=lfs diff=lfs merge=lfs -text
3+
example_data/dev_data.jsonl.bz2 filter=lfs diff=lfs merge=lfs -text

.gitignore

+175
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
.vscode
2+
__pycache__
3+
api_responses
4+
data
5+
6+
7+
# Python
8+
__pycache__
9+
*.pyc
10+
*.egg-info
11+
dist
12+
13+
# Log
14+
#*.log
15+
#*results*
16+
#*models*
17+
#*runs*
18+
19+
# Data
20+
!**/alpaca-data-conversation.json
21+
22+
# Editor
23+
.idea
24+
*.swp
25+
26+
# Other
27+
.DS_Store
28+
wandb
29+
30+
*.pt
31+
*.pkl
32+
*.vscode
33+
34+
.idea/
35+
36+
37+
# github python gitignore
38+
# Byte-compiled / optimized / DLL files
39+
__pycache__/
40+
*.py[cod]
41+
*$py.class
42+
43+
# C extensions
44+
*.so
45+
46+
# Distribution / packaging
47+
.Python
48+
build/
49+
develop-eggs/
50+
dist/
51+
downloads/
52+
eggs/
53+
.eggs/
54+
lib/
55+
lib64/
56+
parts/
57+
sdist/
58+
var/
59+
wheels/
60+
share/python-wheels/
61+
*.egg-info/
62+
.installed.cfg
63+
*.egg
64+
MANIFEST
65+
66+
# PyInstaller
67+
# Usually these files are written by a python script from a template
68+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
69+
*.manifest
70+
*.spec
71+
72+
# Installer logs
73+
pip-log.txt
74+
pip-delete-this-directory.txt
75+
76+
# Unit test / coverage reports
77+
htmlcov/
78+
.tox/
79+
.nox/
80+
.coverage
81+
.coverage.*
82+
.cache
83+
nosetests.xml
84+
coverage.xml
85+
*.cover
86+
*.py,cover
87+
.hypothesis/
88+
.pytest_cache/
89+
cover/
90+
91+
# Translations
92+
*.mo
93+
*.pot
94+
95+
# Django stuff:
96+
#*.log
97+
local_settings.py
98+
db.sqlite3
99+
db.sqlite3-journal
100+
101+
# Flask stuff:
102+
instance/
103+
.webassets-cache
104+
105+
# Scrapy stuff:
106+
.scrapy
107+
108+
# Sphinx documentation
109+
docs/_build/
110+
111+
# PyBuilder
112+
.pybuilder/
113+
target/
114+
115+
# Jupyter Notebook
116+
.ipynb_checkpoints
117+
118+
# IPython
119+
profile_default/
120+
ipython_config.py
121+
122+
# pyenv
123+
# For a library or package, you might want to ignore these files since the code is
124+
# intended to run in multiple environments; otherwise, check them in:
125+
# .python-version
126+
127+
# pipenv
128+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
129+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
130+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
131+
# install all needed dependencies.
132+
#Pipfile.lock
133+
134+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
135+
__pypackages__/
136+
137+
# Celery stuff
138+
celerybeat-schedule
139+
celerybeat.pid
140+
141+
# SageMath parsed files
142+
*.sage.py
143+
144+
# Environments
145+
.env
146+
.venv
147+
env/
148+
venv/
149+
ENV/
150+
env.bak/
151+
venv.bak/
152+
153+
# Spyder project settings
154+
.spyderproject
155+
.spyproject
156+
157+
# Rope project settings
158+
.ropeproject
159+
160+
# mkdocs documentation
161+
/site
162+
163+
# mypy
164+
.mypy_cache/
165+
.dmypy.json
166+
dmypy.json
167+
168+
# Pyre type checker
169+
.pyre/
170+
171+
# pytype static type analyzer
172+
.pytype/
173+
174+
# Cython debug symbols
175+
cython_debug/

Dockerfile

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
## This is an example Dockerfile you can change to make submissions on aicrowd
2+
## To use it, place it in the base of the repo, and remove the underscore (_) from the filename
3+
4+
# Base image: CUDA 12.1.1 with cuDNN 8 (devel variant) on Ubuntu 20.04.
FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04
5+
6+
# Suppress interactive apt/debconf prompts while building. Declared as ARG
# rather than ENV so it still applies to every RUN step in this stage but is
# not baked into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive
7+
8+
# Install the OS packages named in apt.txt. The file is copied on its own so
# this layer is only rebuilt when the package list changes.
# apt-get is used instead of apt because apt does not offer a stable CLI for
# scripts; $(...) replaces the legacy backtick substitution. The package list
# is intentionally left unquoted so each name becomes a separate argument.
COPY apt.txt /tmp/apt.txt
9+
RUN apt-get -qq update && apt-get -qq install -y --no-install-recommends $(cat /tmp/apt.txt) \
10+
&& rm -rf /var/cache/*
11+
# Base tooling: locales (for locale-gen), wget (to fetch the Miniconda
# installer) and a C/C++ toolchain.
# The package index is refreshed in the same layer as the install so a cached
# `update` layer can never be paired with a newer install request, and apt-get
# is used because apt has no stable scripting interface.
RUN apt-get update \
    && apt-get install -y build-essential locales wget
12+
13+
# Unicode support:
14+
# Generate the en_US.UTF-8 locale and select it through the locale variables.
# ENV uses the key=value form; the space-separated form is deprecated.
RUN locale-gen en_US.UTF-8
15+
ENV LANG=en_US.UTF-8
16+
ENV LANGUAGE=en_US:en
17+
ENV LC_ALL=en_US.UTF-8
18+
19+
# Create user home directory - This is needed for aicrowd submissions
20+
# Account name and home directory of the unprivileged user created further down.
# ENV uses the key=value form; the space-separated form is deprecated.
ENV USER_NAME=aicrowd
21+
ENV HOME_DIR=/home/$USER_NAME
22+
23+
# Replace HOST_UID/HOST_GID with your user / group id
24+
# Numeric user and group id for the container account; HOST_UID is the value
# handed to adduser via --uid further down.
# ENV uses the key=value form; the space-separated form is deprecated.
ENV HOST_UID=1001
25+
ENV HOST_GID=1001
26+
27+
# Use bash as default shell, rather than sh
28+
# Exports SHELL for processes inside the container. This only sets the
# environment variable: RUN steps keep using the Dockerfile's default shell
# unless a SHELL instruction is added.
ENV SHELL=/bin/bash
29+
30+
# Set up user
31+
# Create the account ${USER_NAME} with uid ${HOST_UID} and no password.
# No --home is passed, so adduser picks the home directory itself.
# NOTE(review): HOME_DIR is assumed to equal that default (/home/<name>) — confirm.
RUN adduser --disabled-password \
32+
--gecos "Default user" \
33+
--uid ${HOST_UID} \
34+
${USER_NAME}
35+
36+
# Drop root: all following instructions run as ${USER_NAME}, with ${HOME_DIR}
# as the working directory.
USER ${USER_NAME}
37+
WORKDIR ${HOME_DIR}
38+
39+
# Install prefix for Miniconda, inside the user's home so no root access is needed.
# ENV uses the key=value form; the space-separated form is deprecated.
ENV CONDA_DIR=${HOME_DIR}/.conda
40+
41+
42+
# Download the pinned Miniconda installer (py38, release 22.11.1-1), run it in
# batch mode (-b) with ${CONDA_DIR} as the prefix, clear conda's caches, and
# delete the installer within the same layer so it does not persist in the image.
# NOTE(review): the download is not checksum-verified — consider validating its
# sha256 before executing it.
RUN wget -nv -O miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py38_22.11.1-1-Linux-x86_64.sh \
43+
&& bash miniconda.sh -b -p ${CONDA_DIR} \
44+
&& . ${CONDA_DIR}/etc/profile.d/conda.sh \
45+
&& conda clean -y -a \
46+
&& rm -rf miniconda.sh
47+
48+
# Put conda's bin directory first on PATH so its python/pip/conda are used by
# the following steps and at runtime.
# ENV uses the key=value form; the space-separated form is deprecated.
ENV PATH=${CONDA_DIR}/bin:${PATH}
49+
50+
# Install CMake from conda, then clear conda's caches within the same layer.
RUN conda install -y cmake \
    && conda clean -a -y
51+
# Copy only the dependency manifest first so the pip layer stays cached until
# requirements.txt itself changes.
# Ownership follows HOST_UID/HOST_GID rather than a hard-coded 1001:1001, so
# editing those variables (as the file instructs) keeps the files owned by the
# container user.
COPY --chown=${HOST_UID}:${HOST_GID} requirements.txt ${HOME_DIR}/requirements.txt
52+
RUN pip install --no-cache-dir -r requirements.txt
53+
54+
# Copy the rest of the repository into the user's home directory. Ownership
# follows HOST_UID/HOST_GID rather than a hard-coded 1001:1001, so changing
# those variables keeps the files owned by the container user.
COPY --chown=${HOST_UID}:${HOST_GID} . ${HOME_DIR}
55+
56+
## Add your custom commands below

README.md

+124
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
![banner image](https://aicrowd-production.s3.eu-central-1.amazonaws.com/challenge_images/meta-kdd-cup-24/meta_kdd_cup_24_banner.jpg)
2+
[![Discord](https://img.shields.io/discord/565639094860775436.svg)](https://discord.gg/yWurtB2huX)
3+
4+
# Meta KDD Cup '24 [CRAG: Comprehensive RAG Benchmark](https://www.aicrowd.com/challenges/meta-comprehensive-rag-benchmark-kdd-cup-2024) Starter Kit
5+
6+
7+
This repository is the CRAG: Comprehensive RAG Benchmark **Submission template and Starter kit**! Clone the repository to compete now!
8+
9+
**This repository contains**:
10+
* **Documentation** on how to submit your models to the leaderboard
11+
* **The procedure** for best practices and information on how we evaluate your model, etc.
12+
* **Starter code** for you to get started!
13+
14+
# Table of Contents
15+
16+
1. [Competition Overview](#-competition-overview)
17+
2. [Dataset](#-dataset)
18+
3. [Tasks](#-tasks)
19+
4. [Evaluation Metrics](#-evaluation-metrics)
20+
5. [Getting Started](#-getting-started)
21+
- [How to write your own model?](#️-how-to-write-your-own-model)
22+
- [How to start participating?](#-how-to-start-participating)
23+
- [Setup](#setup)
24+
- [How to make a submission?](#-how-to-make-a-submission)
25+
- [What hardware does my code run on?](#-what-hardware-does-my-code-run-on-)
26+
- [How are my model responses parsed by the evaluators?](#-how-are-my-model-responses-parsed-by-the-evaluators-)
27+
- [Baselines](#-baseline)
28+
6. [Frequently Asked Questions](#-frequently-asked-questions)
29+
7. [Important Links](#-important-links)
30+
31+
32+
# 📖 Competition Overview
33+
34+
35+
# 📊 Dataset
36+
37+
Please find more details about the dataset in [docs/dataset.md](docs/dataset.md).
38+
39+
# 👨‍💻👩‍💻 Tasks
40+
41+
42+
## 📏 Evaluation Metrics
43+
44+
45+
Please refer to [local_evaluation.py](local_evaluation.py) for more details on how we will evaluate your submissions.
46+
47+
# 🏁 Getting Started
48+
1. **Sign up** to join the competition [on the AIcrowd website](https://www.aicrowd.com/challenges/meta-comprehensive-rag-benchmark-kdd-cup-2024).
49+
2. **Fork** this starter kit repository. You can use [this link](https://gitlab.aicrowd.com/aicrowd/challenges/meta-comprehensive-rag-benchmark-kdd-cup-2024/meta-comphrehensive-rag-benchmark-starter-kit/-/forks/new) to create a fork.
50+
3. **Clone** your forked repo and start developing your model.
51+
4. **Develop** your model(s) following the template in [how to write your own model](#how-to-write-your-own-model) section.
52+
5. [**Submit**](#-how-to-make-a-submission) your trained models to [AIcrowd Gitlab](https://gitlab.aicrowd.com) for evaluation [(full instructions below)](#-how-to-make-a-submission). The automated evaluation will evaluate the submissions on the public test set and report the metrics on the leaderboard of the competition.
53+
54+
# ✍️ How to write your own model?
55+
56+
Please see [models/README.md](models/README.md) for instructions and examples on how to write your own models for this competition.
57+
58+
# 🚴 How to start participating?
59+
60+
## Setup
61+
62+
1. **Add your SSH key** to AIcrowd GitLab
63+
64+
You can add your SSH Keys to your GitLab account by going to your profile settings [here](https://gitlab.aicrowd.com/-/profile/keys). If you do not have SSH Keys, you will first need to [generate one](https://docs.gitlab.com/ee/user/ssh.html).
65+
66+
67+
2. **Fork the repository**. You can use [this link](https://gitlab.aicrowd.com/aicrowd/challenges/meta-comprehensive-rag-benchmark-kdd-cup-2024/meta-comphrehensive-rag-benchmark-starter-kit/-/forks/new) to create a fork.
68+
69+
3. **Clone the repository**
70+
71+
```bash
72+
git clone git@gitlab.aicrowd.com:<YOUR-AICROWD-USERNAME>/meta-comphrehensive-rag-benchmark-starter-kit.git
73+
cd meta-comphrehensive-rag-benchmark-starter-kit
74+
```
75+
76+
4. **Install** competition specific dependencies!
77+
```bash
78+
cd meta-comphrehensive-rag-benchmark-starter-kit
79+
pip install -r requirements.txt
80+
```
81+
82+
5. Write your own model as described in [How to write your own model](#how-to-write-your-own-model) section.
83+
84+
6. Test your model locally using `python local_evaluation.py`.
85+
86+
7. Accept the Challenge Rules on the main [challenge page](https://www.aicrowd.com/challenges/meta-comprehensive-rag-benchmark-kdd-cup-2024) by clicking on the **Participate** button. Also accept the Challenge Rules on the Task specific page (link on the challenge page) that you want to submit to.
87+
88+
8. Make a submission as described in [How to make a submission](#-how-to-make-a-submission) section.
89+
90+
# ✍️ How to write your own model?
91+
92+
Please see [models/README.md](models/README.md) for instructions and examples on how to write your own models for this competition.
93+
94+
95+
## 📮 How to make a submission?
96+
97+
Please follow the instructions in [docs/submission.md](docs/submission.md) to make your first submission.
98+
This also includes instructions on [specifying your software runtime](docs/submission.md#specifying-software-runtime-and-dependencies), [code structure](docs/submission.md#code-structure-guidelines), [submitting to different tracks](docs/submission.md#submitting-to-different-tracks).
99+
100+
**Note**: **Remember to accept the Challenge Rules** on the challenge page, **and** the task page before making your first submission.
101+
102+
## 💻 What hardware does my code run on ?
103+
You can find more details about the hardware and system configuration in [docs/hardware-and-system-config.md](docs/hardware-and-system-config.md).
104+
In summary, we provide you with `4` x [NVIDIA T4 GPUs](https://www.nvidia.com/en-us/data-center/tesla-t4/).
105+
106+
## 🏁 Baseline
107+
We include three baselines for demonstration purposes, and you can read more about them in [docs/baselines.md](docs/baselines.md).
108+
109+
# ❓ Frequently Asked Questions
110+
## Which track is this starter kit for ?
111+
This starter kit can be used to submit to any of the tracks. You can find more information in [docs/submission.md#submitting-to-different-tracks](docs/submission.md#submitting-to-different-tracks).
112+
113+
## Where can I know more about the dataset schema ?
114+
The dataset schema is described in [docs/dataset.md](docs/dataset.md).
115+
116+
If you want to use Croissant to view the data, please use [docs/croissant.json](docs/croissant.json).
117+
118+
**Best of Luck** :tada: :tada:
119+
120+
# 📎 Important links
121+
122+
- 💪 Challenge Page: https://www.aicrowd.com/challenges/meta-comprehensive-rag-benchmark-kdd-cup-2024
123+
- 🗣 Discussion Forum: https://www.aicrowd.com/challenges/meta-comprehensive-rag-benchmark-kdd-cup-2024/discussion
124+
- 🏆 Leaderboard: https://www.aicrowd.com/challenges/meta-comprehensive-rag-benchmark-kdd-cup-2024/leaderboards

aicrowd.json

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"challenge_id": "meta-kdd-cup-24-crag-retrieval-summarization",
3+
"authors": [
4+
"aicrowd-bot"
5+
],
6+
"gpu": false,
7+
"gpu_count": 0,
8+
"description": "(optional) description about your awesome agent"
9+
}

apt.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
git

0 commit comments

Comments
 (0)