yecchen
diff --git a/‎.dockerignore
+1 b/‎.dockerignore
+1
diff --git a/‎.gitattributes
+3 b/‎.gitattributes
+3
diff --git a/‎.gitignore
+175 b/‎.gitignore
+175
diff --git a/‎Dockerfile
+56 b/‎Dockerfile
+56
diff --git a/‎README.md
+124 b/‎README.md
+124
diff --git a/‎aicrowd.json
+9 b/‎aicrowd.json
+9
diff --git a/‎apt.txt
+1 b/‎apt.txt
+1
@@ -0,0 +1 @@
+models/**
@@ -0,0 +1,3 @@
+example_data/qa.json filter=lfs diff=lfs merge=lfs -text
+example_data/web.json filter=lfs diff=lfs merge=lfs -text
+example_data/dev_data.jsonl.bz2 filter=lfs diff=lfs merge=lfs -text
@@ -0,0 +1,175 @@
+.vscode
+__pycache__
+api_responses
+data
+
+
+# Python
+__pycache__
+*.pyc
+*.egg-info
+dist
+
+# Log
+#*.log
+#*results*
+#*models*
+#*runs*
+
+# Data
+!**/alpaca-data-conversation.json
+
+# Editor
+.idea
+*.swp
+
+# Other
+.DS_Store
+wandb
+
+*.pt
+*.pkl
+*.vscode
+
+.idea/
+
+
+# github python gitignore
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+#*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
@@ -0,0 +1,56 @@
+## This is an example Dockerfile you can change to make submissions on aicrowd
+## To use it, place it in the base of the repo, and remove the underscore (_) from the filename
+
+FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04
+
+# Keep package installation from stopping on interactive prompts.
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install the packages listed in apt.txt plus the base tooling in ONE layer:
+#  - `update` and every `install` share a layer, so an install never runs
+#    against a stale, cached package index;
+#  - the cleanup runs in the layer that created the files, so it actually
+#    shrinks the image (the apt index in /var/lib/apt/lists is removed too).
+# The second install intentionally keeps recommended packages, matching the
+# package set this image has always shipped.
+COPY apt.txt /tmp/apt.txt
+RUN apt-get -qq update \
+ && apt-get -qq install -y --no-install-recommends $(cat /tmp/apt.txt) \
+ && apt-get install -y build-essential locales wget \
+ && rm -rf /var/cache/* /var/lib/apt/lists/*
+
+# Unicode support:
+RUN locale-gen en_US.UTF-8
+ENV LANG=en_US.UTF-8 \
+    LANGUAGE=en_US:en \
+    LC_ALL=en_US.UTF-8
+
+# Create user home directory - This is needed for aicrowd submissions
+# (HOME_DIR is a separate ENV instruction because it expands USER_NAME.)
+ENV USER_NAME=aicrowd
+ENV HOME_DIR=/home/${USER_NAME}
+
+# Replace HOST_UID/HOST_GID with your user / group id
+ENV HOST_UID=1001 \
+    HOST_GID=1001
+
+# Use bash as default shell, rather than sh
+ENV SHELL=/bin/bash
+
+# Set up user.
+# The group is created explicitly (unless a group with that id already
+# exists) and passed to adduser, so HOST_GID is actually honoured instead of
+# being silently ignored.
+RUN (getent group ${HOST_GID} > /dev/null || addgroup --gid ${HOST_GID} ${USER_NAME}) \
+ && adduser --disabled-password \
+    --gecos "Default user" \
+    --uid ${HOST_UID} \
+    --gid ${HOST_GID} \
+    ${USER_NAME}
+
+# Everything below runs as the unprivileged user inside its home directory.
+USER ${USER_NAME}
+WORKDIR ${HOME_DIR}
+
+# Miniconda is installed per-user, under the home directory.
+ENV CONDA_DIR=${HOME_DIR}/.conda
+
+
+# Download the pinned Miniconda installer, install it non-interactively into
+# CONDA_DIR, and drop the installer and conda caches in the same layer.
+RUN wget -nv -O miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py38_22.11.1-1-Linux-x86_64.sh \
+ && bash miniconda.sh -b -p ${CONDA_DIR} \
+ && . ${CONDA_DIR}/etc/profile.d/conda.sh \
+ && conda clean -y -a \
+ && rm -f miniconda.sh
+
+# Make conda and its python/pip the defaults for later steps and at runtime.
+ENV PATH=${CONDA_DIR}/bin:${PATH}
+
+RUN conda install cmake -y && conda clean -y -a
+
+# Copy only the dependency manifest first, so the (slow) pip layer is reused
+# until requirements.txt itself changes.
+# NOTE: the 1001:1001 owner must be kept in sync with the HOST_UID/HOST_GID
+# values set above.
+COPY --chown=1001:1001 requirements.txt ${HOME_DIR}/requirements.txt
+RUN pip install --no-cache-dir -r ${HOME_DIR}/requirements.txt
+
+COPY --chown=1001:1001 . ${HOME_DIR}
+
+## Add your custom commands below
@@ -0,0 +1,124 @@
+![banner image](https://aicrowd-production.s3.eu-central-1.amazonaws.com/challenge_images/meta-kdd-cup-24/meta_kdd_cup_24_banner.jpg)
+[![Discord](https://img.shields.io/discord/565639094860775436.svg)](https://discord.gg/yWurtB2huX)
+
+# Meta KDD Cup '24 [CRAG: Comprehensive RAG Benchmark](https://www.aicrowd.com/challenges/meta-comprehensive-rag-benchmark-kdd-cup-2024) Starter Kit
+
+
+This repository is the CRAG: Comprehensive RAG Benchmark **Submission template and Starter kit**! Clone the repository to compete now!
+
+**This repository contains**:
+*  **Documentation** on how to submit your models to the leaderboard
+*  **The procedure** for best practices and information on how we evaluate your model, etc.
+*  **Starter code** for you to get started!
+
+# Table of Contents
+
+1. [Competition Overview](#-competition-overview)
+2. [Dataset](#-dataset)
+3. [Tasks](#-tasks)
+4. [Evaluation Metrics](#-evaluation-metrics)
+5. [Getting Started](#-getting-started)
+   - [How to write your own model?](#️-how-to-write-your-own-model)
+   - [How to start participating?](#-how-to-start-participating)
+      - [Setup](#setup)
+      - [How to make a submission?](#-how-to-make-a-submission)
+      - [What hardware does my code run on?](#-what-hardware-does-my-code-run-on-)
+      - [How are my model responses parsed by the evaluators?](#-how-are-my-model-responses-parsed-by-the-evaluators-)
+      - [Baseline](#-baseline)
+6. [Frequently Asked Questions](#-frequently-asked-questions)
+7. [Important Links](#-important-links)
+
+
+# 📖 Competition Overview
+
+
+# 📊 Dataset
+
+Please find more details about the dataset in [docs/dataset.md](docs/dataset.md).
+
+# 👨‍💻👩‍💻 Tasks  
+
+
+## 📏 Evaluation Metrics
+
+
+Please refer to [local_evaluation.py](local_evaluation.py) for more details on how we will evaluate your submissions.
+
+# 🏁 Getting Started
+1. **Sign up** to join the competition [on the AIcrowd website](https://www.aicrowd.com/challenges/meta-comprehensive-rag-benchmark-kdd-cup-2024).
+2. **Fork** this starter kit repository. You can use [this link](https://gitlab.aicrowd.com/aicrowd/challenges/meta-comprehensive-rag-benchmark-kdd-cup-2024/meta-comphrehensive-rag-benchmark-starter-kit/-/forks/new) to create a fork.
+3. **Clone** your forked repo and start developing your model.
+4. **Develop** your model(s) following the template in [how to write your own model](#how-to-write-your-own-model) section.
+5. [**Submit**](#-how-to-make-a-submission) your trained models to [AIcrowd Gitlab](https://gitlab.aicrowd.com) for evaluation [(full instructions below)](#-how-to-make-a-submission). The automated evaluation will evaluate the submissions on the public test set and report the metrics on the leaderboard of the competition.
+
+# ✍️ How to write your own model?
+
+Please see [models/README.md](models/README.md) for instructions and examples on how to write your own models for this competition.
+
+# 🚴 How to start participating?
+
+## Setup
+
+1. **Add your SSH key** to AIcrowd GitLab
+
+You can add your SSH Keys to your GitLab account by going to your profile settings [here](https://gitlab.aicrowd.com/-/profile/keys). If you do not have SSH Keys, you will first need to [generate one](https://docs.gitlab.com/ee/user/ssh.html).
+
+
+2. **Fork the repository**. You can use [this link](https://gitlab.aicrowd.com/aicrowd/challenges/meta-comprehensive-rag-benchmark-kdd-cup-2024/meta-comphrehensive-rag-benchmark-starter-kit/-/forks/new) to create a fork.
+
+3.  **Clone the repository**
+
+    ```bash
+    git clone git@gitlab.aicrowd.com:<YOUR-AICROWD-USERNAME>/meta-comphrehensive-rag-benchmark-starter-kit.git
+    cd meta-comphrehensive-rag-benchmark-starter-kit
+    ```
+
+4. **Install** competition specific dependencies!
+    ```bash
+    cd meta-comphrehensive-rag-benchmark-starter-kit
+    pip install -r requirements.txt
+    ```
+
+5. Write your own model as described in [How to write your own model](#how-to-write-your-own-model) section.
+
+6. Test your model locally using `python local_evaluation.py`.
+
+7. Accept the Challenge Rules on the main [challenge page](https://www.aicrowd.com/challenges/meta-comprehensive-rag-benchmark-kdd-cup-2024) by clicking on the **Participate** button. Also accept the Challenge Rules on the Task specific page (link on the challenge page) that you want to submit to.
+
+8. Make a submission as described in [How to make a submission](#-how-to-make-a-submission) section.
+
+# ✍️ How to write your own model?
+
+Please see [models/README.md](models/README.md) for instructions and examples on how to write your own models for this competition.
+
+
+## 📮 How to make a submission?
+
+Please follow the instructions in [docs/submission.md](docs/submission.md) to make your first submission. 
+This also includes instructions on [specifying your software runtime](docs/submission.md#specifying-software-runtime-and-dependencies), [code structure](docs/submission.md#code-structure-guidelines), [submitting to different tracks](docs/submission.md#submitting-to-different-tracks).
+
+**Note**: **Remember to accept the Challenge Rules** on the challenge page, **and** the task page before making your first submission.
+
+## 💻 What hardware does my code run on ?
+You can find more details about the hardware and system configuration in [docs/hardware-and-system-config.md](docs/hardware-and-system-config.md).
+In summary, we provide you with `4` x [NVIDIA T4 GPUs](https://www.nvidia.com/en-us/data-center/tesla-t4/).
+
+## 🏁 Baseline
+We include three baselines for demonstration purposes, and you can read more about them in [docs/baselines.md](docs/baselines.md).
+
+# ❓ Frequently Asked Questions
+## Which track is this starter kit for ?
+This starter kit can be used to submit to any of the tracks. You can find more information in [docs/submission.md#submitting-to-different-tracks](docs/submission.md#submitting-to-different-tracks).
+
+## Where can I know more about the dataset schema ?
+The dataset schema is described in [docs/dataset.md](docs/dataset.md).
+
+If you want to use Croissant to view the data, please use [docs/croissant.json](docs/croissant.json).
+
+**Best of Luck** :tada: :tada:
+
+# 📎 Important links
+
+- 💪 Challenge Page: https://www.aicrowd.com/challenges/meta-comprehensive-rag-benchmark-kdd-cup-2024
+- 🗣 Discussion Forum: https://www.aicrowd.com/challenges/meta-comprehensive-rag-benchmark-kdd-cup-2024/discussion
+- 🏆 Leaderboard: https://www.aicrowd.com/challenges/meta-comprehensive-rag-benchmark-kdd-cup-2024/leaderboards
@@ -0,0 +1,9 @@
+{
+    "challenge_id": "meta-kdd-cup-24-crag-retrieval-summarization",
+    "authors": [
+      "aicrowd-bot"
+    ],
+    "gpu": false,
+    "gpu_count": 0,
+    "description": "(optional) description about your awesome agent"
+}
@@ -0,0 +1 @@
+git
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+example_data/qa.json filter=lfs diff=lfs merge=lfs -text`
	`2`	`+example_data/web.json filter=lfs diff=lfs merge=lfs -text`
	`3`	`+example_data/dev_data.jsonl.bz2 filter=lfs diff=lfs merge=lfs -text`