18 changes: 18 additions & 0 deletions .github/scripts/spellcheck_conf/wordlist.txt
@@ -1561,3 +1561,21 @@ VSCode
applyTo
mdc
windsurfrules
FPFT
PagedAdamW
Torchtune
WandB
bfclv
bwd
cellpadding
cellspacing
chartqa
checkpointer
docvqa
enconder
gsm
preprocessors
seqs
torchtune
verifications
Waitlist
3 changes: 2 additions & 1 deletion getting-started/README.md
@@ -21,5 +21,6 @@ If you are new to developing with Meta Llama models, this is where you should st
* The [Build_with_Llama API](./build_with_llama_api.ipynb) notebook highlights some of the features of [Llama API](https://llama.developer.meta.com?utm_source=llama-cookbook&utm_medium=readme&utm_campaign=getting_started).
* The [inference](./inference/) folder contains scripts to deploy Llama for inference on server and mobile. See also [3p_integrations/vllm](../3p-integrations/vllm/) and [3p_integrations/tgi](../3p-integrations/tgi/) for hosting Llama on open-source model servers.
* The [RAG](./RAG/) folder contains a simple Retrieval-Augmented Generation application using Llama.
* The [finetuning](./finetuning/) folder contains resources to help you finetune Llama on your custom datasets, for both single- and multi-GPU setups. The scripts use the native llama-cookbook finetuning code found in [finetuning.py](../src/llama_cookbook/finetuning.py) which supports these features.
* The [finetuning](./finetuning/) folder contains resources to help you finetune Llama on your custom datasets, for both single- and multi-GPU setups. The scripts use the native llama-cookbook finetuning code found in [finetuning.py](../src/llama_cookbook/finetuning.py) which supports these features:
* **NEW:** [Vision fine-tuning recipe](./finetuning/vision/README.md) for Llama 3.2 11B Vision - Learn how to fine-tune multimodal models for document understanding with 98% accuracy on structured data extraction!
* The [llama-tools](./llama-tools/) folder contains resources to help you use Llama tools, such as [llama-prompt-ops](../llama-tools/llama-prompt-ops_101.ipynb).
3 changes: 3 additions & 0 deletions getting-started/finetuning/README.md
@@ -13,6 +13,9 @@ If you are new to fine-tuning techniques, check out [an overview](./LLM_finetuni
> [!TIP]
> If you want to try finetuning Meta Llama 3 in a Jupyter notebook you can find a quickstart notebook [here](./quickstart_peft_finetuning.ipynb)

> [!NOTE]
> **New: Vision Fine-tuning Recipe** - Looking to fine-tune Llama 3.2 11B Vision for structured data extraction? Check out our comprehensive [vision fine-tuning recipe](./vision/README.md) that achieves 98% accuracy on document understanding tasks using torchtune, with detailed benchmarking comparing LoRA vs Full Parameter Fine-Tuning approaches.
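
A minimal launch sketch for the recipe, assuming torchtune is installed and you have access to the gated Llama 3.2 Vision weights on Hugging Face. The config file names and model directory match the files added in this PR, but flags may need adjusting for your environment:

```bash
# Download the base model into the directory the configs expect (gated repo; requires prior access approval).
tune download meta-llama/Llama-3.2-11B-Vision-Instruct --output-dir ./Llama-3.2-11B-Vision-Instruct

# LoRA fine-tuning on a single GPU with the recipe's LoRA config.
tune run lora_finetune_single_device --config 11B_lora_w2.yaml

# Full-parameter fine-tuning (encoder + fusion trainable) with the matching config.
tune run full_finetune_single_device --config 11B_full_w2.yaml
```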


## How to configure finetuning settings?

7 changes: 7 additions & 0 deletions getting-started/finetuning/vision/.gitignore
@@ -0,0 +1,7 @@
results/
outputs/
w2_evaluation_results/
Llama-3.2-11B-Vision-Instruct/
fake_w2_us_tax_form_dataset_train30_test70/
fake_w2_us_tax_form_dataset_train80_test20/
htmlcov/
100 changes: 100 additions & 0 deletions getting-started/finetuning/vision/11B_full_w2.yaml
@@ -0,0 +1,100 @@
# Top-level output directory
output_dir: ./outputs/Llama-3.2-11B-Instruct-w2-full

# Model
model:
  _component_: torchtune.models.llama3_2_vision.llama3_2_vision_11b
  decoder_trainable: False
  encoder_trainable: True
  fusion_trainable: True
  image_size: 560 # Make sure this matches the image_size in tokenizer

# Tokenizer / vision transform
tokenizer:
  _component_: torchtune.models.llama3_2_vision.llama3_2_vision_transform
  path: ./Llama-3.2-11B-Vision-Instruct/original/tokenizer.model
  image_size: 560
  max_seq_len: 8192

# Checkpointing
checkpointer:
  _component_: torchtune.training.FullModelHFCheckpointer
  checkpoint_dir: ./Llama-3.2-11B-Vision-Instruct
  checkpoint_files:
    filename_format: model-{}-of-{}.safetensors
    max_filename: "00005"
  recipe_checkpoint: null
  output_dir: ${output_dir}
  model_type: LLAMA3_VISION

resume_from_checkpoint: false
save_adapter_weights_only: False # PEFT formatting not available yet; this will save in torchtune format only.

# Dataset
dataset:
  _component_: torchtune.datasets.multimodal.vqa_dataset
  source: arrow
  data_files:
    train: "fake_w2_us_tax_form_dataset_train30_test70/train/data-00000-of-00001.arrow"
  split: train
  column_map:
    input: input
    output: ground_truth
    image: image

# General data handling
seed: null
shuffle: true
collate_fn: torchtune.data.padded_collate_tiled_images_and_mask

# Training loop & hyperparams

epochs: 5
max_steps_per_epoch: null
batch_size: 1
gradient_accumulation_steps: 1 # Use to increase effective batch size
# explicit optimizer / scheduler / loss
optimizer:
  _component_: bitsandbytes.optim.PagedAdamW8bit
  lr: 2e-5
optimizer_in_bwd: True # True saves memory. Requires gradient_accumulation_steps=1

loss:
  _component_: torchtune.modules.loss.LinearCrossEntropyLoss

# clip_grad_norm: 1.0
compile: true

# Device & memory
device: cuda
enable_activation_checkpointing: true
dtype: bf16

# Logging

metric_logger:
  _component_: torchtune.training.metric_logging.WandBLogger
  project: llama3_2_w2_extraction
  entity: <your_wandb_entity>
  job_type: full_finetune_single_device
  group: llama-cookbook
log_every_n_steps: 5
save_steps: 100
log_peak_memory_stats: true
log_level: INFO

# Profiler (off by default)
profiler:
  _component_: torchtune.training.setup_torch_profiler
  enabled: false
  output_dir: ${output_dir}/profiling_outputs
  cpu: true
  cuda: true
  profile_memory: false
  with_stack: false
  record_shapes: true
  with_flops: false
  wait_steps: 5
  warmup_steps: 3
  active_steps: 2
  num_cycles: 1
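
This full fine-tuning config freezes the decoder and trains the vision encoder and fusion layers with a paged 8-bit AdamW optimizer stepped in the backward pass. As a hedged sketch, torchtune configs can be overridden from the command line with dotted key=value pairs; the 80/20 dataset path below is only an assumption based on the directory names in the recipe's .gitignore:

```bash
# Launch the full fine-tune; dotted key=value pairs override fields in the YAML above.
# The train80_test20 path is illustrative; point it at wherever your dataset actually lives.
tune run full_finetune_single_device --config 11B_full_w2.yaml \
  epochs=3 \
  dataset.data_files.train="fake_w2_us_tax_form_dataset_train80_test20/train/data-00000-of-00001.arrow"
```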
118 changes: 118 additions & 0 deletions getting-started/finetuning/vision/11B_lora_w2.yaml
@@ -0,0 +1,118 @@
# Top-level output directory
output_dir: ./outputs/Llama-3.2-11B-Instruct-w2-lora

# Model + LoRA settings
model:
  _component_: torchtune.models.llama3_2_vision.lora_llama3_2_vision_11b
  # LoRA hyperparameters
  lora_rank: 8 # higher increases accuracy and memory
  lora_alpha: 16 # usually alpha=2*rank
  lora_dropout: 0.05
  image_size: 560 # Make sure this matches the image_size in tokenizer
  # Which components are trained / adapted
  decoder_trainable: "frozen"
  encoder_trainable: "lora"
  fusion_trainable: "lora"
  lora_attn_modules:
    - 'q_proj'
    - 'v_proj'
    - 'output_proj'
  apply_lora_to_mlp: true
  apply_lora_to_output: false

# Tokenizer / vision transform
tokenizer:
  _component_: torchtune.models.llama3_2_vision.llama3_2_vision_transform
  path: ./Llama-3.2-11B-Vision-Instruct/original/tokenizer.model
  image_size: 560
  max_seq_len: 8192

# Checkpointing
checkpointer:
  _component_: torchtune.training.FullModelHFCheckpointer
  checkpoint_dir: ./Llama-3.2-11B-Vision-Instruct
  checkpoint_files:
    filename_format: model-{}-of-{}.safetensors
    max_filename: "00005"
  recipe_checkpoint: null
  output_dir: ${output_dir}
  model_type: LLAMA3_VISION

resume_from_checkpoint: false
save_adapter_weights_only: false # PEFT formatting not available yet; this will save in torchtune format only.

# Dataset
dataset:
  _component_: torchtune.datasets.multimodal.vqa_dataset
  source: arrow
  data_files:
    train: "fake_w2_us_tax_form_dataset_train30_test70/train/data-00000-of-00001.arrow"
  split: train
  column_map:
    input: input
    output: ground_truth
    image: image

# General data handling
seed: null
shuffle: true
collate_fn: torchtune.data.padded_collate_tiled_images_and_mask

# Training loop & hyperparams

epochs: 5
max_steps_per_epoch: null
batch_size: 1
gradient_accumulation_steps: 1 # Use to increase effective batch size
# explicit optimizer / scheduler / loss
optimizer:
  _component_: torch.optim.AdamW
  fused: true
  weight_decay: 0.01
  lr: 1e-4
optimizer_in_bwd: true

lr_scheduler:
  _component_: torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup
  num_warmup_steps: 100

loss:
  _component_: torchtune.modules.loss.LinearCrossEntropyLoss

clip_grad_norm: 1.0
compile: false

# Device & memory
device: cuda
enable_activation_checkpointing: true
dtype: bf16

# Logging

metric_logger:
  _component_: torchtune.training.metric_logging.WandBLogger
  project: llama3_2_w2_extraction
  entity: <your_wandb_entity>
  job_type: lora_finetune_single_device
  group: llama-cookbook
log_every_n_steps: 5
save_steps: 100
log_peak_memory_stats: true
log_level: INFO

# Profiler (off by default)
profiler:
  _component_: torchtune.training.setup_torch_profiler
  enabled: false
  output_dir: ${output_dir}/profiling_outputs
  cpu: true
  cuda: true
  profile_memory: false
  with_stack: false
  record_shapes: true
  with_flops: false
  wait_steps: 5
  warmup_steps: 3
  active_steps: 2
  num_cycles: 1
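
The LoRA config keeps the decoder frozen and attaches adapters to the attention projections of the encoder and fusion layers. A sketch of launching it with a larger adapter rank via command-line overrides; rank 16 / alpha 32 are illustrative values following the alpha = 2 * rank convention noted in the config comments:

```bash
# Launch LoRA fine-tuning; raising the rank may improve accuracy at the cost of memory,
# so keep alpha at roughly twice the rank as the config comments suggest.
tune run lora_finetune_single_device --config 11B_lora_w2.yaml \
  model.lora_rank=16 \
  model.lora_alpha=32
```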