[QEff Finetune]: Enable --help for finetune CLI #392


Draft · wants to merge 3 commits into base: main
6 changes: 4 additions & 2 deletions QEfficient/cloud/finetune.py
@@ -9,7 +9,6 @@
import warnings
from typing import Any, Dict, Optional, Union

import fire
import numpy as np
import torch
import torch.distributed as dist
@@ -31,6 +30,7 @@
get_custom_data_collator,
get_preprocessed_dataset,
)
from QEfficient.finetune.utils.parser import get_finetune_parser
from QEfficient.finetune.utils.train_utils import get_longest_seq_length, print_model_size, train
from QEfficient.utils._utils import login_and_download_hf_lm

@@ -354,4 +354,6 @@ def main(peft_config_file: str = None, **kwargs) -> None:


if __name__ == "__main__":
fire.Fire(main)
parser = get_finetune_parser()
args = parser.parse_args()
main(**args.__dict__)
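
The entry point now builds an argparse parser instead of delegating CLI handling to fire. Below is a minimal, self-contained sketch of the same pattern (the parser and arguments here are illustrative, not the PR's actual `get_finetune_parser`): the parsed Namespace is converted to a dict and splatted into `main()` as keyword arguments.

```python
import argparse


def main(model_name: str = "meta-llama/Llama-3.2-1B", num_epochs: int = 1, **kwargs) -> None:
    # Stand-in for QEfficient.cloud.finetune.main
    print(f"fine-tuning {model_name} for {num_epochs} epoch(s)")


parser = argparse.ArgumentParser(description="finetune CLI (sketch)")
# Both spellings are accepted; argparse derives the dest from the first
# long option, turning dashes into underscores (model_name, num_epochs).
parser.add_argument("--model-name", "--model_name", type=str, default="meta-llama/Llama-3.2-1B")
parser.add_argument("--num-epochs", "--num_epochs", type=int, default=1)

args = parser.parse_args(["--model_name", "my-org/tiny-llama", "--num-epochs", "2"])
main(**vars(args))  # vars(args) is equivalent to args.__dict__
```

With argparse in place, running the module as a script (presumably `python -m QEfficient.cloud.finetune --help`) prints the generated usage text, which is what this PR enables.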
13 changes: 6 additions & 7 deletions QEfficient/finetune/configs/training.py
@@ -16,7 +16,8 @@ class TrainConfig:
model_name (str): Name of the pre-trained model to fine-tune (default: "meta-llama/Llama-3.2-1B").
tokenizer_name (str): Name of the tokenizer (defaults to model_name if None).
run_validation (bool): Whether to run validation during training (default: True).
batch_size_training (int): Batch size for training (default: 1).
train_batch_size (int): Batch size for training (default: 1).
val_batch_size (int): Batch size for validation (default: 1).
context_length (Optional[int]): Maximum sequence length for inputs (default: None).
gradient_accumulation_steps (int): Steps for gradient accumulation (default: 4).
gradient_checkpointing (bool): Enable gradient checkpointing to save memory at the cost of speed (default: False).
@@ -29,17 +30,17 @@ class TrainConfig:
weight_decay (float): Weight decay for optimizer (default: 0.0).
gamma (float): Learning rate decay factor (default: 0.85).
seed (int): Random seed for reproducibility (default: 42).
use_autocast (bool): Use autocast for mixed precision (default: True).
dataset (str): Dataset name for training (default: "samsum_dataset").
use_fp16 (bool): Use mixed precision training (default: True).
use_autocast (bool): Use autocast for mixed precision (default: True).
val_batch_size (int): Batch size for validation (default: 1).
dataset (str): Dataset name for training (default: "samsum_dataset").
task_type (str): Type of task for which the finetuning is to be done. Options: "generation" and "seq_classification". (default: "generation")
peft_method (str): Parameter-efficient fine-tuning method (default: "lora").
use_peft (bool): Whether to use PEFT (default: True).
from_peft_checkpoint (str): Path to PEFT checkpoint (default: "").
output_dir (str): Directory to save outputs (default: "meta-llama-samsum").
num_freeze_layers (int): Number of layers to freeze (default: 1).
one_qaic (bool): Use single QAIC device (default: False).
save_model (bool): Save the trained model (default: True).
save_metrics (bool): Save training metrics (default: True).
intermediate_step_save (int): Steps between intermediate saves (default: 1000).
@@ -58,7 +59,8 @@ class TrainConfig:
model_name: str = "meta-llama/Llama-3.2-1B"
tokenizer_name: str = None # if not passed as an argument, it uses the value of model_name
run_validation: bool = True
batch_size_training: int = 1
train_batch_size: int = 1
val_batch_size: int = 1
context_length: int = None
gradient_accumulation_steps: int = 4
gradient_checkpointing: bool = False
@@ -71,17 +73,14 @@
weight_decay: float = 0.0
gamma: float = 0.85 # multiplicatively decay the learning rate by gamma after each epoch
seed: int = 42
use_fp16: bool = True
use_autocast: bool = True
val_batch_size: int = 1
dataset = "samsum_dataset"
task_type = "generation" # "generation" / "seq_classification"
peft_method: str = "lora"
use_peft: bool = True # use parameter efficient fine tuning
from_peft_checkpoint: str = "" # if not empty and use_peft=True, will load the peft checkpoint and resume the fine-tuning on that checkpoint
output_dir: str = "meta-llama-samsum"
num_freeze_layers: int = 1
one_qaic: bool = False
save_model: bool = True
save_metrics: bool = True # saves training metrics to a json file for later plotting
intermediate_step_save: int = 1000
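
Since TrainConfig is a dataclass, the parsed CLI keyword arguments can be applied to a config instance by matching them against the dataclass fields. The sketch below uses a hypothetical `apply_overrides` helper and a trimmed stand-in config; it illustrates the idea only and may differ from the repo's actual update path.

```python
from dataclasses import dataclass, fields


@dataclass
class TrainConfig:  # trimmed stand-in for QEfficient.finetune.configs.training.TrainConfig
    model_name: str = "meta-llama/Llama-3.2-1B"
    train_batch_size: int = 1
    val_batch_size: int = 1


def apply_overrides(config, **kwargs):
    # Copy only keys that name a real dataclass field, skipping unset (None) values.
    valid = {f.name for f in fields(config)}
    for key, value in kwargs.items():
        if key in valid and value is not None:
            setattr(config, key, value)
    return config


cfg = apply_overrides(TrainConfig(), train_batch_size=4, val_batch_size=2)
print(cfg.train_batch_size, cfg.val_batch_size)  # 4 2
```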
2 changes: 1 addition & 1 deletion QEfficient/finetune/utils/config_utils.py
@@ -117,7 +117,7 @@ def generate_dataset_config(dataset_name: str) -> Any:

def get_dataloader_kwargs(train_config, dataset, dataset_processer, mode):
kwargs = {}
batch_size = train_config.batch_size_training if mode == "train" else train_config.val_batch_size
batch_size = train_config.train_batch_size if mode == "train" else train_config.val_batch_size
if train_config.enable_ddp:
if train_config.enable_sorting_for_ddp:
if train_config.context_length:
18 changes: 0 additions & 18 deletions QEfficient/finetune/utils/dataset_utils.py
@@ -9,7 +9,6 @@

# from QEfficient.finetune.data.concatenator import ConcatDataset
from QEfficient.finetune.dataset.dataset_config import DATALOADER_COLLATE_FUNC, DATASET_PREPROC
from QEfficient.finetune.utils.config_utils import get_dataloader_kwargs


def get_preprocessed_dataset(
@@ -29,20 +28,3 @@ def get_custom_data_collator(dataset_processer, dataset_config) -> torch.utils.d
return None

return DATALOADER_COLLATE_FUNC[dataset_config.dataset](dataset_processer, dataset_config)


def get_dataloader(tokenizer, dataset_config, train_config, split: str = "train"):
dataset = get_preprocessed_dataset(tokenizer, dataset_config, split)
dl_kwargs = get_dataloader_kwargs(train_config, dataset, tokenizer, split)

# if split == "train" and train_config.batching_strategy == "packing":
# dataset = ConcatDataset(dataset, chunk_size=train_config.context_length)

# Create data loader
dataloader = torch.utils.data.DataLoader(
dataset,
num_workers=train_config.num_workers_dataloader,
pin_memory=True,
**dl_kwargs,
)
return dataloader
242 changes: 242 additions & 0 deletions QEfficient/finetune/utils/parser.py
@@ -0,0 +1,242 @@
import argparse


def get_finetune_parser():
parser = argparse.ArgumentParser(
description="Finetune command: the model is downloaded from Hugging Face, fine-tuned on Cloud AI 100, and the resulting weights are saved."
)
parser.add_argument(
"--model-name",
"--model_name",
required=False,
type=str,
default="meta-llama/Llama-3.2-1B",
help="Name of the pre-trained model to fine-tune",
)
parser.add_argument(
"--tokenizer-name",
"--tokenizer_name",
required=False,
type=str,
default=None,
help="Name of the tokenizer; if not passed, defaults to the value of model_name",
)
parser.add_argument(
"--run-validation",
"--run_validation",
required=False,
type=bool,
default=True,
help="Whether to run validation during training",
)
parser.add_argument(
"--train-batch-size", "--train_batch_size", required=False, type=int, default=1, help="Batch size for training"
)
parser.add_argument(
"--val-batch-size", "--val_batch_size", required=False, type=int, default=1, help="Batch size for validation"
)
parser.add_argument(
"--context-length",
"--context_length",
required=False,
type=int,
default=None,
help="Maximum sequence length for inputs",
)
parser.add_argument(
"--gradient-accumulation-steps",
"--gradient_accumulation_steps",
required=False,
type=int,
default=4,
help="Steps for gradient accumulation",
)
parser.add_argument(
"--gradient-checkpointing",
"--gradient_checkpointing",
required=False,
type=bool,
default=False,
help="Use gradient checkpointing",
)
parser.add_argument(
"--num-epochs", "--num_epochs", required=False, type=int, default=1, help="Number of training epochs"
)
parser.add_argument(
"--max-train-step",
"--max_train_step",
required=False,
type=int,
default=0,
help="Maximum training steps, unlimited if 0",
)
parser.add_argument(
"--max-eval-step",
"--max_eval_step",
required=False,
type=int,
default=0,
help="Maximum evaluation steps, unlimited if 0",
)
parser.add_argument("--device", required=False, type=str, default="qaic", help="Device to train on")
parser.add_argument(
"--num-workers-dataloader",
"--num_workers_dataloader",
required=False,
type=int,
default=1,
help="Number of workers for dataloader",
)
parser.add_argument("--lr", required=False, type=float, default=3e-4, help="Learning rate")
parser.add_argument(
"--weight-decay", "--weight_decay", required=False, type=float, default=0.0, help="Weight decay for optimizer"
)
parser.add_argument(
"--gamma",
required=False,
type=float,
default=0.85,
help="Learning rate decay factor, multiplicatively decays the learning rate by gamma after each epoch",
)
parser.add_argument("--seed", required=False, type=int, default=42, help="Random seed for reproducibility")
parser.add_argument(
"--use-autocast",
"--use_autocast",
required=False,
type=bool,
default=True,
help="Use autocast for mixed precision",
)

parser.add_argument(
"--dataset", required=False, type=str, default="samsum_dataset", help="Dataset name for finetuning"
)
parser.add_argument(
"--task-type",
"--task_type",
required=False,
type=str,
default="generation",
help="generation/seq_classification",
)
parser.add_argument(
"--peft-method",
"--peft_method",
required=False,
type=str,
default="lora",
help="Parameter-efficient fine-tuning method",
)
parser.add_argument(
"--use-peft",
"--use_peft",
required=False,
type=bool,
default=True,
help="Whether to use PEFT (parameter-efficient fine-tuning)",
)
parser.add_argument(
"--from-peft-checkpoint",
"--from_peft_checkpoint",
required=False,
type=str,
default="",
help="Path to load PEFT checkpoint and resume the fine-tuning on that checkpoint",
)
parser.add_argument(
"--output-dir",
"--output_dir",
required=False,
type=str,
default="meta-llama-samsum",
help="Directory to save outputs",
)
parser.add_argument(
"--num-freeze-layers",
"--num_freeze_layers",
required=False,
type=int,
default=1,
help="Number of layers to freeze",
)
parser.add_argument(
"--save-model", "--save_model", required=False, type=bool, default=True, help="Save the trained model"
)
parser.add_argument(
"--save-metrics",
"--save_metrics",
required=False,
type=bool,
default=True,
help="Save training metrics to a json file for later plotting",
)
parser.add_argument(
"--intermediate-step-save",
"--intermediate_step_save",
required=False,
type=int,
default=1000,
help="Steps between intermediate saves",
)
parser.add_argument(
"--batching-strategy",
"--batching_strategy",
required=False,
type=str,
default="packing",
help="Batching strategy",
)
parser.add_argument(
"--enable-sorting-for-ddp",
"--enable_sorting_for_ddp",
required=False,
type=bool,
default=True,
help="Sort data for DDP",
)
parser.add_argument(
"--convergence-counter",
"--convergence_counter",
required=False,
type=int,
default=5,
help="Steps to check convergence, its value should be >= 1, stop fine tuning when loss <= convergence_loss (defined below) for #convergence_counter steps",
)
parser.add_argument(
"--convergence-loss",
"--convergence_loss",
required=False,
type=float,
default=1e-4,
help="Loss threshold for convergence, if loss value is <= convergence_loss for #convergence_counter consecutive steps, fine tuning stops",
)
parser.add_argument(
"--use-profiler", "--use_profiler", required=False, type=bool, default=False, help="Enable profiling"
)
parser.add_argument(
"--enable-ddp", "--enable_ddp", required=False, type=int, default=1000, help="Enable distributed data parallel"
)
parser.add_argument(
"--dist-backend",
"--dist_backend",
required=False,
type=str,
default="cpu:gloo,qaic:qccl,cuda:gloo",
help="Backend for distributed training",
)
parser.add_argument(
"--grad-scaler", "--grad_scaler", required=False, type=bool, default=True, help="Use gradient scaler"
)
parser.add_argument(
"--dump-root-dir",
"--dump_root_dir",
required=False,
type=str,
default="meta-llama-samsum-mismatches/step_",
help="Directory for mismatch dumps",
)
parser.add_argument(
"--opByOpVerifier", required=False, type=bool, default=True, help="Enable operation-by-operation verification"
)

return parser
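
A usage sketch for the new parser (assuming it is importable from a QEfficient checkout), showing how the dash/underscore spellings map onto a single attribute, along with a caveat about `type=bool`: argparse applies `bool()` to the raw string, so any non-empty value parses as True.

```python
from QEfficient.finetune.utils.parser import get_finetune_parser

parser = get_finetune_parser()
args = parser.parse_args(["--model_name", "meta-llama/Llama-3.2-1B", "--train-batch-size", "2"])
# Dashes in the first long option become underscores in the dest,
# so main(**args.__dict__) receives model_name and train_batch_size.
print(args.model_name, args.train_batch_size)

# Caveat: type=bool calls bool() on the string, and bool("False") is True,
# so "--run-validation False" still yields True on the Namespace.
print(parser.parse_args(["--run-validation", "False"]).run_validation)  # True
```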
8 changes: 0 additions & 8 deletions QEfficient/finetune/utils/train_utils.py
@@ -469,14 +469,6 @@ def get_longest_seq_length(data: List[Dict]) -> Tuple[int, int]:
return longest_seq_length, longest_seq_ix


def get_parameter_dtypes(model):
"""Get the data types of model parameters"""
parameter_dtypes = {}
for name, parameter in model.named_parameters():
parameter_dtypes[name] = parameter.dtype
return parameter_dtypes


def print_model_size(model, config) -> None:
"""
Print model name, the number of trainable parameters and initialization time.
1 change: 1 addition & 0 deletions docs/source/finetune.md
@@ -13,6 +13,7 @@ For torch_qaic, assuming QEfficient is already installed,
```bash
pip install /opt/qti-aic/integrations/torch_qaic/py310/torch_qaic-0.1.0-cp310-cp310-linux_x86_64.whl
```
If the qeff-env inside Docker is used, the torch_qaic and accelerate packages are already installed.

## Finetuning
