Skip to content

Switch remainder of recipe tests over to HF format checkpoints #2871

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 52 additions & 49 deletions tests/recipes/test_eleuther_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,10 @@
from tests.common import TUNE_PATH
from tests.recipes.utils import (
llama3_2_vision_test_config,
llama3_test_config,
write_hf_ckpt_config,
MODEL_TEST_CONFIGS,
write_hf_vision_ckpt_config,
)
from tests.test_utils import CKPT_MODEL_PATHS, gpu_test
from tests.test_utils import CKPT_MODEL_PATHS, gpu_test, TOKENIZER_PATHS


class TestEleutherEval:
Expand Down Expand Up @@ -48,42 +47,38 @@ def expected_vision_acc(self):
}

@pytest.mark.parametrize(
"eval_name, expected_acc, bsz",
"model_ckpt, eval_name, expected_acc, bsz",
[
("truthfulqa_gen", 0.1818, 4),
("truthfulqa_mc2", 0.3015, 4),
("llama3_hf_138m", "truthfulqa_gen", 0.1818, 4),
("llama3_hf_138m", "truthfulqa_mc2", 0.3015, 4),
],
)
@pytest.mark.integration_test
@gpu_test(gpu_count=1)
def test_torchtune_checkpoint_eval_results(
self, caplog, monkeypatch, tmpdir, eval_name, expected_acc, bsz
self, caplog, monkeypatch, tmpdir, eval_name, expected_acc, bsz, model_ckpt
):
ckpt = "llama3_tune"
ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
ckpt_dir = ckpt_path.parent
ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])

# explicitly setting limit to an odd number here to ensure generation tasks
# work with KV-caching + bsz > 1 - we'll receive batches of size 4, 4, 3
cmd = f"""
tune run eleuther_eval \
--config eleuther_evaluation \
output_dir={tmpdir} \
checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
checkpointer.checkpoint_dir='{ckpt_dir}' \
checkpointer.checkpoint_files=[{ckpt_path}]\
checkpointer.checkpoint_files=[model.safetensors]\
checkpointer.output_dir={tmpdir} \
checkpointer.model_type=LLAMA3 \
tokenizer._component_=torchtune.models.llama3.llama3_tokenizer \
tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
tokenizer.path='{tokenizer_path}'\
tokenizer.prompt_template=null \
limit=11 \
dtype=fp32 \
tasks=[{eval_name}]\
batch_size={bsz} \
""".split()

model_config = llama3_test_config()
model_config = MODEL_TEST_CONFIGS[model_ckpt]
cmd = cmd + model_config

monkeypatch.setattr(sys, "argv", cmd)
Expand All @@ -108,28 +103,30 @@ def test_torchtune_checkpoint_eval_results(
@pytest.mark.integration_test
@pytest.mark.usefixtures("hide_correct_version_number")
@gpu_test(gpu_count=1)
def test_eval_recipe_errors_without_lm_eval(self, monkeypatch, tmpdir):
ckpt = "llama3_tune"
ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
ckpt_dir = ckpt_path.parent
@pytest.mark.parametrize(
"model_ckpt",
[
("llama3_hf_138m"),
],
)
def test_eval_recipe_errors_without_lm_eval(self, monkeypatch, tmpdir, model_ckpt):
ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])

cmd = f"""
tune run eleuther_eval \
--config eleuther_evaluation \
output_dir={tmpdir} \
checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
checkpointer.checkpoint_dir='{ckpt_dir}' \
checkpointer.checkpoint_files=[{ckpt_path}]\
checkpointer.checkpoint_files=[model.safetensors]\
checkpointer.output_dir={tmpdir} \
checkpointer.model_type=LLAMA3 \
tokenizer._component_=torchtune.models.llama3.llama3_tokenizer \
tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
tokenizer.path='{tokenizer_path}' \
tokenizer.prompt_template=null \
limit=1 \
dtype=fp32 \
""".split()

model_config = llama3_test_config()
model_config = MODEL_TEST_CONFIGS[model_ckpt]
cmd = cmd + model_config

monkeypatch.setattr(sys, "argv", cmd)
Expand All @@ -142,35 +139,34 @@ def test_eval_recipe_errors_without_lm_eval(self, monkeypatch, tmpdir):

@pytest.mark.integration_test
@gpu_test(gpu_count=1)
@pytest.mark.parametrize(
"model_ckpt",
[
("llama3_hf_138m"),
],
)
def test_eval_recipe_errors_with_quantization_hf_checkpointer(
self, monkeypatch, tmpdir
self, monkeypatch, tmpdir, model_ckpt
):
ckpt = "llama3_tune"
ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
ckpt_dir = ckpt_path.parent

# Config file needed for model conversion.
write_hf_ckpt_config(ckpt_dir)
ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])

cmd = f"""
tune run eleuther_eval \
--config eleuther_evaluation \
output_dir={tmpdir} \
checkpointer=torchtune.training.FullModelHFCheckpointer \
checkpointer.checkpoint_dir='{ckpt_dir}' \
checkpointer.checkpoint_files=[{ckpt_path}]\
checkpointer.checkpoint_files=[model.safetensors]\
checkpointer.output_dir={tmpdir} \
checkpointer.model_type=LLAMA3 \
tokenizer._component_=torchtune.models.llama3.llama3_tokenizer \
tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
tokenizer.path='{tokenizer_path}' \
tokenizer.prompt_template=null \
limit=1 \
dtype=fp32 \
quantizer._component_=torchtune.training.quantization.Int8DynActInt4WeightQuantizer \
quantizer.groupsize=256 \
""".split()

model_config = llama3_test_config()
model_config = MODEL_TEST_CONFIGS[model_ckpt]
cmd = cmd + model_config

monkeypatch.setattr(sys, "argv", cmd)
Expand All @@ -183,30 +179,34 @@ def test_eval_recipe_errors_with_quantization_hf_checkpointer(

@pytest.mark.integration_test
@gpu_test(gpu_count=1)
def test_eval_recipe_errors_with_qat_quantizer(self, monkeypatch, tmpdir):
ckpt = "llama3_tune"
ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
ckpt_dir = ckpt_path.parent
@pytest.mark.parametrize(
"model_ckpt",
[
("llama3_hf_138m"),
],
)
def test_eval_recipe_errors_with_qat_quantizer(
self, monkeypatch, tmpdir, model_ckpt
):
ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])

cmd = f"""
tune run eleuther_eval \
--config eleuther_evaluation \
output_dir={tmpdir} \
checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
checkpointer.checkpoint_dir='{ckpt_dir}' \
checkpointer.checkpoint_files=[{ckpt_path}]\
checkpointer.checkpoint_files=[model.safetensors]\
checkpointer.output_dir={tmpdir} \
checkpointer.model_type=LLAMA3 \
tokenizer._component_=torchtune.models.llama3.llama3_tokenizer \
tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
tokenizer.path='{tokenizer_path}' \
tokenizer.prompt_template=null \
limit=1 \
dtype=fp32 \
quantizer._component_=torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer \
quantizer.groupsize=32\
""".split()

model_config = llama3_test_config()
model_config = MODEL_TEST_CONFIGS[model_ckpt]
cmd = cmd + model_config

monkeypatch.setattr(sys, "argv", cmd)
Expand All @@ -223,6 +223,9 @@ def test_meta_eval_vision(self, caplog, monkeypatch, tmpdir, expected_vision_acc
ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
ckpt_dir = ckpt_path.parent

# Config file needed for model conversion.
write_hf_vision_ckpt_config(ckpt_dir)

cmd = f"""
tune run eleuther_eval \
--config llama3_2_vision/11B_evaluation \
Expand Down
80 changes: 29 additions & 51 deletions tests/recipes/test_full_dpo_distributed.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,7 @@
import pytest
import torch
from tests.common import TUNE_PATH
from tests.recipes.utils import (
dummy_stack_exchange_dataset_config,
MODEL_TEST_CONFIGS,
write_hf_ckpt_config,
)
from tests.recipes.utils import dummy_stack_exchange_dataset_config, MODEL_TEST_CONFIGS
from tests.test_utils import (
CKPT_MODEL_PATHS,
gen_log_file_name,
Expand Down Expand Up @@ -48,8 +44,14 @@ def _get_test_config_overrides(self, dtype_str: str = "fp32", epochs: int = 2):
] + dummy_stack_exchange_dataset_config()

@pytest.mark.integration_test
@pytest.mark.parametrize(
"model_ckpt",
[
("llama3_hf_138m"),
],
)
@gpu_test(gpu_count=2)
def test_training_state_on_resume(self, tmpdir, monkeypatch):
def test_training_state_on_resume(self, tmpdir, monkeypatch, model_ckpt):
"""Test whether the recipe state is correctly updated on resume. Since this
is model agnostic, we should run this on the small model only. The test
consists of three stages:
Expand All @@ -58,37 +60,26 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
- Make sure final loss matches the expected value of a model successfully resumed from a ckpt
"""

ckpt = "llama3_tune"
ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
ckpt_dir = ckpt_path.parent
ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
log_file = gen_log_file_name(tmpdir)
tokenizer_path = Path(TOKENIZER_PATHS["llama3"])

# Config file needed for model conversion.
# Create a second copy for training resume
write_hf_ckpt_config(ckpt_dir)
write_hf_ckpt_config(tmpdir)
tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])

# Train for two epochs
cmd_1 = f"""
tune run --nnodes 1 --nproc_per_node 2 full_dpo_distributed \
--config llama3_1/8B_full_dpo \
output_dir={tmpdir} \
checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
checkpointer.checkpoint_dir='{ckpt_dir}' \
checkpointer.checkpoint_files=[{ckpt_path}]\
checkpointer.checkpoint_files=[model.safetensors]\
checkpointer.output_dir={tmpdir} \
checkpointer.model_type=LLAMA3 \
ref_checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
ref_checkpointer.checkpoint_dir='{ckpt_dir}' \
ref_checkpointer.checkpoint_files=[{ckpt_path}]\
ref_checkpointer.checkpoint_files=[model.safetensors]\
ref_checkpointer.output_dir={tmpdir} \
ref_checkpointer.model_type=LLAMA3 \
tokenizer.path='{tokenizer_path}' \
tokenizer.prompt_template=null \
metric_logger.filename={log_file} \
""".split()
model_config = MODEL_TEST_CONFIGS["llama3"]
model_config = MODEL_TEST_CONFIGS[model_ckpt]
cmd_1 = cmd_1 + self._get_test_config_overrides() + model_config
monkeypatch.setattr(sys, "argv", cmd_1)
runpy.run_path(TUNE_PATH, run_name="__main__")
Expand All @@ -100,7 +91,7 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
)

# We rename the model and we want to resume from epoch 0 (which trained for 1 epoch)
ckpt_to_resume_from = "epoch_0/model-00001-of-00001.bin"
ckpt_to_resume_from = "epoch_0/model-00001-of-00001.safetensors"

# Now we resume training from epoch 1
resumed_log_dir = (tmpdir / "resumed/").mkdir()
Expand All @@ -109,16 +100,12 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
tune run --nnodes 1 --nproc_per_node 2 full_dpo_distributed \
--config llama3_1/8B_full_dpo \
output_dir={tmpdir} \
checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
checkpointer.checkpoint_dir='{ckpt_dir}' \
checkpointer.checkpoint_files=[{ckpt_to_resume_from}]\
checkpointer.output_dir={tmpdir} \
checkpointer.model_type=LLAMA3 \
ref_checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
ref_checkpointer.checkpoint_dir='{ckpt_dir}' \
ref_checkpointer.checkpoint_files=[{ckpt_path}]\
ref_checkpointer.checkpoint_files=[model.safetensors]\
ref_checkpointer.output_dir={tmpdir} \
ref_checkpointer.model_type=LLAMA3 \
resume_from_checkpoint=True \
tokenizer.path='{tokenizer_path}' \
tokenizer.prompt_template=null \
Expand All @@ -135,44 +122,39 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
)

@pytest.mark.integration_test
@pytest.mark.parametrize(
"model_ckpt",
[
("llama3_hf_138m"),
],
)
@gpu_test(gpu_count=2)
def test_training_state_on_resume_with_async_checkpointing(
self, tmpdir, monkeypatch
self, tmpdir, monkeypatch, model_ckpt
):
"""Same as above test but with async checkpointing."""
ckpt = "llama3_tune"
ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
ckpt_dir = ckpt_path.parent
ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
log_file = gen_log_file_name(tmpdir)
tokenizer_path = Path(TOKENIZER_PATHS["llama3"])

# Config file needed for model conversion.
# Create a second copy for training resume
write_hf_ckpt_config(ckpt_dir)
write_hf_ckpt_config(tmpdir)
tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])

# Train for two epochs
cmd_1 = f"""
tune run --nnodes 1 --nproc_per_node 2 full_dpo_distributed \
--config llama3_1/8B_full_dpo \
output_dir={tmpdir} \
checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
checkpointer.checkpoint_dir='{ckpt_dir}' \
checkpointer.checkpoint_files=[{ckpt_path}]\
checkpointer.checkpoint_files=[model.safetensors]\
checkpointer.output_dir={tmpdir} \
checkpointer.model_type=LLAMA3 \
ref_checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
ref_checkpointer.checkpoint_dir='{ckpt_dir}' \
ref_checkpointer.checkpoint_files=[{ckpt_path}]\
ref_checkpointer.checkpoint_files=[model.safetensors]\
ref_checkpointer.output_dir={tmpdir} \
ref_checkpointer.model_type=LLAMA3 \
tokenizer.path='{tokenizer_path}' \
tokenizer.prompt_template=null \
metric_logger.filename={log_file} \
enable_async_checkpointing=True \
""".split()

model_config = MODEL_TEST_CONFIGS["llama3"]
model_config = MODEL_TEST_CONFIGS[model_ckpt]

cmd_1 = cmd_1 + self._get_test_config_overrides() + model_config
monkeypatch.setattr(sys, "argv", cmd_1)
Expand All @@ -191,16 +173,12 @@ def test_training_state_on_resume_with_async_checkpointing(
tune run --nnodes 1 --nproc_per_node 2 full_dpo_distributed \
--config llama3_1/8B_full_dpo \
output_dir={tmpdir} \
checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
checkpointer.checkpoint_dir='{ckpt_dir}' \
checkpointer.checkpoint_files=[{ckpt_path}]\
checkpointer.checkpoint_files=[model.safetensors]\
checkpointer.output_dir={tmpdir} \
checkpointer.model_type=LLAMA3 \
ref_checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
ref_checkpointer.checkpoint_dir='{ckpt_dir}' \
ref_checkpointer.checkpoint_files=[{ckpt_path}]\
ref_checkpointer.checkpoint_files=[model.safetensors]\
ref_checkpointer.output_dir={tmpdir} \
ref_checkpointer.model_type=LLAMA3 \
resume_from_checkpoint=True \
tokenizer.path='{tokenizer_path}' \
tokenizer.prompt_template=null \
Expand Down
Loading