pytorch · adheep04 · Jul 8, 2025 · Jul 11, 2025
diff --git a/tests/recipes/test_eleuther_eval.py b/tests/recipes/test_eleuther_eval.py
@@ -15,11 +15,10 @@
 from tests.common import TUNE_PATH
 from tests.recipes.utils import (
     llama3_2_vision_test_config,
-    llama3_test_config,
-    write_hf_ckpt_config,
+    MODEL_TEST_CONFIGS,
     write_hf_vision_ckpt_config,
 )
-from tests.test_utils import CKPT_MODEL_PATHS, gpu_test
+from tests.test_utils import CKPT_MODEL_PATHS, gpu_test, TOKENIZER_PATHS
 
 
 class TestEleutherEval:
@@ -48,42 +47,38 @@ def expected_vision_acc(self):
         }
 
     @pytest.mark.parametrize(
-        "eval_name, expected_acc, bsz",
+        "model_ckpt, eval_name, expected_acc, bsz",
         [
-            ("truthfulqa_gen", 0.1818, 4),
-            ("truthfulqa_mc2", 0.3015, 4),
+            ("llama3_hf_138m", "truthfulqa_gen", 0.1818, 4),
+            ("llama3_hf_138m", "truthfulqa_mc2", 0.3015, 4),
         ],
     )
     @pytest.mark.integration_test
     @gpu_test(gpu_count=1)
     def test_torchtune_checkpoint_eval_results(
-        self, caplog, monkeypatch, tmpdir, eval_name, expected_acc, bsz
+        self, caplog, monkeypatch, tmpdir, eval_name, expected_acc, bsz, model_ckpt
     ):
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
 
         # explicitly setting limit to an odd number here to ensure generation tasks
         # work with KV-cacheing + bsz > 1 - we'll receive batches of size 4, 4, 3
         cmd = f"""
         tune run eleuther_eval \
             --config eleuther_evaluation \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            tokenizer._component_=torchtune.models.llama3.llama3_tokenizer \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
+            tokenizer.path='{tokenizer_path}'\
             tokenizer.prompt_template=null \
             limit=11 \
             dtype=fp32 \
             tasks=[{eval_name}]\
             batch_size={bsz} \
         """.split()
 
-        model_config = llama3_test_config()
+        model_config = MODEL_TEST_CONFIGS[model_ckpt]
         cmd = cmd + model_config
 
         monkeypatch.setattr(sys, "argv", cmd)
@@ -108,28 +103,30 @@ def test_torchtune_checkpoint_eval_results(
     @pytest.mark.integration_test
     @pytest.mark.usefixtures("hide_correct_version_number")
     @gpu_test(gpu_count=1)
-    def test_eval_recipe_errors_without_lm_eval(self, monkeypatch, tmpdir):
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
+    def test_eval_recipe_errors_without_lm_eval(self, monkeypatch, tmpdir, model_ckpt):
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
 
         cmd = f"""
         tune run eleuther_eval \
             --config eleuther_evaluation \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            tokenizer._component_=torchtune.models.llama3.llama3_tokenizer \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
+            tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             limit=1 \
             dtype=fp32 \
         """.split()
 
-        model_config = llama3_test_config()
+        model_config = MODEL_TEST_CONFIGS[model_ckpt]
         cmd = cmd + model_config
 
         monkeypatch.setattr(sys, "argv", cmd)
@@ -142,35 +139,34 @@ def test_eval_recipe_errors_without_lm_eval(self, monkeypatch, tmpdir):
 
     @pytest.mark.integration_test
     @gpu_test(gpu_count=1)
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
     def test_eval_recipe_errors_with_quantization_hf_checkpointer(
-        self, monkeypatch, tmpdir
+        self, monkeypatch, tmpdir, model_ckpt
     ):
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
-
-        # Config file needed for model conversion.
-        write_hf_ckpt_config(ckpt_dir)
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
 
         cmd = f"""
         tune run eleuther_eval \
             --config eleuther_evaluation \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelHFCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            tokenizer._component_=torchtune.models.llama3.llama3_tokenizer \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
+            tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             limit=1 \
             dtype=fp32 \
             quantizer._component_=torchtune.training.quantization.Int8DynActInt4WeightQuantizer \
             quantizer.groupsize=256 \
         """.split()
 
-        model_config = llama3_test_config()
+        model_config = MODEL_TEST_CONFIGS[model_ckpt]
         cmd = cmd + model_config
 
         monkeypatch.setattr(sys, "argv", cmd)
@@ -183,30 +179,34 @@ def test_eval_recipe_errors_with_quantization_hf_checkpointer(
 
     @pytest.mark.integration_test
     @gpu_test(gpu_count=1)
-    def test_eval_recipe_errors_with_qat_quantizer(self, monkeypatch, tmpdir):
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
+    def test_eval_recipe_errors_with_qat_quantizer(
+        self, monkeypatch, tmpdir, model_ckpt
+    ):
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
 
         cmd = f"""
         tune run eleuther_eval \
             --config eleuther_evaluation \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            tokenizer._component_=torchtune.models.llama3.llama3_tokenizer \
-            tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
+            tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             limit=1 \
             dtype=fp32 \
             quantizer._component_=torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer \
             quantizer.groupsize=32\
         """.split()
 
-        model_config = llama3_test_config()
+        model_config = MODEL_TEST_CONFIGS[model_ckpt]
         cmd = cmd + model_config
 
         monkeypatch.setattr(sys, "argv", cmd)
@@ -223,6 +223,9 @@ def test_meta_eval_vision(self, caplog, monkeypatch, tmpdir, expected_vision_acc
         ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
         ckpt_dir = ckpt_path.parent
 
+        # Config file needed for model conversion.
+        write_hf_vision_ckpt_config(ckpt_dir)
+
         cmd = f"""
         tune run eleuther_eval \
             --config llama3_2_vision/11B_evaluation \

diff --git a/tests/recipes/test_full_dpo_distributed.py b/tests/recipes/test_full_dpo_distributed.py
@@ -11,11 +11,7 @@
 import pytest
 import torch
 from tests.common import TUNE_PATH
-from tests.recipes.utils import (
-    dummy_stack_exchange_dataset_config,
-    MODEL_TEST_CONFIGS,
-    write_hf_ckpt_config,
-)
+from tests.recipes.utils import dummy_stack_exchange_dataset_config, MODEL_TEST_CONFIGS
 from tests.test_utils import (
     CKPT_MODEL_PATHS,
     gen_log_file_name,
@@ -48,8 +44,14 @@ def _get_test_config_overrides(self, dtype_str: str = "fp32", epochs: int = 2):
         ] + dummy_stack_exchange_dataset_config()
 
     @pytest.mark.integration_test
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
     @gpu_test(gpu_count=2)
-    def test_training_state_on_resume(self, tmpdir, monkeypatch):
+    def test_training_state_on_resume(self, tmpdir, monkeypatch, model_ckpt):
         """Test whether the recipe state is correctly updated on resume. Since this
         is model agnostic, we should run this on the small model only. The test
         consists of three stages:
@@ -58,37 +60,26 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
             - Make sure final loss matches the expected value of a model successfully resumed from a ckpt
         """
 
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
-        tokenizer_path = Path(TOKENIZER_PATHS["llama3"])
-
-        # Config file needed for model conversion.
-        # Create a second copy for training resume
-        write_hf_ckpt_config(ckpt_dir)
-        write_hf_ckpt_config(tmpdir)
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
 
         # Train for two epochs
         cmd_1 = f"""
         tune run --nnodes 1 --nproc_per_node 2 full_dpo_distributed \
             --config llama3_1/8B_full_dpo \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            ref_checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             ref_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            ref_checkpointer.checkpoint_files=[{ckpt_path}]\
+            ref_checkpointer.checkpoint_files=[model.safetensors]\
             ref_checkpointer.output_dir={tmpdir} \
-            ref_checkpointer.model_type=LLAMA3 \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             metric_logger.filename={log_file} \
         """.split()
-        model_config = MODEL_TEST_CONFIGS["llama3"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt]
         cmd_1 = cmd_1 + self._get_test_config_overrides() + model_config
         monkeypatch.setattr(sys, "argv", cmd_1)
         runpy.run_path(TUNE_PATH, run_name="__main__")
@@ -100,7 +91,7 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
         )
 
         # We rename the model and we want to resume from epoch 0 (which trained for 1 epoch)
-        ckpt_to_resume_from = "epoch_0/model-00001-of-00001.bin"
+        ckpt_to_resume_from = "epoch_0/model-00001-of-00001.safetensors"
 
         # Now we resume training from epoch 1
         resumed_log_dir = (tmpdir / "resumed/").mkdir()
@@ -109,16 +100,12 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
         tune run --nnodes 1 --nproc_per_node 2 full_dpo_distributed \
             --config llama3_1/8B_full_dpo \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
             checkpointer.checkpoint_files=[{ckpt_to_resume_from}]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            ref_checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             ref_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            ref_checkpointer.checkpoint_files=[{ckpt_path}]\
+            ref_checkpointer.checkpoint_files=[model.safetensors]\
             ref_checkpointer.output_dir={tmpdir} \
-            ref_checkpointer.model_type=LLAMA3 \
             resume_from_checkpoint=True \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
@@ -135,44 +122,39 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
         )
 
     @pytest.mark.integration_test
+    @pytest.mark.parametrize(
+        "model_ckpt",
+        [
+            ("llama3_hf_138m"),
+        ],
+    )
     @gpu_test(gpu_count=2)
     def test_training_state_on_resume_with_async_checkpointing(
-        self, tmpdir, monkeypatch
+        self, tmpdir, monkeypatch, model_ckpt
     ):
         """Same as above test but with async checkpointing."""
-        ckpt = "llama3_tune"
-        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
-        ckpt_dir = ckpt_path.parent
+        ckpt_dir = Path(CKPT_MODEL_PATHS[model_ckpt])
         log_file = gen_log_file_name(tmpdir)
-        tokenizer_path = Path(TOKENIZER_PATHS["llama3"])
-
-        # Config file needed for model conversion.
-        # Create a second copy for training resume
-        write_hf_ckpt_config(ckpt_dir)
-        write_hf_ckpt_config(tmpdir)
+        tokenizer_path = Path(TOKENIZER_PATHS[model_ckpt])
 
         # Train for two epochs
         cmd_1 = f"""
         tune run --nnodes 1 --nproc_per_node 2 full_dpo_distributed \
             --config llama3_1/8B_full_dpo \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            ref_checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             ref_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            ref_checkpointer.checkpoint_files=[{ckpt_path}]\
+            ref_checkpointer.checkpoint_files=[model.safetensors]\
             ref_checkpointer.output_dir={tmpdir} \
-            ref_checkpointer.model_type=LLAMA3 \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \
             metric_logger.filename={log_file} \
             enable_async_checkpointing=True \
         """.split()
 
-        model_config = MODEL_TEST_CONFIGS["llama3"]
+        model_config = MODEL_TEST_CONFIGS[model_ckpt]
 
         cmd_1 = cmd_1 + self._get_test_config_overrides() + model_config
         monkeypatch.setattr(sys, "argv", cmd_1)
@@ -191,16 +173,12 @@ def test_training_state_on_resume_with_async_checkpointing(
         tune run --nnodes 1 --nproc_per_node 2 full_dpo_distributed \
             --config llama3_1/8B_full_dpo \
             output_dir={tmpdir} \
-            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             checkpointer.checkpoint_dir='{ckpt_dir}' \
-            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.checkpoint_files=[model.safetensors]\
             checkpointer.output_dir={tmpdir} \
-            checkpointer.model_type=LLAMA3 \
-            ref_checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
             ref_checkpointer.checkpoint_dir='{ckpt_dir}' \
-            ref_checkpointer.checkpoint_files=[{ckpt_path}]\
+            ref_checkpointer.checkpoint_files=[model.safetensors]\
             ref_checkpointer.output_dir={tmpdir} \
-            ref_checkpointer.model_type=LLAMA3 \
             resume_from_checkpoint=True \
             tokenizer.path='{tokenizer_path}' \
             tokenizer.prompt_template=null \