diff --git a/tests/experimental/test_bco_trainer.py b/tests/experimental/test_bco_trainer.py index 9e70fbac075..ecbfdbb569e 100644 --- a/tests/experimental/test_bco_trainer.py +++ b/tests/experimental/test_bco_trainer.py @@ -33,6 +33,12 @@ @pytest.mark.low_priority class TestBCOTrainer(TrlTestCase): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize( "config_name", [ @@ -45,8 +51,7 @@ class TestBCOTrainer(TrlTestCase): ], ) @require_sklearn - def test_train(self, config_name): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train(self, config_name, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -80,9 +85,14 @@ def test_train(self, config_name): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param.cpu(), new_param.cpu()) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn - def test_train_with_precompute(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_with_precompute(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -117,9 +127,14 @@ def test_train_with_precompute(self): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param.cpu(), new_param.cpu()) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn - def test_train_eval(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_eval(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -145,9 +160,14 @@ def test_train_eval(self): trainer.train() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn - def test_init_with_ref_model_is_model(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_init_with_ref_model_is_model(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -168,9 +188,14 @@ def test_init_with_ref_model_is_model(self): train_dataset=dataset, ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn - def test_tokenize_and_process_tokens(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_tokenize_and_process_tokens(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -224,9 +249,14 @@ def test_tokenize_and_process_tokens(self): assert processed_dataset["completion_attention_mask"][0] == [1, 1, 1, 1, 1, 1, 1] assert processed_dataset["completion_labels"][0] == [-100, -100, -100, -100, 27261, 13, 151645] + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn - def test_train_without_providing_ref_model(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_without_providing_ref_model(self, model_id): model = 
AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -258,9 +288,14 @@ def test_train_without_providing_ref_model(self): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param.cpu(), new_param.cpu()) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn - def test_train_udm(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_udm(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -307,10 +342,15 @@ def embed_prompt(input_ids, attention_mask, model): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param.cpu(), new_param.cpu()) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn @require_peft - def test_train_without_providing_ref_model_with_lora(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_without_providing_ref_model_with_lora(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) lora_config = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, task_type="CAUSAL_LM") tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -345,10 +385,15 @@ def test_train_without_providing_ref_model_with_lora(self): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param.cpu(), new_param.cpu()) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn @require_no_wandb - def test_generate_during_eval_no_wandb(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_generate_during_eval_no_wandb(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -376,10 +421,15 @@ def test_generate_during_eval_no_wandb(self): eval_dataset=dataset["test"], ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn @require_peft - def test_lora_train_and_save(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_lora_train_and_save(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) lora_config = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, task_type="CAUSAL_LM") tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -409,9 +459,14 @@ def test_lora_train_and_save(self): # assert that the model is loaded without giving OSError AutoModelForCausalLM.from_pretrained(self.tmp_dir) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn - def test_compute_metrics(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_compute_metrics(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) diff --git a/tests/experimental/test_grpo_with_replay_buffer_trainer.py b/tests/experimental/test_grpo_with_replay_buffer_trainer.py index 6ab0fdb2887..85304976dc0 100644 --- a/tests/experimental/test_grpo_with_replay_buffer_trainer.py +++ b/tests/experimental/test_grpo_with_replay_buffer_trainer.py @@ -96,11 +96,12 @@ def test_sample(self): @pytest.mark.low_priority class TestUpdateWithReplayBuffer: def setup_method(self): + model_id = 
"trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" config = GRPOWithReplayBufferConfig( replay_buffer_size=5, ) self.trainer = GRPOWithReplayBufferTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=config, train_dataset=None, @@ -251,7 +252,13 @@ def test_update_with_inputs_different_seq_len(self): @pytest.mark.low_priority class TestGRPOWithReplayBufferTrainer(TrlTestCase): - def test_training_with_replay_buffer(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_with_replay_buffer(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") # Guarantee that some rewards have 0 std @@ -271,7 +278,7 @@ def custom_reward_func(completions, **kwargs): report_to="none", ) trainer = GRPOWithReplayBufferTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[custom_reward_func], args=training_args, train_dataset=dataset, diff --git a/tests/experimental/test_trainers_args.py b/tests/experimental/test_trainers_args.py index bd86bb61b5d..c04b291bae6 100644 --- a/tests/experimental/test_trainers_args.py +++ b/tests/experimental/test_trainers_args.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pytest from datasets import load_dataset from transformers import AutoTokenizer @@ -21,9 +22,14 @@ class TestTrainerArg(TrlTestCase): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn - def test_bco(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_bco(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train") training_args = BCOConfig( diff --git a/tests/test_activation_offloading.py b/tests/test_activation_offloading.py index 9944cfd9a4f..d6964a15ea9 100644 --- a/tests/test_activation_offloading.py +++ b/tests/test_activation_offloading.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import pytest import torch from torch import nn from transformers import AutoModelForCausalLM @@ -28,11 +29,16 @@ class TestActivationOffloading(TrlTestCase): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_torch_accelerator @require_peft - def test_offloading_with_peft_models(self) -> None: + def test_offloading_with_peft_models(self, model_id) -> None: """Test that activation offloading works with PEFT models.""" - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device) peft_config = LoraConfig( lora_alpha=16, @@ -75,9 +81,14 @@ def test_offloading_with_peft_models(self) -> None: f"Gradient mismatch for {name_orig}" ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_torch_accelerator - def test_noop_manager_with_offloading(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_noop_manager_with_offloading(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device) inp = torch.randint(0, 100, (2, 10), device=torch_device) @@ -122,10 +133,15 @@ def test_min_offload_size(self): # The test passes if no errors occur, as we're mainly testing # that the logic handles both offloaded and non-offloaded tensors + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_torch_accelerator - def test_real_hf_model(self): + def test_real_hf_model(self, model_id): """Test with an actual HuggingFace model""" - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device) # Create small input diff --git a/tests/test_callbacks.py b/tests/test_callbacks.py index b5323e9292a..5ed2bb574ae 100644 --- a/tests/test_callbacks.py +++ b/tests/test_callbacks.py @@ -66,9 +66,10 @@ def __init__(self, model, ref_model, args, train_dataset, eval_dataset, processi class TestWinRateCallback(TrlTestCase): def setup_method(self): - self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - self.ref_model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + self.model = AutoModelForCausalLM.from_pretrained(model_id) + self.ref_model = AutoModelForCausalLM.from_pretrained(model_id) + self.tokenizer = AutoTokenizer.from_pretrained(model_id) self.tokenizer.pad_token = self.tokenizer.eos_token dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only") dataset["train"] = dataset["train"].select(range(8)) @@ -224,8 +225,9 @@ def test_lora(self): class TestLogCompletionsCallback(TrlTestCase): def setup_method(self): - self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + self.model = AutoModelForCausalLM.from_pretrained(model_id) + self.tokenizer = AutoTokenizer.from_pretrained(model_id) self.tokenizer.pad_token = self.tokenizer.eos_token dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only") dataset["train"] = dataset["train"].select(range(8)) @@ -318,8 +320,9 @@ def 
test_basic_comet(self): @require_mergekit class TestMergeModelCallback(TrlTestCase): def setup_method(self): - self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + self.model = AutoModelForCausalLM.from_pretrained(model_id) + self.tokenizer = AutoTokenizer.from_pretrained(model_id) self.dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") def test_callback(self): @@ -374,8 +377,9 @@ def test_every_checkpoint(self): class TestBEMACallback(TrlTestCase): def setup_method(self): - self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + self.model = AutoModelForCausalLM.from_pretrained(model_id) + self.tokenizer = AutoTokenizer.from_pretrained(model_id) self.tokenizer.pad_token = self.tokenizer.eos_token dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") diff --git a/tests/test_cli.py b/tests/test_cli.py index 823a1d85a23..6a9f838b924 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -16,23 +16,36 @@ from io import StringIO from unittest.mock import patch +import pytest import yaml from .testing_utils import TrlTestCase class TestCLI(TrlTestCase): - def test_dpo(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_dpo(self, model_id): from trl.cli import main - command = f"trl dpo --output_dir {self.tmp_dir} --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_preference --report_to none" + command = f"trl dpo --output_dir {self.tmp_dir} --model_name_or_path {model_id} --dataset_name trl-internal-testing/zen --dataset_config standard_preference --report_to none" with patch("sys.argv", command.split(" ")): main() - def test_dpo_multiple_loss_types(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_dpo_multiple_loss_types(self, model_id): from trl.cli import main - command = f"trl dpo --output_dir {self.tmp_dir} --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_preference --report_to none --loss_type sigmoid bco_pair --loss_weights 1.0 0.5" + command = f"trl dpo --output_dir {self.tmp_dir} --model_name_or_path {model_id} --dataset_name trl-internal-testing/zen --dataset_config standard_preference --report_to none --loss_type sigmoid bco_pair --loss_weights 1.0 0.5" with patch("sys.argv", command.split(" ")): main() @@ -45,17 +58,29 @@ def test_env(self, mock_stdout): main() assert "TRL version: " in mock_stdout.getvalue().strip() - def test_grpo(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_grpo(self, model_id): from trl.cli import main - command = f"trl grpo --output_dir {self.tmp_dir} --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --reward_model_name_or_path trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_prompt_only 
--num_generations 4 --max_completion_length 32 --report_to none" + command = f"trl grpo --output_dir {self.tmp_dir} --model_name_or_path {model_id} --reward_model_name_or_path trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_prompt_only --num_generations 4 --max_completion_length 32 --report_to none" with patch("sys.argv", command.split(" ")): main() - def test_kto(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_kto(self, model_id): from trl.cli import main - command = f"trl kto --output_dir {self.tmp_dir} --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_unpaired_preference --report_to none" + command = f"trl kto --output_dir {self.tmp_dir} --model_name_or_path {model_id} --dataset_name trl-internal-testing/zen --dataset_config standard_unpaired_preference --report_to none" with patch("sys.argv", command.split(" ")): main() @@ -66,21 +91,39 @@ def test_reward(self): with patch("sys.argv", command.split(" ")): main() - def test_rloo(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_rloo(self, model_id): from trl.cli import main - command = f"trl rloo --output_dir {self.tmp_dir} --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --reward_model_name_or_path trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_prompt_only --num_generations 2 --max_completion_length 32 --report_to none" + command = f"trl rloo --output_dir {self.tmp_dir} --model_name_or_path {model_id} --reward_model_name_or_path trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_prompt_only --num_generations 2 --max_completion_length 32 --report_to none" with patch("sys.argv", command.split(" ")): main() - def test_sft(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_sft(self, model_id): from trl.cli import main - command = f"trl sft --output_dir {self.tmp_dir} --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_language_modeling --report_to none" + command = f"trl sft --output_dir {self.tmp_dir} --model_name_or_path {model_id} --dataset_name trl-internal-testing/zen --dataset_config standard_language_modeling --report_to none" with patch("sys.argv", command.split(" ")): main() - def test_sft_config_file(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_sft_config_file(self, model_id): from trl.cli import main output_dir = os.path.join(self.tmp_dir, "output") @@ -88,7 +131,7 @@ def test_sft_config_file(self): # Create a temporary config file config_path = os.path.join(self.tmp_dir, "config.yaml") config_content = { - "model_name_or_path": "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + "model_name_or_path": model_id, "dataset_name": "trl-internal-testing/zen", "dataset_config": "standard_language_modeling", "report_to": "none", diff --git a/tests/test_dataset_formatting.py b/tests/test_dataset_formatting.py index a48f623683e..a95edd94d84 100644 --- a/tests/test_dataset_formatting.py +++ b/tests/test_dataset_formatting.py 
@@ -121,8 +121,9 @@ def test_get_formatting_func_from_dataset_with_unknown_format(self):
 @pytest.mark.filterwarnings("ignore::FutureWarning")
 class TestSetupChatFormat(TrlTestCase):
     def setup_method(self):
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
+        self.model = AutoModelForCausalLM.from_pretrained(model_id)
         # remove built-in chat_template to simulate a model having no chat_template
         self.tokenizer.chat_template = None

diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py
index 154e32bf7e0..b47efd4b957 100644
--- a/tests/test_dpo_trainer.py
+++ b/tests/test_dpo_trainer.py
@@ -167,8 +167,13 @@ def setup_method(self):
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token

-    def test_train(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_train(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         training_args = DPOConfig(
@@ -196,6 +201,12 @@ def test_train(self):
         if param.sum() != 0:  # ignore 0 biases
             assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12)

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @pytest.mark.parametrize(
         "loss_type",
         [
@@ -214,8 +225,7 @@
             "apo_down",
         ],
     )
-    def test_train_loss_types(self, loss_type):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_train_loss_types(self, loss_type, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
         tokenizer = AutoTokenizer.from_pretrained(model_id)

@@ -308,12 +318,17 @@ def test_dpo_trainer_with_weighting(self):
         if param.sum() != 0:  # ignore 0 biases
             assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12)

-    def test_train_with_multiple_loss_types(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_train_with_multiple_loss_types(self, model_id):
         """
         Tests multi-loss combinations, loss type inference, and weight configuration. MPO combines DPO (sigmoid), BCO (bco_pair), and SFT (sft) losses.
""" - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -642,14 +657,19 @@ def test_dpo_lora_save(self): except OSError: pytest.fail("Loading the saved peft adapter failed") + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft @require_torch_gpu_if_bnb_not_multi_backend_enabled - def test_dpo_lora_bf16_autocast_llama(self): + def test_dpo_lora_bf16_autocast_llama(self, model_id): # Note this test only works on compute capability > 7 GPU devices from peft import LoraConfig from transformers import BitsAndBytesConfig - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" tokenizer = AutoTokenizer.from_pretrained(model_id) lora_config = LoraConfig( @@ -784,11 +804,16 @@ def test_dpo_lora_bf16_autocast(self, loss_type, pre_compute, gen_during_eval): # save peft adapter trainer.save_model() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_dpo_lora_tags(self): + def test_dpo_lora_tags(self, model_id): from peft import LoraConfig - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" tokenizer = AutoTokenizer.from_pretrained(model_id) lora_config = LoraConfig( @@ -830,9 +855,14 @@ def test_dpo_lora_tags(self): for tag in ["dpo", "trl"]: assert tag in trainer.model.model_tags + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_dpo_tags(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_dpo_tags(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) # lora model @@ -1002,8 +1032,13 @@ def test_dpo_trainer_dtype(self): train_dataset=dummy_dataset["train"], ) - def test_dpo_loss_alpha_div_f(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_dpo_loss_alpha_div_f(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) # lora model @@ -1043,8 +1078,13 @@ def test_dpo_loss_alpha_div_f(self): ) assert torch.isfinite(losses).cpu().numpy().all() - def test_dpo_loss_js_div_f(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_dpo_loss_js_div_f(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) # lora model @@ -1237,10 +1277,16 @@ def test_padding_free(self): if param.sum() != 0: # ignore 0 biases assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12) - def test_compute_metrics(self): - model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - ref_model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_compute_metrics(self, model_id): + model = AutoModelForCausalLM.from_pretrained(model_id) + ref_model = AutoModelForCausalLM.from_pretrained(model_id) + tokenizer = AutoTokenizer.from_pretrained(model_id) tokenizer.pad_token = tokenizer.eos_token dummy_dataset = 
load_dataset("trl-internal-testing/zen", "standard_preference") @@ -1272,8 +1318,13 @@ def dummy_compute_metrics(*args, **kwargs): assert trainer.state.log_history[-2]["eval_test"] == 0.0 - def test_train_with_length_desensitization(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_length_desensitization(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -1385,8 +1436,13 @@ def test_dpo_trainer_with_liger(self, beta, loss_type): assert output is not None assert "loss" not in output.keys() - def test_train_with_iterable_dataset(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_iterable_dataset(self, model_id): dataset = load_dataset( "trl-internal-testing/zen", "standard_preference", diff --git a/tests/test_grpo_trainer.py b/tests/test_grpo_trainer.py index 88d2579a69d..7a90d6e9532 100644 --- a/tests/test_grpo_trainer.py +++ b/tests/test_grpo_trainer.py @@ -128,17 +128,29 @@ def test_compute_entropy_all_masked(self): class TestGRPOTrainer(TrlTestCase): - def test_init_minimal(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_init_minimal(self, model_id): # Test that GRPOTrainer can be instantiated with only model, reward_model and train_dataset dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", train_dataset=dataset, ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) - def test_training(self, config_name): + def test_training(self, config_name, model_id): dataset = load_dataset("trl-internal-testing/zen", config_name, split="train") training_args = GRPOConfig( @@ -150,7 +162,7 @@ def test_training(self, config_name): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -167,8 +179,14 @@ def test_training(self, config_name): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
+ @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize("loss_type", ["bnpo", "dr_grpo", "dapo"]) - def test_training_loss_types(self, loss_type): + def test_training_loss_types(self, loss_type, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -182,7 +200,7 @@ def test_training_loss_types(self, loss_type): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -199,7 +217,13 @@ def test_training_loss_types(self, loss_type): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_eval(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_with_eval(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only") training_args = GRPOConfig( @@ -213,7 +237,7 @@ def test_training_with_eval(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset["train"], @@ -222,7 +246,13 @@ def test_training_with_eval(self): trainer.train() - def test_training_multiple_iterations(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_multiple_iterations(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -235,7 +265,7 @@ def test_training_multiple_iterations(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -252,9 +282,15 @@ def test_training_multiple_iterations(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_training_peft(self): - model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + def test_training_peft(self, model_id): + model = AutoModelForCausalLM.from_pretrained(model_id) base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()] dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -288,13 +324,19 @@ def test_training_peft(self): elif "base_layer" not in n: # We expect the peft params to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed." 
+ @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_training_peft_with_gradient_checkpointing(self): + def test_training_peft_with_gradient_checkpointing(self, model_id): """Test that training works with PEFT and gradient checkpointing enabled.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") model = AutoModelForCausalLM.from_pretrained( - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model_id, dtype=torch.float32, # Use float32 for testing to avoid precision issues ) @@ -337,7 +379,13 @@ def test_training_peft_with_gradient_checkpointing(self): else: # Base model parameters should not change assert torch.equal(param, new_param), f"Base parameter {n} has changed." - def test_training_different_reward_model(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_different_reward_model(self, model_id): # Use a reward model different from the model: different chat template, tokenization, etc. dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") reward_model_id = "trl-internal-testing/tiny-LlamaForSequenceClassification-3.2" @@ -358,7 +406,7 @@ def test_training_different_reward_model(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_model, args=training_args, train_dataset=dataset, @@ -376,7 +424,13 @@ def test_training_different_reward_model(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_reward_func_standard(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_reward_func_standard(self, model_id): # Test if trainer can handle reward function with standard format dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -393,7 +447,7 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, @@ -410,7 +464,13 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_reward_func_conversational(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_reward_func_conversational(self, model_id): # Test if trainer can handle reward function with conversational format dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") @@ -428,7 +488,7 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, @@ -445,7 +505,13 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_multiple_reward_funcs(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_multiple_reward_funcs(self, model_id): # Test that GRPOTrainer can be instantiated with multiple reward functions dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -466,7 +532,7 @@ def reward_func2(completions, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[reward_func1, reward_func2], args=training_args, train_dataset=dataset, @@ -483,7 +549,13 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_reward_funcs_with_None_output(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_multiple_reward_funcs_with_None_output(self, model_id): """Test that a valid math reward function is processed correctly while the code reward function returns None.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -505,7 +577,7 @@ def non_applicable_reward_func(completions, **kwargs): ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[ applicable_reward_func, non_applicable_reward_func, @@ -527,7 +599,13 @@ def non_applicable_reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_reward_funcs_with_weights(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_multiple_reward_funcs_with_weights(self, model_id): """Test that GRPOTrainer can handle multiple reward functions with weights.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -549,7 +627,7 @@ def reward_func2(completions, **kwargs): reward_weights=[0.7, 0.3], # weight of reward_func1 and reward_func2 respectively ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[reward_func1, reward_func2], args=training_args, train_dataset=dataset, @@ -571,7 +649,13 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_mixed_reward_funcs(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_multiple_mixed_reward_funcs(self, model_id): # Test if the trainer can handle a mix of reward functions and reward models dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -588,7 +672,7 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[reward_func, "trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5"], args=training_args, train_dataset=dataset, @@ -605,7 +689,13 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_reward_func_additional_column(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_reward_func_additional_column(self, model_id): # Test if trainer can handle reward function that rely on additional columns in the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -628,7 +718,7 @@ def reward_func(completions, some_values, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, @@ -645,7 +735,13 @@ def reward_func(completions, some_values, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_sync_ref_model(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_with_sync_ref_model(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -659,7 +755,7 @@ def test_training_with_sync_ref_model(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -676,7 +772,13 @@ def test_training_with_sync_ref_model(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_beta_non_zero(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_beta_non_zero(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( output_dir=self.tmp_dir, @@ -688,7 +790,7 @@ def test_training_beta_non_zero(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -706,11 +808,14 @@ def test_training_beta_non_zero(self): assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
     @pytest.mark.parametrize(
-        "model_name",
-        ["trl-internal-testing/tiny-Qwen3ForCausalLM", "trl-internal-testing/tiny-Gemma2ForCausalLM"],
-        # Gemma2 has the input word embeddings and lm_head tied, Qwen3 does not
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            # Gemma2 has the input word embeddings and lm_head tied, Qwen3 does not
+            "trl-internal-testing/tiny-Gemma2ForCausalLM",
+        ],
     )
-    def test_training_with_cast_lm_head_to_fp32(self, model_name):
+    def test_training_with_cast_lm_head_to_fp32(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
         training_args = GRPOConfig(
             output_dir=self.tmp_dir,
@@ -722,7 +827,7 @@ def test_training_with_cast_lm_head_to_fp32(self, model_name):
             cast_lm_head_to_fp32=True,
         )
         trainer = GRPOTrainer(
-            model=model_name,
+            model=model_id,
             reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5",
             args=training_args,
             train_dataset=dataset,
@@ -739,7 +844,13 @@ def test_training_with_cast_lm_head_to_fp32(self, model_name):
             new_param = trainer.model.get_parameter(n)
             assert not torch.equal(param, new_param), f"Parameter {n} has not changed."

-    def test_training_with_entropy_filter(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_training_with_entropy_filter(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
         training_args = GRPOConfig(
             output_dir=self.tmp_dir,
@@ -751,7 +862,7 @@ def test_training_with_entropy_filter(self):
             top_entropy_quantile=0.2,
         )
         trainer = GRPOTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5",
             args=training_args,
             train_dataset=dataset,
@@ -883,7 +994,13 @@ def test_training_vllm_importance_sampling_correction(self):
             new_param = trainer.model.get_parameter(n)
             assert not torch.equal(param, new_param), f"Parameter {n} has not changed."

-    def test_training_with_additional_generation_kwargs(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_training_with_additional_generation_kwargs(self, model_id):
         """Test that training works with additional generation kwargs."""
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")

@@ -901,7 +1018,7 @@ def test_training_with_additional_generation_kwargs(self):
         )

         trainer = GRPOTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5",
             args=training_args,
             train_dataset=dataset,
@@ -956,8 +1073,14 @@ def test_training_vllm_with_additional_generation_kwargs(self):
             new_param = trainer.model.get_parameter(n)
             assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
+ @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize("scale_rewards", [False, "group", "batch", True, "none"]) - def test_training_scale_rewards(self, scale_rewards): + def test_training_scale_rewards(self, scale_rewards, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -970,7 +1093,7 @@ def test_training_scale_rewards(self, scale_rewards): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -987,8 +1110,14 @@ def test_training_scale_rewards(self, scale_rewards): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @patch("transformers.generation.utils.GenerationMixin.generate") - def test_training_with_mask_truncated_completions(self, mock_generate): + def test_training_with_mask_truncated_completions(self, mock_generate, model_id): """Test that training works with mask_truncated_completions=True parameter.""" # We mock the generate method because the model's random weights make it extremely unlikely to produce a @@ -1020,7 +1149,7 @@ def fake_generate(input_ids, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1037,7 +1166,13 @@ def fake_generate(input_ids, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_mask_truncated_completions_all_masked(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_with_mask_truncated_completions_all_masked(self, model_id): """ Test that when all generated completions are truncated (i.e., none contain an EOS token), and mask_truncated_completions=True, the model receives no effective learning signal and therefore does not update @@ -1058,7 +1193,7 @@ def test_training_with_mask_truncated_completions_all_masked(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1075,7 +1210,13 @@ def test_training_with_mask_truncated_completions_all_masked(self): new_param = trainer.model.get_parameter(n) assert torch.equal(param, new_param), f"Parameter {n} has changed." 
- def test_warning_raised_all_rewards_none(self, caplog): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_warning_raised_all_rewards_none(self, model_id, caplog): """Test that a proper warning is raised when all rewards are None.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1092,7 +1233,7 @@ def always_none_reward_func(completions, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=always_none_reward_func, args=training_args, train_dataset=dataset, @@ -1104,7 +1245,13 @@ def always_none_reward_func(completions, **kwargs): expected_warning = "All reward functions returned None for the following kwargs:" assert expected_warning in caplog.text - def test_training_num_generations_larger_than_batch_size(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_num_generations_larger_than_batch_size(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -1117,7 +1264,7 @@ def test_training_num_generations_larger_than_batch_size(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1134,7 +1281,13 @@ def test_training_num_generations_larger_than_batch_size(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_delta_clipping(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_delta_clipping(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -1147,7 +1300,7 @@ def test_training_delta_clipping(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1164,7 +1317,13 @@ def test_training_delta_clipping(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_dataloader_workers(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_multiple_dataloader_workers(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -1177,7 +1336,7 @@ def test_training_multiple_dataloader_workers(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1194,7 +1353,13 @@ def test_training_multiple_dataloader_workers(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_with_generation_kwargs(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_with_generation_kwargs(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -1207,7 +1372,7 @@ def test_training_with_generation_kwargs(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1224,7 +1389,13 @@ def test_training_with_generation_kwargs(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_reward_func_accessing_trainer_state(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_with_reward_func_accessing_trainer_state(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") def reward_func(completions, **kwargs): @@ -1242,14 +1413,20 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, ) trainer.train() - def test_prepare_input_called_with_correct_data(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_prepare_input_called_with_correct_data(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( output_dir=self.tmp_dir, @@ -1265,7 +1442,7 @@ def test_prepare_input_called_with_correct_data(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1631,7 +1808,13 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_sequence_importance_sampling(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_sequence_importance_sampling(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -1645,7 +1828,7 @@ def test_training_sequence_importance_sampling(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1692,7 +1875,13 @@ def test_training_with_chat_template_kwargs(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
 
-    def test_mismatched_reward_processing_classes_length(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_mismatched_reward_processing_classes_length(self, model_id):
         """Test that mismatched length between reward_funcs and reward_processing_classes raises error."""
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
@@ -1711,14 +1900,20 @@ def test_mismatched_reward_processing_classes_length(self):
 
         with pytest.raises(ValueError, match="must match"):
             GRPOTrainer(
-                model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+                model=model_id,
                 reward_funcs=reward_models,
                 reward_processing_classes=single_processing_class,  # only one, but need two
                 args=training_args,
                 train_dataset=dataset,
             )
 
-    def test_correct_reward_processing_classes_list(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_correct_reward_processing_classes_list(self, model_id):
         """Test that correct list of reward_processing_classes works properly."""
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
@@ -1740,7 +1935,7 @@ def test_correct_reward_processing_classes_list(self):
 
         correct_processing_classes = [processing_class1, processing_class2]
         trainer = GRPOTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             reward_funcs=reward_models,
             reward_processing_classes=correct_processing_classes,
             args=training_args,
@@ -1749,7 +1944,13 @@ def test_correct_reward_processing_classes_list(self):
 
         assert len(trainer.reward_processing_classes) == len(reward_models)
 
-    def test_single_reward_model_with_single_processing_class(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_single_reward_model_with_single_processing_class(self, model_id):
         """Test that single reward model with single processing class works."""
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
@@ -1764,7 +1965,7 @@ def test_single_reward_model_with_single_processing_class(self):
 
         training_args = GRPOConfig(output_dir=self.tmp_dir, report_to="none")
         trainer = GRPOTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             reward_funcs=reward_model,
             reward_processing_classes=single_processing_class,  # single object for single reward model
             args=training_args,
@@ -1776,7 +1977,13 @@
 
 
 class TestGSPOTokenTrainer(TrlTestCase):
-    def test_training(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_training(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
         training_args = GRPOConfig(
@@ -1790,7 +1997,7 @@ def test_training(self):
             report_to="none",
         )
         trainer = GSPOTokenTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5",
             args=training_args,
             train_dataset=dataset,
diff --git a/tests/test_kto_trainer.py b/tests/test_kto_trainer.py
index ec8f4165fca..74ab1261ba4 100644
--- a/tests/test_kto_trainer.py
+++ b/tests/test_kto_trainer.py
@@ -389,10 +389,16 @@ def test_kto_trainer_with_liger(self):
             if param.sum() != 0:
                 assert not torch.equal(param, new_param)
 
-    def test_compute_metrics(self):
-        model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        ref_model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_compute_metrics(self, model_id):
+        model = AutoModelForCausalLM.from_pretrained(model_id)
+        ref_model = AutoModelForCausalLM.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
         tokenizer.pad_token = tokenizer.eos_token
 
         dummy_dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference")
diff --git a/tests/test_online_dpo_trainer.py b/tests/test_online_dpo_trainer.py
index 09f986ad558..a34aab347c7 100644
--- a/tests/test_online_dpo_trainer.py
+++ b/tests/test_online_dpo_trainer.py
@@ -80,7 +80,13 @@ def test_training(self, config_name):
         # Check if training loss is available
         assert "train_loss" in trainer.state.log_history[-1]
 
-    def test_training_model_str(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_training_model_str(self, model_id):
         training_args = OnlineDPOConfig(
             output_dir=self.tmp_dir,
             per_device_train_batch_size=2,
@@ -92,7 +98,7 @@ def test_training_model_str(self):
         dummy_dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only")
 
         trainer = OnlineDPOTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             reward_funcs=self.reward_model,
             args=training_args,
             train_dataset=dummy_dataset["train"],
diff --git a/tests/test_orpo_trainer.py b/tests/test_orpo_trainer.py
index 70f087ac948..ee7032e84f1 100644
--- a/tests/test_orpo_trainer.py
+++ b/tests/test_orpo_trainer.py
@@ -142,9 +142,15 @@ def test_orpo_trainer_with_lora(self, config_name):
             if param.sum() != 0:  # ignore 0 biases
                 assert not torch.equal(param, new_param)
 
-    def test_compute_metrics(self):
-        model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_compute_metrics(self, model_id):
+        model = AutoModelForCausalLM.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
         tokenizer.pad_token = tokenizer.eos_token
 
         dummy_dataset = load_dataset("trl-internal-testing/zen", "standard_preference")
diff --git a/tests/test_rloo_trainer.py b/tests/test_rloo_trainer.py
index 82810b1cca5..024d6dd1a7f 100644
--- a/tests/test_rloo_trainer.py
+++ b/tests/test_rloo_trainer.py
@@ -35,17 +35,29 @@
 
 
 class TestRLOOTrainer(TrlTestCase):
-    def test_init_minimal(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_init_minimal(self, model_id):
         # Test that RLOOTrainer can be instantiated with only model, reward_model and train_dataset
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
         RLOOTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5",
             train_dataset=dataset,
         )
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"])
-    def test_training(self, config_name):
+    def test_training(self, config_name, model_id):
         dataset = load_dataset("trl-internal-testing/zen", config_name, split="train")
 
         training_args = RLOOConfig(
@@ -57,7 +69,7 @@ def test_training(self, config_name):
             report_to="none",
         )
         trainer = RLOOTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5",
             args=training_args,
             train_dataset=dataset,
@@ -74,7 +86,13 @@ def test_training(self, config_name):
             new_param = trainer.model.get_parameter(n)
             assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
-    def test_training_with_eval(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_training_with_eval(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only")
 
         training_args = RLOOConfig(
@@ -88,7 +106,7 @@ def test_training_with_eval(self):
             report_to="none",
         )
         trainer = RLOOTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5",
             args=training_args,
             train_dataset=dataset["train"],
@@ -97,7 +115,13 @@ def test_training_with_eval(self):
 
         trainer.train()
 
-    def test_training_multiple_iterations(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_training_multiple_iterations(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
         training_args = RLOOConfig(
@@ -110,7 +134,7 @@ def test_training_multiple_iterations(self):
             report_to="none",
        )
         trainer = RLOOTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5",
             args=training_args,
             train_dataset=dataset,
@@ -127,9 +151,15 @@ def test_training_multiple_iterations(self):
             new_param = trainer.model.get_parameter(n)
             assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_peft
-    def test_training_peft(self):
-        model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+    def test_training_peft(self, model_id):
+        model = AutoModelForCausalLM.from_pretrained(model_id)
         base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()]
 
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
@@ -163,13 +193,19 @@ def test_training_peft(self):
             elif "base_layer" not in n:
                 # We expect the peft params to be different (except for the base layer)
                 assert not torch.allclose(param, new_param), f"Parameter {n} has not changed."
+ @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_training_peft_with_gradient_checkpointing(self): + def test_training_peft_with_gradient_checkpointing(self, model_id): """Test that training works with PEFT and gradient checkpointing enabled.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") model = AutoModelForCausalLM.from_pretrained( - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model_id, dtype=torch.float32, # Use float32 for testing to avoid precision issues ) @@ -212,7 +248,13 @@ def test_training_peft_with_gradient_checkpointing(self): else: # Base model parameters should not change assert torch.equal(param, new_param), f"Base parameter {n} has changed." - def test_training_different_reward_model(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_different_reward_model(self, model_id): # Use a reward model different from the model: different chat template, tokenization, etc. dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") reward_model_id = "trl-internal-testing/tiny-LlamaForSequenceClassification-3.2" @@ -233,7 +275,7 @@ def test_training_different_reward_model(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_model, args=training_args, train_dataset=dataset, @@ -251,7 +293,13 @@ def test_training_different_reward_model(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_reward_func_standard(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_reward_func_standard(self, model_id): # Test if trainer can handle reward function with standard format dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -268,7 +316,7 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, @@ -285,7 +333,13 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_reward_func_conversational(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_reward_func_conversational(self, model_id): # Test if trainer can handle reward function with conversational format dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") @@ -303,7 +357,7 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, @@ -320,7 +374,13 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_multiple_reward_funcs(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_multiple_reward_funcs(self, model_id): # Test that RLOOTrainer can be instantiated with multiple reward functions dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -341,7 +401,7 @@ def reward_func2(completions, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[reward_func1, reward_func2], args=training_args, train_dataset=dataset, @@ -358,7 +418,13 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_reward_funcs_with_None_output(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_multiple_reward_funcs_with_None_output(self, model_id): """Test that a valid math reward function is processed correctly while the code reward function returns None.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -380,7 +446,7 @@ def non_applicable_reward_func(completions, **kwargs): ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[ applicable_reward_func, non_applicable_reward_func, @@ -402,7 +468,13 @@ def non_applicable_reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_reward_funcs_with_weights(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_multiple_reward_funcs_with_weights(self, model_id): """Test that RLOOTrainer can handle multiple reward functions with weights.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -424,7 +496,7 @@ def reward_func2(completions, **kwargs): reward_weights=[0.7, 0.3], # weight of reward_func1 and reward_func2 respectively ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[reward_func1, reward_func2], args=training_args, train_dataset=dataset, @@ -446,7 +518,13 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_mixed_reward_funcs(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_multiple_mixed_reward_funcs(self, model_id): # Test if the trainer can handle a mix of reward functions and reward models dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -463,7 +541,7 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[reward_func, "trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5"], args=training_args, train_dataset=dataset, @@ -480,7 +558,13 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_reward_func_additional_column(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_reward_func_additional_column(self, model_id): # Test if trainer can handle reward function that rely on additional columns in the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -503,7 +587,7 @@ def reward_func(completions, some_values, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, @@ -520,7 +604,13 @@ def reward_func(completions, some_values, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_sync_ref_model(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_with_sync_ref_model(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -534,7 +624,7 @@ def test_training_with_sync_ref_model(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -551,7 +641,13 @@ def test_training_with_sync_ref_model(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_beta_zero(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_beta_zero(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( output_dir=self.tmp_dir, @@ -563,7 +659,7 @@ def test_training_beta_zero(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -660,7 +756,13 @@ def test_training_vllm_guided_decoding(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_additional_generation_kwargs(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_with_additional_generation_kwargs(self, model_id): """Test that training works with additional generation kwargs.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -678,7 +780,7 @@ def test_training_with_additional_generation_kwargs(self): ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -733,7 +835,13 @@ def test_training_vllm_with_additional_generation_kwargs(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_with_normalized_advantages(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_with_normalized_advantages(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -746,7 +854,7 @@ def test_training_with_normalized_advantages(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -763,7 +871,13 @@ def test_training_with_normalized_advantages(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_clipped_rewards(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_with_clipped_rewards(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -776,7 +890,7 @@ def test_training_with_clipped_rewards(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -793,8 +907,14 @@ def test_training_with_clipped_rewards(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @patch("transformers.generation.utils.GenerationMixin.generate") - def test_training_with_mask_truncated_completions(self, mock_generate): + def test_training_with_mask_truncated_completions(self, mock_generate, model_id): """Test that training works with mask_truncated_completions=True parameter.""" # We mock the generate method because the model's random weights make it extremely unlikely to produce a @@ -826,7 +946,7 @@ def fake_generate(input_ids, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -843,7 +963,13 @@ def fake_generate(input_ids, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_with_mask_truncated_completions_all_masked(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_with_mask_truncated_completions_all_masked(self, model_id): """ Test that when all generated completions are truncated (i.e., none contain an EOS token), and mask_truncated_completions=True, the model receives no effective learning signal and therefore does not update @@ -864,7 +990,7 @@ def test_training_with_mask_truncated_completions_all_masked(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -881,7 +1007,13 @@ def test_training_with_mask_truncated_completions_all_masked(self): new_param = trainer.model.get_parameter(n) assert torch.equal(param, new_param), f"Parameter {n} has changed." - def test_warning_raised_all_rewards_none(self, caplog): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_warning_raised_all_rewards_none(self, model_id, caplog): """Test that a proper warning is raised when all rewards are None.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -898,7 +1030,7 @@ def always_none_reward_func(completions, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=always_none_reward_func, args=training_args, train_dataset=dataset, @@ -910,7 +1042,13 @@ def always_none_reward_func(completions, **kwargs): expected_warning = "All reward functions returned None for the following kwargs:" assert expected_warning in caplog.text - def test_training_num_generations_larger_than_batch_size(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_num_generations_larger_than_batch_size(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -923,7 +1061,7 @@ def test_training_num_generations_larger_than_batch_size(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -940,7 +1078,13 @@ def test_training_num_generations_larger_than_batch_size(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_multiple_dataloader_workers(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_multiple_dataloader_workers(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -953,7 +1097,7 @@ def test_training_multiple_dataloader_workers(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -970,7 +1114,13 @@ def test_training_multiple_dataloader_workers(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_generation_kwargs(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_with_generation_kwargs(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -983,7 +1133,7 @@ def test_training_with_generation_kwargs(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1000,7 +1150,13 @@ def test_training_with_generation_kwargs(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_reward_func_accessing_trainer_state(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training_with_reward_func_accessing_trainer_state(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") def reward_func(completions, **kwargs): @@ -1018,14 +1174,20 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, ) trainer.train() - def test_prepare_input_called_with_correct_data(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_prepare_input_called_with_correct_data(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( output_dir=self.tmp_dir, @@ -1041,7 +1203,7 @@ def test_prepare_input_called_with_correct_data(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1340,7 +1502,13 @@ def test_training_with_chat_template_kwargs(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_mismatched_reward_processing_classes_length(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_mismatched_reward_processing_classes_length(self, model_id): """Test that mismatched length between reward_funcs and reward_processing_classes raises error.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1359,14 +1527,20 @@ def test_mismatched_reward_processing_classes_length(self): with pytest.raises(ValueError, match="must match"): RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_models, reward_processing_classes=single_processing_class, # only one, but need two args=training_args, train_dataset=dataset, ) - def test_correct_reward_processing_classes_list(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_correct_reward_processing_classes_list(self, model_id): """Test that correct list of reward_processing_classes works properly.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1388,7 +1562,7 @@ def test_correct_reward_processing_classes_list(self): correct_processing_classes = [processing_class1, processing_class2] trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_models, reward_processing_classes=correct_processing_classes, args=training_args, @@ -1397,7 +1571,13 @@ def test_correct_reward_processing_classes_list(self): assert len(trainer.reward_processing_classes) == len(reward_models) - def test_single_reward_model_with_single_processing_class(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_single_reward_model_with_single_processing_class(self, model_id): """Test that single reward model with single processing class works.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1412,7 +1592,7 @@ def test_single_reward_model_with_single_processing_class(self): training_args = RLOOConfig(output_dir=self.tmp_dir, report_to="none") trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_model, reward_processing_classes=single_processing_class, # single object for single reward model args=training_args, diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index e3429809160..e8a1a703359 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -316,9 +316,15 @@ def test_train_gpt_oss(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - def test_train_model(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_model(self, model_id): # Instantiate the model - model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + model = AutoModelForCausalLM.from_pretrained(model_id) # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -341,7 +347,13 @@ def test_train_model(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - def test_train_dft_loss(self): + @pytest.mark.parametrize( + "model_id", + 
[ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_dft_loss(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") @@ -357,7 +369,7 @@ def test_train_dft_loss(self): eval_steps=3, ) trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, args=training_args, train_dataset=dataset["train"], eval_dataset=dataset["test"], @@ -405,7 +417,13 @@ def test_train_moe_model_with_aux_loss(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - def test_train_with_formatting_func(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_formatting_func(self, model_id): # Dummy formatting function def formatting_prompts_func(example): chosen, rejected = example["chosen"], example["rejected"] @@ -417,7 +435,7 @@ def formatting_prompts_func(example): # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, report_to="none") trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, args=training_args, train_dataset=dataset, formatting_func=formatting_prompts_func, @@ -437,7 +455,13 @@ def formatting_prompts_func(example): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - def test_train_model_dtype(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_model_dtype(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -448,9 +472,7 @@ def test_train_model_dtype(self): learning_rate=0.1, report_to="none", ) - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -472,10 +494,15 @@ def test_train_model_dtype(self): assert new_param.dtype == torch.float16 assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_train_dense_with_peft_config_lora(self): + def test_train_dense_with_peft_config_lora(self, model_id): # Get the base model parameter names - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id) base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()] @@ -509,6 +536,12 @@ def test_train_dense_with_peft_config_lora(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize( "peft_type", [ @@ -518,9 +551,8 @@ def test_train_dense_with_peft_config_lora(self): ], ) @require_peft - def test_train_with_peft_config_prompt_tuning(self, peft_type): + def test_train_with_peft_config_prompt_tuning(self, peft_type, model_id): # Get the base model parameter names - 
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id) base_param_names = [f"base_model.{n}" for n, _ in model.named_parameters()] @@ -533,7 +565,7 @@ def test_train_with_peft_config_prompt_tuning(self, peft_type): peft_config = PromptTuningConfig( task_type=TaskType.CAUSAL_LM, num_virtual_tokens=4, - tokenizer_name_or_path="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + tokenizer_name_or_path=model_id, ) elif peft_type == "prefix_tuning": peft_config = PrefixTuningConfig( @@ -607,10 +639,15 @@ def test_train_moe_with_peft_config(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_train_peft_model(self): + def test_train_peft_model(self, model_id): # Get the base model - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id) # Get the base model parameter names @@ -644,10 +681,15 @@ def test_train_peft_model(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_train_dense_with_peft_config_and_gradient_checkpointing(self): + def test_train_dense_with_peft_config_and_gradient_checkpointing(self, model_id): # Get the base model parameter names - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id) base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()] @@ -718,10 +760,15 @@ def test_train_moe_with_peft_config_and_gradient_checkpointing(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_train_with_peft_model_and_gradient_checkpointing(self): + def test_train_with_peft_model_and_gradient_checkpointing(self, model_id): # Get the base model parameter names - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id) base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()] model = get_peft_model(model, LoraConfig()) @@ -754,16 +801,20 @@ def test_train_with_peft_model_and_gradient_checkpointing(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_liger_kernel - def test_train_with_liger(self): + def test_train_with_liger(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, use_liger_kernel=True, report_to="none") - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, 
train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -779,7 +830,13 @@ def test_train_with_liger(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - def test_train_with_non_chatml_conversational_data(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_non_chatml_conversational_data(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "conversational_language_modeling", split="train") @@ -791,9 +848,7 @@ def rename_fields(example: list[dict]): # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, report_to="none") - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -809,9 +864,14 @@ def rename_fields(example: list[dict]): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - def test_train_with_pretokenized_data(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_pretokenized_data(self, model_id): # Get the dataset - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -839,15 +899,19 @@ def tokenize_example(example): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - def test_train_with_iterable_dataset(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_iterable_dataset(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train", streaming=True) # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, max_steps=3, report_to="none") - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -863,8 +927,14 @@ def test_train_with_iterable_dataset(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_flash_attn - def test_train_padding_free(self): + def test_train_padding_free(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -876,9 +946,7 @@ def test_train_padding_free(self): bf16=True, # flash_attention_2 only supports bf16 and fp16 report_to="none", ) - trainer 
= SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -894,10 +962,16 @@ def test_train_padding_free(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize("packing_strategy", ["bfd", "wrapped"]) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) - def test_train_packing(self, packing_strategy): + def test_train_packing(self, packing_strategy, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -905,9 +979,7 @@ def test_train_packing(self, packing_strategy): training_args = SFTConfig( output_dir=self.tmp_dir, packing=True, packing_strategy=packing_strategy, max_length=10, report_to="none" ) - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -923,9 +995,15 @@ def test_train_packing(self, packing_strategy): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) - def test_eval_packing(self): + def test_eval_packing(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") @@ -937,7 +1015,7 @@ def test_eval_packing(self): report_to="none", ) trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, args=training_args, train_dataset=dataset["train"], eval_dataset=dataset["test"], @@ -957,9 +1035,15 @@ def test_eval_packing(self): assert len(trainer.train_dataset["input_ids"]) == 3 # w/ this dataset, we end up with 46 seqs assert len(trainer.eval_dataset["input_ids"]) == 1 # w/ this dataset, we end up with 6 seqs + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) - def test_only_train_packing(self): + def test_only_train_packing(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") @@ -972,7 +1056,7 @@ def test_only_train_packing(self): report_to="none", ) trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + 
model=model_id, args=training_args, train_dataset=dataset["train"], eval_dataset=dataset["test"], @@ -992,23 +1076,27 @@ def test_only_train_packing(self): assert len(trainer.train_dataset["input_ids"]) == 3 # w/ this dataset, we end up with 46 seqs assert len(trainer.eval_dataset["input_ids"]) == 2 # w/ this dataset, we end up with 6 seqs - def test_train_with_chat_template_kwargs(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_chat_template_kwargs(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, report_to="none") - tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + tokenizer = AutoTokenizer.from_pretrained(model_id) # The following template is a simplified version of the Qwen chat template, where an additional argument # `role_capital` is used to control the capitalization of roles. tokenizer.chat_template = '{%- if messages[0]["role"] == "system" -%} {{ "<|im_start|>" + ("SYSTEM" if role_capital else "system") + "\\n" + messages[0]["content"] + "<|im_end|>\\n" }}{%- else -%} {{ "<|im_start|>" + ("SYSTEM" if role_capital else "system") + "\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n" }}{%- endif -%}{%- for message in messages -%} {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) -%} {{ "<|im_start|>" + (message.role.upper() if role_capital else message.role) + "\\n" + message.content + "<|im_end|>\\n" }} {%- elif message.role == "assistant" -%} {{ "<|im_start|>" + ("ASSISTANT" if role_capital else "assistant") }} {%- if message.content -%} {{ "\\n" + message.content }} {%- endif -%} {{ "<|im_end|>\\n" }} {%- elif message.role == "tool" -%} {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") -%} {{ "<|im_start|>" + ("USER" if role_capital else "user") }} {%- endif -%} {{ "\\n\\n" + message.content + "\\n" }} {%- if loop.last or (messages[loop.index0 + 1].role != "tool") -%} {{ "<|im_end|>\\n" }} {%- endif -%} {%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%} {{ "<|im_start|>" + ("ASSISTANT" if role_capital else "assistant") + "\\n" }}{%- endif -%}' dataset.add_column("chat_template_kwargs", [{"role_capital": bool(i % 2)} for i in range(len(dataset))]) - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -1220,15 +1308,19 @@ def test_train_with_set_chat_template_from_path(self): original_template_content = f.read() assert template_content == original_template_content, "Chat template content does not match the original" - def test_train_toolcall_data(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_toolcall_data(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/toolcall", split="train") # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, report_to="none") - trainer = SFTTrainer( - 
model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -1244,14 +1336,20 @@ def test_train_toolcall_data(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - def test_train_with_eval(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_eval(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, eval_strategy="steps", eval_steps=3, report_to="none") trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, args=training_args, train_dataset=dataset["train"], eval_dataset=dataset["test"], @@ -1263,14 +1361,20 @@ def test_train_with_eval(self): # Check that the eval loss is not None assert trainer.state.log_history[0]["eval_loss"] is not None - def test_train_with_multiple_eval_dataset(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_multiple_eval_dataset(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, eval_strategy="steps", eval_steps=3, report_to="none") trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, args=training_args, train_dataset=dataset["train"], eval_dataset={"data1": dataset["test"], "data2": dataset["test"]}, @@ -1282,15 +1386,19 @@ def test_train_with_multiple_eval_dataset(self): assert trainer.state.log_history[-3]["eval_data1_loss"] is not None assert trainer.state.log_history[-2]["eval_data2_loss"] is not None - def test_train_with_gradient_checkpointing(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_gradient_checkpointing(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, gradient_checkpointing=True, report_to="none") - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -1306,27 +1414,36 @@ def test_train_with_gradient_checkpointing(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - def test_tag_added(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_tag_added(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") # Initialize the trainer - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - 
train_dataset=dataset, - ) + trainer = SFTTrainer(model=model_id, train_dataset=dataset) for tag in ["sft", "trl"]: assert tag in trainer.model.model_tags + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_tag_added_peft(self): + def test_tag_added_peft(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") # Initialize the trainer trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, train_dataset=dataset, peft_config=LoraConfig(), ) @@ -1545,14 +1662,20 @@ def test_train_vlm_text_only_data(self, model_id): else: assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12), f"Param {n} is not updated" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_prompt_tuning(self): + def test_prompt_tuning(self, model_id): """Test that SFT works with Prompt Tuning.""" dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") training_args = SFTConfig(output_dir=self.tmp_dir, report_to="none") trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, args=training_args, train_dataset=dataset, peft_config=PromptEncoderConfig(task_type=TaskType.CAUSAL_LM, num_virtual_tokens=8), @@ -1577,9 +1700,15 @@ def test_prompt_tuning(self): else: raise ValueError(f"Unexpected parameter {n} in model: {trainer.model}") + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft @require_bitsandbytes - def test_peft_model_with_quantization(self): + def test_peft_model_with_quantization(self, model_id): """SFTTrainer should not freeze layers of existing PeftModel. This test simulates a realistic QLoRA scenario where a quantized base model is first converted to a PeftModel, @@ -1587,7 +1716,6 @@ def test_peft_model_with_quantization(self): including the LoRA adapters, making training impossible. 
""" # Get the base model - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id) # Simulate a realistic QLoRA setup by mocking quantization attributes @@ -1671,10 +1799,16 @@ def test_peft_model_with_quantization(self): "All original LoRA parameters should remain trainable after SFTTrainer initialization" ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_prompt_tuning_peft_model(self): + def test_prompt_tuning_peft_model(self, model_id): """Test that SFT works with Prompt Tuning and a pre-converted PeftModel""" - model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + model = AutoModelForCausalLM.from_pretrained(model_id) model = get_peft_model(model, PromptEncoderConfig(task_type=TaskType.CAUSAL_LM, num_virtual_tokens=8)) dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") diff --git a/tests/test_trainers_args.py b/tests/test_trainers_args.py index 014ec6ac5da..4da84f4a5a8 100644 --- a/tests/test_trainers_args.py +++ b/tests/test_trainers_args.py @@ -42,8 +42,13 @@ class TestTrainerArg(TrlTestCase): - def test_cpo(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_cpo(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") training_args = CPOConfig( @@ -83,8 +88,13 @@ def test_cpo(self): assert trainer.args.model_init_kwargs == {"trust_remote_code": True} assert trainer.args.dataset_num_proc == 4 - def test_dpo(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_dpo(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") training_args = DPOConfig( @@ -150,8 +160,13 @@ def test_dpo(self): assert trainer.args.rpo_alpha == 0.5 assert trainer.args.discopop_tau == 0.1 - def test_kto(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_kto(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train") training_args = KTOConfig( @@ -195,9 +210,14 @@ def test_kto(self): assert trainer.args.ref_model_init_kwargs == {"trust_remote_code": True} assert trainer.args.dataset_num_proc == 4 + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize("mixtures_coef_list", [False, True]) - def test_nash_md(self, mixtures_coef_list): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_nash_md(self, mixtures_coef_list, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) @@ -217,9 +237,14 @@ def test_nash_md(self, mixtures_coef_list): ) assert trainer.args.mixture_coef == (0.5 if not mixtures_coef_list else [0.5, 0.6]) + @pytest.mark.parametrize( + "model_id", + [ + 
"trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize("beta_list", [False, True]) - def test_online_dpo(self, beta_list): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_online_dpo(self, beta_list, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) @@ -248,8 +273,13 @@ def test_online_dpo(self, beta_list): assert trainer.args.beta == (0.6 if not beta_list else [0.6, 0.7]) assert trainer.args.loss_type == "hinge" - def test_orpo(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_orpo(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") training_args = ORPOConfig( @@ -275,8 +305,13 @@ def test_orpo(self): assert not trainer.args.disable_dropout assert trainer.args.label_pad_token_id == -99 - def test_reward(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_reward(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) model = AutoModelForCausalLM.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") @@ -296,8 +331,13 @@ def test_reward(self): assert trainer.args.dataset_num_proc == 4 assert trainer.args.center_rewards_coefficient == 0.1 - def test_sft(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_sft(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") training_args = SFTConfig( self.tmp_dir, @@ -321,9 +361,14 @@ def test_sft(self): assert trainer.args.dataset_kwargs["append_concat_token"] assert trainer.args.eval_packing + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize("alpha_list", [False, True]) - def test_xpo(self, alpha_list): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_xpo(self, alpha_list, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id)