From d181a9d45a35c4f20aec93e90c3f5f8dd0d4c2c6 Mon Sep 17 00:00:00 2001 From: Mark <75219117+krammnic@users.noreply.github.com> Date: Sun, 3 Aug 2025 23:54:50 +0300 Subject: [PATCH 1/3] fictional commit --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 88225172f0..c8e97c0e05 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,4 @@ - - # torchtune [![Unit Test](https://github.com/pytorch/torchtune/actions/workflows/unit_test.yaml/badge.svg?branch=main)](https://github.com/pytorch/torchtune/actions/workflows/unit_test.yaml) From 4e84e65351b7b1eb8128b1a4686dbe819e99fd57 Mon Sep 17 00:00:00 2001 From: Mark <75219117+krammnic@users.noreply.github.com> Date: Mon, 4 Aug 2025 00:02:10 +0300 Subject: [PATCH 2/3] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index c8e97c0e05..763fe0340f 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ + # torchtune [![Unit Test](https://github.com/pytorch/torchtune/actions/workflows/unit_test.yaml/badge.svg?branch=main)](https://github.com/pytorch/torchtune/actions/workflows/unit_test.yaml) From 372495fe902359a1906d07366f68fc7adff7fbd2 Mon Sep 17 00:00:00 2001 From: Mark <75219117+krammnic@users.noreply.github.com> Date: Mon, 4 Aug 2025 00:03:02 +0300 Subject: [PATCH 3/3] Update rewards.py --- torchtune/dev/rl/rewards.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/torchtune/dev/rl/rewards.py b/torchtune/dev/rl/rewards.py index 95c45ee9b0..62762cfe06 100644 --- a/torchtune/dev/rl/rewards.py +++ b/torchtune/dev/rl/rewards.py @@ -296,21 +296,13 @@ def batched_rewards( metadata = {"func_names": [f.__name__ for f in reward_funcs]} for b in range(batch_size): - for g in range(grpo_size): - answer = answers[b][g] - text_completion = tokenizer.decode(completions[b, g].tolist()) - cot, potential_answer = extract_tags(f"{text_completion}") - for rw_idx, reward_func in enumerate(reward_funcs): - reward, success = reward_func(cot, 
answer, potential_answer) - rewards_tensor[b, g, rw_idx] += reward - successes_tensor[b, g, rw_idx] += success return rewards_tensor, successes_tensor, metadata