From d181a9d45a35c4f20aec93e90c3f5f8dd0d4c2c6 Mon Sep 17 00:00:00 2001 From: Mark <75219117+krammnic@users.noreply.github.com> Date: Sun, 3 Aug 2025 23:54:50 +0300 Subject: [PATCH 1/3] fictional commit --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 88225172f0..c8e97c0e05 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,4 @@ - - # torchtune [![Unit Test](https://github.com/pytorch/torchtune/actions/workflows/unit_test.yaml/badge.svg?branch=main)](https://github.com/pytorch/torchtune/actions/workflows/unit_test.yaml) From 4e84e65351b7b1eb8128b1a4686dbe819e99fd57 Mon Sep 17 00:00:00 2001 From: Mark <75219117+krammnic@users.noreply.github.com> Date: Mon, 4 Aug 2025 00:02:10 +0300 Subject: [PATCH 2/3] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index c8e97c0e05..763fe0340f 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ + # torchtune [![Unit Test](https://github.com/pytorch/torchtune/actions/workflows/unit_test.yaml/badge.svg?branch=main)](https://github.com/pytorch/torchtune/actions/workflows/unit_test.yaml) From 372495fe902359a1906d07366f68fc7adff7fbd2 Mon Sep 17 00:00:00 2001 From: Mark <75219117+krammnic@users.noreply.github.com> Date: Mon, 4 Aug 2025 00:03:02 +0300 Subject: [PATCH 3/3] Update rewards.py --- torchtune/dev/rl/rewards.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/torchtune/dev/rl/rewards.py b/torchtune/dev/rl/rewards.py index 95c45ee9b0..62762cfe06 100644 --- a/torchtune/dev/rl/rewards.py +++ b/torchtune/dev/rl/rewards.py @@ -296,21 +296,13 @@ def batched_rewards( metadata = {"func_names": [f.__name__ for f in reward_funcs]} for b in range(batch_size): - for g in range(grpo_size): - answer = answers[b][g] - text_completion = tokenizer.decode(completions[b, g].tolist()) - cot, potential_answer = extract_tags(f"{text_completion}") - for rw_idx, reward_func in enumerate(reward_funcs): - reward, success = reward_func(cot, 
answer, potential_answer) - rewards_tensor[b, g, rw_idx] += reward - successes_tensor[b, g, rw_idx] += success return rewards_tensor, successes_tensor, metadata