Add sample-level logging to toy_rl/toy_metrics; route Reduce.SAMPLE metrics to log_samples instead of log_stream for per-rank streaming backends

DNXie · DNXie · commit 9a52da50624f · 2025-10-06T18:05:48.000-07:00
diff --git a/apps/toy_rl/toy_metrics/main.py b/apps/toy_rl/toy_metrics/main.py
@@ -72,6 +72,19 @@ async def generate_step(self, step: int, substep: int):
             record_metric("policy/count_sequences_completed", 1, Reduce.SUM)
             record_metric("policy/avg_tokens_per_sample", value, Reduce.MEAN)
 
+            # Sample-level log (e.g. rollout info)
+            record_metric(
+                "rollout/samples",
+                {
+                    "rank": rank,
+                    "step": step,
+                    "substep": substep,
+                    "tokens_generated": value,
+                    "max_tokens": 50,
+                    "timestamp": time.time(),
+                },
+                Reduce.SAMPLE,
+            )
             print(f"🎯 Gen rank {rank}: Step {step}.{substep}, tokens={value}")
 
         return value
diff --git a/src/forge/observability/metrics.py b/src/forge/observability/metrics.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import asyncio
 import heapq
 import itertools
 import logging
@@ -664,7 +665,12 @@ def push(self, metric: Metric) -> None:
 
         # For PER_RANK_NO_REDUCE backends: stream immediately
         for backend in self.per_rank_no_reduce_backends:
-            backend.log_stream(metric=metric, step=self.step)
+            if metric.reduction == Reduce.SAMPLE:
+                # Wrap singleton Metric into expected {key: [list_of_dicts]} format
+                sample = {metric.key: [metric.value]}
+                asyncio.create_task(backend.log_samples(sample, self.step))
+            else:
+                backend.log_stream(metric=metric, step=self.step)
 
         # Always accumulate for reduction and state return
         key = metric.key