@@ -595,8 +595,190 @@ def load_math_suite(**kwargs):
595595 )
596596```
597597
598+ ## Tracking
599+
600+ The tracking module provides a unified interface for experiment tracking across different backends. This allows you to switch between tracking systems or use multiple simultaneously without changing your training code.
601+
602+ ### Available Trackers
603+
604+ #### WandbTracker
605+
606+ Track experiments using Weights & Biases:
607+
608+ ```python
609+ from verifiers.tracking import WandbTracker
610+
611+ tracker = WandbTracker(
612+     project="my-project",
613+     name="experiment-1",
614+     entity="my-team",
615+     tags=["baseline", "grpo"]
616+ )
617+ tracker.init()
618+ tracker.log_metrics({"accuracy": 0.95, "loss": 0.05}, step=1)
619+ tracker.finish()
620+ ```
621+
622+ #### CSVTracker
623+
624+ Track experiments locally using CSV files:
625+
626+ ```python
627+ from verifiers.tracking import CSVTracker
628+
629+ tracker = CSVTracker(
630+     log_dir="./experiment_logs",
631+     project="my-project",
632+     name="experiment-1"
633+ )
634+ tracker.init()  # Creates log directory and config.json
635+ tracker.log_metrics({"accuracy": 0.95}, step=1)
636+ tracker.log_table("completions", {
637+     "prompt": ["p1", "p2"],
638+     "completion": ["c1", "c2"],
639+     "reward": [0.9, 0.8]
640+ })
641+ ```
642+
643+ #### CompositeTracker
644+
645+ Use multiple trackers simultaneously:
646+
647+ ```python
648+ from verifiers.tracking import CompositeTracker, WandbTracker, CSVTracker
649+
650+ tracker = CompositeTracker([
651+     WandbTracker(project="my-project"),
652+     CSVTracker(log_dir="./logs")
653+ ])
654+ # All operations are forwarded to both trackers
655+ tracker.init()
656+ tracker.log_metrics({"loss": 0.05}, step=1)
657+ ```
658+
659+ #### NullTracker
660+
661+ No-op tracker for testing or when tracking is disabled:
662+
663+ ```python
664+ from verifiers.tracking import NullTracker
665+
666+ tracker = NullTracker()  # Does nothing
667+ tracker.log_metrics({"loss": 0.05}, step=1)  # No-op
668+ ```
669+
670+ #### MLFlowTracker
671+
672+ Track experiments using MLFlow:
673+
674+ ```python
675+ from verifiers.tracking import MLFlowTracker
676+
677+ tracker = MLFlowTracker(
678+     experiment_name="my-experiment",
679+     run_name="run-1",
680+     tracking_uri="http://localhost:5000",  # Optional
681+     tags={"env": "production"}
682+ )
683+ tracker.init()
684+ tracker.log_metrics({"accuracy": 0.95, "loss": 0.05}, step=1)
685+ tracker.log_config({"learning_rate": 0.001, "batch_size": 32})
686+ tracker.finish()
687+ ```
688+
689+ #### TensorBoardTracker
690+
691+ Track experiments using TensorBoard:
692+
693+ ```python
694+ from verifiers.tracking import TensorBoardTracker
695+
696+ tracker = TensorBoardTracker(
697+     log_dir="./runs",
698+     comment="grpo-experiment"
699+ )
700+ tracker.init()
701+ tracker.log_metrics({"accuracy": 0.95, "loss": 0.05}, step=1)
702+ tracker.log_config({"learning_rate": 0.001})
703+ tracker.finish()
704+ ```
705+
706+ ### Custom Trackers
707+
708+ Create your own tracker by extending the base `Tracker` class:
709+
710+ ```python
711+ from typing import Any, Optional
712+ import requests
713+ from verifiers.tracking import Tracker
714+ class MyCustomTracker(Tracker):
715+     def __init__(self, endpoint: str, **kwargs):
716+         super().__init__(**kwargs)
717+         self.endpoint = endpoint
718+
719+     def init(self, **kwargs) -> None:
720+         # Initialize your tracking backend
721+         self._initialized = True
722+
723+     def log_metrics(self, metrics: dict[str, float],
724+                     step: Optional[int] = None, **kwargs) -> None:
725+         # Send metrics to your backend
726+         requests.post(f"{self.endpoint}/metrics", json=metrics)
727+
728+     def log_table(self, table_name: str, data: dict[str, list[Any]],
729+                   step: Optional[int] = None, **kwargs) -> None:
730+         # Send table data to your backend
731+         requests.post(f"{self.endpoint}/tables/{table_name}", json=data)
732+
733+     def log_completions(self, prompts: list[str], completions: list[str],
734+                         rewards: list[float], step: Optional[int] = None,
735+                         **kwargs) -> None:
736+         # Log completion samples
737+         data = {"prompts": prompts, "completions": completions, "rewards": rewards}
738+         self.log_table("completions", data, step=step)
739+
740+     def log_config(self, config: dict[str, Any], **kwargs) -> None:
741+         super().log_config(config)
742+         requests.post(f"{self.endpoint}/config", json=config)
743+
744+     def finish(self, **kwargs) -> None:
745+         # Clean up resources
746+         pass
747+ ```
748+
749+ ### Integration with GRPOTrainer
750+
751+ The `GRPOTrainer` accepts a `trackers` parameter:
752+
753+ ```python
754+ import verifiers as vf
755+
756+ # Option 1: Explicit trackers
757+ tracker = vf.CSVTracker(log_dir="./logs")
758+ trainer = vf.GRPOTrainer(
759+     model=model,
760+     env=env,
761+     args=args,
762+     processing_class=tokenizer,
763+     trackers=[tracker]
764+ )
765+
766+ # Option 2: Auto-detection from args.report_to
767+ args.report_to = "wandb"  # Automatically uses WandbTracker
768+ trainer = vf.GRPOTrainer(model=model, env=env, args=args,
769+                          processing_class=tokenizer)
770+ ```
771+
598772## Best Practices
599773
774+ ### For Tracking
775+ - Use `CSVTracker` for local development and debugging
776+ - Use `WandbTracker` for team collaboration and cloud storage
777+ - Use `MLFlowTracker` when using MLFlow for experiment management
778+ - Use `TensorBoardTracker` for real-time metric visualization
779+ - Use `CompositeTracker` to log to multiple backends simultaneously
780+ - Create custom trackers for integration with internal tooling
781+
600782### For Rubrics
601783- Start simple with basic reward functions
602784- Use JudgeRubric when rule-based evaluation is insufficient
0 commit comments