
Commit 4ab19c5

czxttkl authored and facebook-github-bot committed
Add an internal product model manager for signal loss
Summary: Since the code will become more and more specific to the ads signal loss use case, it is better to create a dedicated version which does not sync to OSS.

Reviewed By: j-jiafei

Differential Revision: D32591299

fbshipit-source-id: 02600fd68062a24ff22933e91faae3804a9da2fa
1 parent 4c470f4 commit 4ab19c5

File tree: 3 files changed (+20 lines, −2 lines)

reagent/core/types.py

Lines changed: 7 additions & 0 deletions
@@ -1063,6 +1063,13 @@ class RewardNetworkOutput(TensorDataClass):
     predicted_reward: torch.Tensor
 
 
+@dataclass
+class SyntheticRewardNetworkOutput(TensorDataClass):
+    predicted_reward: torch.Tensor
+    mask: torch.Tensor
+    output: torch.Tensor
+
+
 @dataclass
 class FrechetSortConfig:
     shape: float
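
For context, a minimal sketch (not part of the commit) of how the new dataclass could be constructed and read back; the tensor shapes are illustrative assumptions:

import torch
import reagent.core.types as rlt

# Assumed shapes: batch of 4 sequences, horizon of 3 steps.
output = torch.rand(4, 3)   # per-step reward predictions
mask = torch.ones(4, 3)     # 1.0 for valid steps, 0.0 for padding
pred_reward = (output * mask).sum(dim=1, keepdim=True)  # aggregated reward, shape (4, 1)

result = rlt.SyntheticRewardNetworkOutput(
    predicted_reward=pred_reward,
    mask=mask,
    output=output,
)
# Unlike RewardNetworkOutput, the per-step outputs and the mask stay
# available to the caller, not just their masked sum.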

reagent/models/synthetic_reward.py

Lines changed: 5 additions & 1 deletion
@@ -262,7 +262,11 @@ def forward(self, training_batch: rlt.MemoryNetworkInput):
         output_masked = output * mask
 
         pred_reward = output_masked.sum(dim=1, keepdim=True)
-        return rlt.RewardNetworkOutput(predicted_reward=pred_reward)
+        return rlt.SyntheticRewardNetworkOutput(
+            predicted_reward=pred_reward,
+            mask=mask,
+            output=output,
+        )
 
     def export_mlp(self):
         """

reagent/training/__init__.py

Lines changed: 8 additions & 1 deletion
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
-
+from reagent.core.fb_checker import IS_FB_ENVIRONMENT
 from reagent.training.c51_trainer import C51Trainer
 from reagent.training.cem_trainer import CEMTrainer
 from reagent.training.cfeval import BanditRewardNetTrainer
@@ -68,3 +68,10 @@
     "PPOTrainer",
     "PPOTrainerParameters",
 ]
+
+if IS_FB_ENVIRONMENT:
+    from reagent.training.fb.signal_loss_reward_decomp_trainer import (  # noqa
+        SignalLossRewardDecompTrainer,
+    )
+
+    __all__.append("SignalLossRewardDecompTrainer")
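
The guarded import keeps the internal trainer out of OSS builds while still exposing it through reagent.training inside the FB environment. The same check works at call sites; a small sketch (only IS_FB_ENVIRONMENT and the trainer name come from this commit, the rest is assumed):

import reagent.training as training
from reagent.core.fb_checker import IS_FB_ENVIRONMENT

if IS_FB_ENVIRONMENT:
    # Available only internally; registered in __all__ above.
    trainer_cls = training.SignalLossRewardDecompTrainer
else:
    # The fb/ module is never synced to OSS, so there is nothing to import here.
    trainer_cls = None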
