Skip to content

Commit 99b1f74

Browse files
Sasha Sheng authored and facebook-github-bot committed
[refactor] reporting + meter (#844)
Summary: * Clean the API up to prep for moving some of the report/meter updating inside the base_model for pytorch lightning early stopping/checkpointing. Pull Request resolved: #844 Reviewed By: vedanuj Differential Revision: D27486844 Pulled By: ytsheng fbshipit-source-id: 759fbaccdc1ce2ef2a8e736fa179afeec48b89dc
1 parent 2379d0c commit 99b1f74

File tree

7 files changed

+95
-70
lines changed

7 files changed

+95
-70
lines changed

mmf/common/meter.py

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
from collections import defaultdict, deque
44

55
import torch
6+
from mmf.common.registry import registry
7+
from mmf.utils.distributed import reduce_dict
8+
from mmf.utils.general import scalarize_dict_values
69

710

811
class SmoothedValue:
@@ -55,11 +58,47 @@ def __init__(self, delimiter=", "):
5558
self.meters = defaultdict(SmoothedValue)
5659
self.delimiter = delimiter
5760

58-
def update(self, update_dict, batch_size):
59-
for k, v in update_dict.items():
60-
if isinstance(v, torch.Tensor):
61-
if v.dim() != 0:
62-
v = v.mean()
61+
def update_from_report(self, report, should_update_loss=True):
    """Update this meter with the losses/metrics carried by a report.

    Loss and metric dicts are first reduced via ``reduce_dict`` and their
    tensor values scalarized before being folded into the meter. When
    losses are included, a ``"<dataset_type>/total_loss"`` entry (the sum
    of all individual losses) is also added and registered in the global
    registry.

    Args:
        report (Report): report object which content is used to populate
            the current meter.
        should_update_loss (bool): when False (e.g. for dataset-level
            evaluation where only metrics matter), losses are skipped and
            no total-loss entry is produced. Defaults to True.

    Usage::

        >>> meter = Meter()
        >>> report = Report(prepared_batch, model_output)
        >>> meter.update_from_report(report)
    """
    # Metrics are optional on a report: only present once self.metrics(...)
    # has been attached by the caller (see evaluation/training loops).
    # NOTE(review): reduce_dict presumably aggregates values across
    # distributed workers — confirm in mmf.utils.distributed.
    if hasattr(report, "metrics"):
        metrics_dict = report.metrics
        reduced_metrics_dict = reduce_dict(metrics_dict)

    if should_update_loss:
        loss_dict = report.losses
        reduced_loss_dict = reduce_dict(loss_dict)

    # no_grad: the meter only records values; no autograd graph is needed.
    with torch.no_grad():
        meter_update_dict = {}
        if should_update_loss:
            # Collapse each loss tensor to a scalar before accumulation.
            meter_update_dict = scalarize_dict_values(reduced_loss_dict)
            total_loss_key = report.dataset_type + "/total_loss"
            total_loss = sum(meter_update_dict.values())
            # Registered globally so other components can read the latest
            # total loss back from the registry by key.
            registry.register(total_loss_key, total_loss)
            meter_update_dict.update({total_loss_key: total_loss})

        if hasattr(report, "metrics"):
            metrics_dict = scalarize_dict_values(reduced_metrics_dict)
            meter_update_dict.update(**metrics_dict)

        self._update(meter_update_dict, report.batch_size)
98+
99+
def _update(self, update_dict, batch_size):
100+
scalarized = scalarize_dict_values(update_dict)
101+
for k, v in scalarized.items():
63102
# Skipping .item() call
64103
# __format__() for tensor has .item
65104
# Therefore it will implicitly get called when needed

mmf/trainers/core/evaluation_loop.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def evaluation_loop(
3939
model_output = self.model(prepared_batch)
4040
report = Report(prepared_batch, model_output)
4141

42-
self.update_meter(report, meter)
42+
meter.update_from_report(report)
4343

4444
# accumulate necessary params for metric calculation
4545
if combined_report is None:
@@ -52,8 +52,8 @@ def evaluation_loop(
5252
)
5353
combined_report.batch_size += report.batch_size
5454

55-
# Each node generates a separate copy of predict JSON from the report,
56-
# which will be used to evaluate dataset-level metrics
55+
# Each node generates a separate copy of predict JSON from the
56+
# report, which will be used to evaluate dataset-level metrics
5757
# (such as mAP in object detection or CIDEr in image captioning)
5858
# Since `reporter.add_to_report` changes report keys (e.g. scores),
5959
# do this after `combined_report.accumulate_tensor_fields_and_loss`
@@ -73,7 +73,7 @@ def evaluation_loop(
7373
combined_report.prediction_report = reporter.report
7474

7575
combined_report.metrics = self.metrics(combined_report, combined_report)
76-
self.update_meter(combined_report, meter, eval_mode=True)
76+
meter.update_from_report(combined_report, should_update_loss=False)
7777

7878
# enable train mode again
7979
self.model.train()

mmf/trainers/core/reporting.py

Lines changed: 0 additions & 56 deletions
This file was deleted.

mmf/trainers/core/training_loop.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from typing import Any, Dict
77

88
import torch
9+
from mmf.common.meter import Meter
910
from mmf.common.registry import registry
1011
from mmf.common.report import Report
1112
from mmf.common.sample import to_device
@@ -21,6 +22,7 @@ class TrainerTrainingLoopMixin(ABC):
2122
current_epoch: int = 0
2223
current_iteration: int = 0
2324
num_updates: int = 0
25+
meter: Meter = Meter()
2426

2527
def training_loop(self) -> None:
2628
self.max_updates = self._calculate_max_updates()
@@ -118,7 +120,7 @@ def run_training_epoch(self) -> None:
118120
combined_report.metrics = self.metrics(
119121
combined_report, combined_report
120122
)
121-
self.update_meter(combined_report, self.meter)
123+
self.meter.update_from_report(combined_report)
122124

123125
self.on_update_end(
124126
report=combined_report, meter=self.meter, should_log=should_log

mmf/trainers/mmf_trainer.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from mmf.trainers.core.device import TrainerDeviceMixin
1717
from mmf.trainers.core.evaluation_loop import TrainerEvaluationLoopMixin
1818
from mmf.trainers.core.profiling import TrainerProfilingMixin
19-
from mmf.trainers.core.reporting import TrainerReportingMixin
2019
from mmf.trainers.core.training_loop import TrainerTrainingLoopMixin
2120
from mmf.utils.build import build_model, build_optimizer
2221
from mmf.utils.general import print_model_parameters
@@ -32,7 +31,6 @@ class MMFTrainer(
3231
TrainerTrainingLoopMixin,
3332
TrainerDeviceMixin,
3433
TrainerEvaluationLoopMixin,
35-
TrainerReportingMixin,
3634
TrainerProfilingMixin,
3735
BaseTrainer,
3836
):

mmf/utils/general.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,12 @@
99
import time
1010
import warnings
1111
from bisect import bisect
12-
from typing import Any, Callable
12+
from typing import Any, Callable, Dict
1313

1414
import torch
1515
from mmf.utils.distributed import get_rank, get_world_size, is_xla
1616
from mmf.utils.file_io import PathManager
17-
from torch import nn
17+
from torch import Tensor, nn
1818

1919

2020
logger = logging.getLogger(__name__)
@@ -446,3 +446,20 @@ def retry_n(n: int, fn: Callable, *args, log_tries=False, **kwargs) -> Any:
446446
raise
447447

448448
return output
449+
450+
451+
def scalarize_dict_values(dict_with_tensors: Dict[str, Tensor]) -> Dict[str, Tensor]:
    """Return a new dict whose tensor values are reduced to scalars.

    Tensor values with one or more dimensions are replaced by their mean;
    0-dim tensors and non-tensor values are passed through unchanged. The
    input dict is not mutated.

    Args:
        dict_with_tensors: mapping whose values may be tensors of any rank.

    Returns:
        Dict: a new dict with the same keys and scalarized values.
    """
    dict_with_scalar_tensors = {}
    for key, val in dict_with_tensors.items():
        # Reduce non-scalar tensors to their mean so downstream consumers
        # (e.g. meters) can treat every value as a single number.
        if torch.is_tensor(val) and val.dim() != 0:
            val = val.mean()
        dict_with_scalar_tensors[key] = val
    return dict_with_scalar_tensors

tests/common/test_meter.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Copyright (c) Facebook, Inc. and its affiliates.
2+
import unittest
3+
4+
import torch
5+
from mmf.common.meter import Meter
6+
from mmf.common.report import Report
7+
from mmf.common.sample import SampleList
8+
9+
10+
class TestMeter(unittest.TestCase):
    def test_meter_update_from_report(self):
        """Feeding losses 0..4 through update_from_report averages to 2.0."""
        meter = Meter()
        batch = SampleList(
            {"targets": torch.tensor([1, 2, 3, 4]), "dataset_type": "val"}
        )
        for step in range(5):
            output = {
                "scores": torch.tensor([0, 1, 2, 3]),
                "losses": {"loss": float(step)},
            }
            meter.update_from_report(Report(batch, output))

        # mean(0, 1, 2, 3, 4) == 2.0 for both the global and windowed average
        self.assertEqual(meter.loss.global_avg, 2.0)
        self.assertEqual(meter.loss.avg, 2.0)

0 commit comments

Comments
 (0)