Pull the dependency on protobuf out of the DemixingModule

The DemixingModule is used for both encoding and decoding, but the decoding path should not depend on protobuf. This CL moves the conversion from the protobuf UserMetadata out of the class and into a new util (`downmixing_reconstruction_util`).

PiperOrigin-RevId: 725680089
AOMediaCodec · Feb 11, 2025 · 7ae60a1 · 7ae60a1
1 parent fa7ec41
commit 7ae60a1
Show file tree

Hide file tree

Showing 11 changed files with 396 additions and 88 deletions.
diff --git a/iamf/cli/BUILD b/iamf/cli/BUILD
@@ -230,6 +230,7 @@ cc_library(
         ":rendering_mix_presentation_finalizer",
         "//iamf/cli/proto:test_vector_metadata_cc_proto",
         "//iamf/cli/proto:user_metadata_cc_proto",
+        "//iamf/cli/proto_conversion:downmixing_reconstruction_util",
         "//iamf/cli/proto_conversion/proto_to_obu:arbitrary_obu_generator",
         "//iamf/cli/proto_conversion/proto_to_obu:audio_element_generator",
         "//iamf/cli/proto_conversion/proto_to_obu:audio_frame_generator",

diff --git a/iamf/cli/demixing_module.cc b/iamf/cli/demixing_module.cc
@@ -31,10 +31,8 @@
 #include "iamf/cli/audio_element_with_data.h"
 #include "iamf/cli/audio_frame_decoder.h"
 #include "iamf/cli/audio_frame_with_data.h"
+#include "iamf/cli/channel_label.h"
 #include "iamf/cli/cli_util.h"
-#include "iamf/cli/proto/audio_frame.pb.h"
-#include "iamf/cli/proto/user_metadata.pb.h"
-#include "iamf/cli/proto_conversion/channel_label_utils.h"
 #include "iamf/common/utils/macros.h"
 #include "iamf/common/utils/numeric_utils.h"
 #include "iamf/obu/audio_element.h"
@@ -48,8 +46,8 @@ namespace {
 
 using enum ChannelLabel::Label;
 
-using DemxingMetadataForAudioElementId =
-    DemixingModule::DemxingMetadataForAudioElementId;
+using DemixingMetadataForAudioElementId =
+    DemixingModule::DemixingMetadataForAudioElementId;
 
 absl::Status S7ToS5DownMixer(const DownMixingParams& down_mixing_params,
                              LabelSamplesMap& label_to_samples) {
@@ -438,10 +436,12 @@ absl::Status Tf2ToT2Demixer(const DownMixingParams& down_mixing_params,
   return absl::OkStatus();
 }
 
+// Helper to fill in the fields of `DemixingMetadataForAudioElementId`.
 absl::Status FillRequiredDemixingMetadata(
     const absl::flat_hash_set<ChannelLabel::Label>& labels_to_demix,
-    const AudioElementWithData& audio_element_with_data,
-    DemxingMetadataForAudioElementId& demixing_metadata) {
+    const SubstreamIdLabelsMap& substream_id_to_labels,
+    const LabelGainMap& label_to_output_gain,
+    DemixingMetadataForAudioElementId& demixing_metadata) {
   auto& down_mixers = demixing_metadata.down_mixers;
   auto& demixers = demixing_metadata.demixers;
 
@@ -450,10 +450,8 @@ absl::Status FillRequiredDemixingMetadata(
         "`FillRequiredDemixingMetadata()` should only be called once per Audio "
         "Element ID");
   }
-  demixing_metadata.substream_id_to_labels =
-      audio_element_with_data.substream_id_to_labels;
-  demixing_metadata.label_to_output_gain =
-      audio_element_with_data.label_to_output_gain;
+  demixing_metadata.substream_id_to_labels = substream_id_to_labels;
+  demixing_metadata.label_to_output_gain = label_to_output_gain;
 
   // Find the input surround number.
   int input_surround_number = 0;
@@ -714,9 +712,10 @@ absl::Status ApplyDemixers(const std::list<Demixer>& demixers,
 
 absl::Status GetDemixerMetadata(
     const DecodedUleb128 audio_element_id,
-    const absl::flat_hash_map<DecodedUleb128, DemxingMetadataForAudioElementId>&
+    const absl::flat_hash_map<DecodedUleb128,
+                              DemixingMetadataForAudioElementId>&
         audio_element_id_to_demixing_metadata,
-    const DemxingMetadataForAudioElementId*& demixing_metadata) {
+    const DemixingMetadataForAudioElementId*& demixing_metadata) {
   const auto iter =
       audio_element_id_to_demixing_metadata.find(audio_element_id);
   if (iter == audio_element_id_to_demixing_metadata.end()) {
@@ -802,26 +801,15 @@ absl::Status DemixingModule::FindSamplesOrDemixedSamples(
 
 absl::StatusOr<DemixingModule>
 DemixingModule::CreateForDownMixingAndReconstruction(
-    const iamf_tools_cli_proto::UserMetadata& user_metadata,
-    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
-        audio_elements) {
-  absl::flat_hash_map<DecodedUleb128, DemxingMetadataForAudioElementId>
+    const absl::flat_hash_map<
+        DecodedUleb128, DownmixingAndReconstructionConfig>&& id_to_config_map) {
+  absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>
       audio_element_id_to_demixing_metadata;
-  for (const auto& audio_frame_metadata :
-       user_metadata.audio_frame_metadata()) {
-    const auto audio_element_id = audio_frame_metadata.audio_element_id();
-    auto audio_element = audio_elements.find(audio_element_id);
-    if (audio_element == audio_elements.end()) {
-      return absl::InvalidArgumentError(
-          absl::StrCat("Audio Element ID= ", audio_element_id, " not found"));
-    }
-
-    absl::flat_hash_set<ChannelLabel::Label> input_channel_labels;
-    RETURN_IF_NOT_OK(ChannelLabelUtils::SelectConvertAndFillLabels(
-        audio_frame_metadata, input_channel_labels));
 
+  for (const auto& [audio_element_id, config] : id_to_config_map) {
     RETURN_IF_NOT_OK(FillRequiredDemixingMetadata(
-        input_channel_labels, audio_element->second,
+        config.user_labels, config.substream_id_to_labels,
+        config.label_to_output_gain,
         audio_element_id_to_demixing_metadata[audio_element_id]));
   }
 
@@ -831,7 +819,7 @@ DemixingModule::CreateForDownMixingAndReconstruction(
 absl::StatusOr<DemixingModule> DemixingModule::CreateForReconstruction(
     const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
         audio_elements) {
-  absl::flat_hash_map<DecodedUleb128, DemxingMetadataForAudioElementId>
+  absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>
       audio_element_id_to_demixing_metadata;
   for (const auto& [audio_element_id, audio_element_with_data] :
        audio_elements) {
@@ -842,11 +830,12 @@ absl::StatusOr<DemixingModule> DemixingModule::CreateForReconstruction(
     }
 
     auto [iter, inserted] = audio_element_id_to_demixing_metadata.insert(
-        {audio_element_id, DemxingMetadataForAudioElementId()});
+        {audio_element_id, DemixingMetadataForAudioElementId()});
     CHECK(inserted) << "The target map was initially empty, iterating over "
                        "`audio_elements` cannot produce a duplicate key.";
     RETURN_IF_NOT_OK(FillRequiredDemixingMetadata(
-        *labels_to_reconstruct, audio_element_with_data, iter->second));
+        *labels_to_reconstruct, audio_element_with_data.substream_id_to_labels,
+        audio_element_with_data.label_to_output_gain, iter->second));
     iter->second.down_mixers.clear();
   }
 
@@ -858,7 +847,7 @@ absl::Status DemixingModule::DownMixSamplesToSubstreams(
     LabelSamplesMap& input_label_to_samples,
     absl::flat_hash_map<uint32_t, SubstreamData>&
         substream_id_to_substream_data) const {
-  const DemxingMetadataForAudioElementId* demixing_metadata = nullptr;
+  const DemixingMetadataForAudioElementId* demixing_metadata = nullptr;
   RETURN_IF_NOT_OK(GetDemixerMetadata(audio_element_id,
                                       audio_element_id_to_demixing_metadata_,
                                       demixing_metadata));
@@ -972,7 +961,7 @@ absl::Status DemixingModule::DemixAudioSamples(
 absl::Status DemixingModule::GetDownMixers(
     DecodedUleb128 audio_element_id,
     const std::list<Demixer>*& down_mixers) const {
-  const DemxingMetadataForAudioElementId* demixing_metadata = nullptr;
+  const DemixingMetadataForAudioElementId* demixing_metadata = nullptr;
   RETURN_IF_NOT_OK(GetDemixerMetadata(audio_element_id,
                                       audio_element_id_to_demixing_metadata_,
                                       demixing_metadata));
@@ -983,7 +972,7 @@ absl::Status DemixingModule::GetDownMixers(
 absl::Status DemixingModule::GetDemixers(
     DecodedUleb128 audio_element_id,
     const std::list<Demixer>*& demixers) const {
-  const DemxingMetadataForAudioElementId* demixing_metadata = nullptr;
+  const DemixingMetadataForAudioElementId* demixing_metadata = nullptr;
   RETURN_IF_NOT_OK(GetDemixerMetadata(audio_element_id,
                                       audio_element_id_to_demixing_metadata_,
                                       demixing_metadata));

diff --git a/iamf/cli/demixing_module.h b/iamf/cli/demixing_module.h
@@ -20,6 +20,7 @@
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/node_hash_map.h"
 #include "absl/status/status.h"
 #include "absl/status/statusor.h"
@@ -93,13 +94,19 @@ typedef absl::Status (*Demixer)(const DownMixingParams&, LabelSamplesMap&);
  */
 class DemixingModule {
  public:
-  struct DemxingMetadataForAudioElementId {
+  struct DemixingMetadataForAudioElementId {
     std::list<Demixer> demixers;
     std::list<Demixer> down_mixers;
     SubstreamIdLabelsMap substream_id_to_labels;
     LabelGainMap label_to_output_gain;
   };
 
+  /*!\brief Per-audio-element input to `CreateForDownMixingAndReconstruction`.
+   *
+   * Holds everything needed to configure down-mixing and reconstruction for
+   * one audio element.
+   */
+  struct DownmixingAndReconstructionConfig {
+    // User-provided channel labels for the audio element.
+    absl::flat_hash_set<ChannelLabel::Label> user_labels;
+    // `substream_id_to_labels` of the corresponding `AudioElementWithData`.
+    SubstreamIdLabelsMap substream_id_to_labels;
+    // `label_to_output_gain` of the corresponding `AudioElementWithData`.
+    LabelGainMap label_to_output_gain;
+  };
+
   /*!\brief Creates a `DemixingModule` for down-mixing and reconstruction.
    *
    * This is most useful from the context of an encoder. For example, to encode
@@ -110,15 +117,16 @@ class DemixingModule {
    * information about the channels and the specific down-mixers and demixers
    * needed for that audio element.
    *
-   * \param user_metadata Input user metadata.
-   * \param audio_elements Audio elements. Used only for `audio_element_id`,
-   *        `substream_id_to_labels`, and `label_to_output_gain`.
+   * \param id_to_config_map Map of Audio Element IDs to
+   * `DownmixingAndReconstructionConfig`, which contains the user-provided
+   * labels and the `substream_id_to_labels` and `label_to_output_gain` from
+   * the corresponding `AudioElementWithData`.
    * \return `absl::OkStatus()` on success. A specific status on failure.
    */
   static absl::StatusOr<DemixingModule> CreateForDownMixingAndReconstruction(
-      const iamf_tools_cli_proto::UserMetadata& user_metadata,
-      const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
-          audio_elements);
+      const absl::flat_hash_map<DecodedUleb128,
+                                DownmixingAndReconstructionConfig>&&
+          id_to_config_map);
 
   /*!\brief Initializes for reconstruction (demixing) the input audio elements.
    *
@@ -210,12 +218,12 @@ class DemixingModule {
    *        to demixing metadata.
    */
   DemixingModule(
-      absl::flat_hash_map<DecodedUleb128, DemxingMetadataForAudioElementId>&&
+      absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>&&
           audio_element_id_to_demixing_metadata)
       : audio_element_id_to_demixing_metadata_(
             std::move(audio_element_id_to_demixing_metadata)) {}
 
-  const absl::flat_hash_map<DecodedUleb128, DemxingMetadataForAudioElementId>
+  const absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>
       audio_element_id_to_demixing_metadata_;
 };
 

diff --git a/iamf/cli/iamf_encoder.cc b/iamf/cli/iamf_encoder.cc
@@ -37,6 +37,7 @@
 #include "iamf/cli/parameters_manager.h"
 #include "iamf/cli/proto/test_vector_metadata.pb.h"
 #include "iamf/cli/proto/user_metadata.pb.h"
+#include "iamf/cli/proto_conversion/downmixing_reconstruction_util.h"
 #include "iamf/cli/proto_conversion/proto_to_obu/arbitrary_obu_generator.h"
 #include "iamf/cli/proto_conversion/proto_to_obu/audio_element_generator.h"
 #include "iamf/cli/proto_conversion/proto_to_obu/audio_frame_generator.h"
@@ -156,8 +157,15 @@ absl::StatusOr<IamfEncoder> IamfEncoder::Create(
   // Down-mix the audio samples and then demix audio samples while decoding
   // them. This is useful to create multi-layer audio elements and to determine
   // the recon gain parameters and to measuring loudness.
+  const absl::StatusOr<absl::flat_hash_map<
+      DecodedUleb128, DemixingModule::DownmixingAndReconstructionConfig>>
+      audio_element_id_to_demixing_metadata =
+          CreateAudioElementIdToDemixingMetadata(user_metadata, audio_elements);
+  if (!audio_element_id_to_demixing_metadata.ok()) {
+    return audio_element_id_to_demixing_metadata.status();
+  }
   auto demixing_module = DemixingModule::CreateForDownMixingAndReconstruction(
-      user_metadata, audio_elements);
+      *std::move(audio_element_id_to_demixing_metadata));
   if (!demixing_module.ok()) {
     return demixing_module.status();
   }

diff --git a/iamf/cli/proto_conversion/BUILD b/iamf/cli/proto_conversion/BUILD
@@ -27,6 +27,30 @@ cc_library(
     ],
 )
 
+# Utility that converts the proto `UserMetadata` into the
+# `DemixingModule::DownmixingAndReconstructionConfig` map used to create a
+# `DemixingModule` for down-mixing and reconstruction.
+cc_library(
+    name = "downmixing_reconstruction_util",
+    srcs = ["downmixing_reconstruction_util.cc"],
+    hdrs = ["downmixing_reconstruction_util.h"],
+    visibility = [
+        "//iamf/cli:__pkg__",
+        "//iamf/cli/proto_conversion/tests:__pkg__",
+        "//iamf/cli/tests:__pkg__",
+    ],
+    deps = [
+        ":channel_label_utils",
+        "//iamf/cli:audio_element_with_data",
+        "//iamf/cli:channel_label",
+        "//iamf/cli:demixing_module",
+        "//iamf/common/utils:macros",
+        "//iamf/obu:types",
+        "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
+        "@com_google_absl//absl/status",
+        "@com_google_absl//absl/status:statusor",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
 cc_library(
     name = "lookup_tables",
     hdrs = ["lookup_tables.h"],

diff --git a/iamf/cli/proto_conversion/downmixing_reconstruction_util.cc b/iamf/cli/proto_conversion/downmixing_reconstruction_util.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2025, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 3-Clause Clear License
+ * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
+ * License was not distributed with this source code in the LICENSE file, you
+ * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
+ * Alliance for Open Media Patent License 1.0 was not distributed with this
+ * source code in the PATENTS file, you can obtain it at
+ * www.aomedia.org/license/patent.
+ */
+
+#include "iamf/cli/proto_conversion/downmixing_reconstruction_util.h"
+
+#include <utility>
+
+#include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/str_cat.h"
+#include "iamf/cli/audio_element_with_data.h"
+#include "iamf/cli/channel_label.h"
+#include "iamf/cli/demixing_module.h"
+#include "iamf/cli/proto_conversion/channel_label_utils.h"
+#include "iamf/common/utils/macros.h"
+#include "iamf/obu/types.h"
+
+namespace iamf_tools {
+
+// Builds, for every `audio_frame_metadata` entry in `user_metadata`, the
+// `DownmixingAndReconstructionConfig` of the audio element it refers to.
+//
+// Returns an `InvalidArgumentError` if an entry refers to an Audio Element ID
+// that is absent from `audio_elements`, or the error reported by
+// `ChannelLabelUtils::SelectConvertAndFillLabels` if it fails.
+absl::StatusOr<absl::flat_hash_map<
+    DecodedUleb128, DemixingModule::DownmixingAndReconstructionConfig>>
+CreateAudioElementIdToDemixingMetadata(
+    const iamf_tools_cli_proto::UserMetadata& user_metadata,
+    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
+        audio_elements) {
+  absl::flat_hash_map<DecodedUleb128,
+                      DemixingModule::DownmixingAndReconstructionConfig>
+      result;
+  for (const auto& user_audio_frame_metadata :
+       user_metadata.audio_frame_metadata()) {
+    // Locate the audio element this frame metadata refers to.
+    const auto audio_element_id = user_audio_frame_metadata.audio_element_id();
+    const auto audio_element = audio_elements.find(audio_element_id);
+    if (audio_element == audio_elements.end()) {
+      return absl::InvalidArgumentError(
+          absl::StrCat("Audio Element ID= ", audio_element_id, " not found"));
+    }
+
+    // Convert the proto labels to the internal label representation.
+    absl::flat_hash_set<ChannelLabel::Label> user_channel_labels;
+    RETURN_IF_NOT_OK(ChannelLabelUtils::SelectConvertAndFillLabels(
+        user_audio_frame_metadata, user_channel_labels));
+
+    // Pair the converted labels with the maps of the audio element. The label
+    // set is a local that is not used again, so move it instead of copying.
+    // NOTE(review): a repeated Audio Element ID overwrites the earlier entry;
+    // confirm whether duplicates should be rejected instead.
+    const auto& audio_element_with_data = audio_element->second;
+    result[audio_element_id] = {std::move(user_channel_labels),
+                                audio_element_with_data.substream_id_to_labels,
+                                audio_element_with_data.label_to_output_gain};
+  }
+
+  return result;
+}
+
+}  // namespace iamf_tools
diff --git a/iamf/cli/proto_conversion/downmixing_reconstruction_util.h b/iamf/cli/proto_conversion/downmixing_reconstruction_util.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2025, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 3-Clause Clear License
+ * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
+ * License was not distributed with this source code in the LICENSE file, you
+ * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
+ * Alliance for Open Media Patent License 1.0 was not distributed with this
+ * source code in the PATENTS file, you can obtain it at
+ * www.aomedia.org/license/patent.
+ */
+
+#ifndef CLI_PROTO_CONVERSION_DOWNMIXING_RECONSTRUCTION_UTIL_H_
+#define CLI_PROTO_CONVERSION_DOWNMIXING_RECONSTRUCTION_UTIL_H_
+
+#include "absl/container/flat_hash_map.h"
+#include "absl/status/statusor.h"
+#include "iamf/cli/audio_element_with_data.h"
+#include "iamf/cli/demixing_module.h"
+#include "iamf/obu/types.h"
+
+namespace iamf_tools {
+
+/*!\brief Creates a map of Audio Element ID to
+ *        `DownmixingAndReconstructionConfig`.
+ *
+ * Each `audio_frame_metadata` entry in `user_metadata` contributes the config
+ * for the Audio Element ID it refers to. If several entries share an Audio
+ * Element ID, the last one overwrites the earlier ones.
+ *
+ * \param user_metadata Proto UserMetadata, the source of the channel labels.
+ * \param audio_elements Audio elements, keyed by Audio Element ID, which
+ *        provide `substream_id_to_labels` and `label_to_output_gain`.
+ * \return Map of Audio Element ID to `DownmixingAndReconstructionConfig` on
+ *         success. An error if any Audio Element ID is not found in
+ *         `audio_elements`. An error if any labels fail to be converted.
+ */
+absl::StatusOr<absl::flat_hash_map<
+    DecodedUleb128, DemixingModule::DownmixingAndReconstructionConfig>>
+CreateAudioElementIdToDemixingMetadata(
+    const iamf_tools_cli_proto::UserMetadata& user_metadata,
+    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
+        audio_elements);
+
+}  // namespace iamf_tools
+
+#endif  // CLI_PROTO_CONVERSION_DOWNMIXING_RECONSTRUCTION_UTIL_H_