Skip to content

Commit

Permalink
Pull the dependency on protobuf out of the DemixingModule
Browse files Browse the repository at this point in the history
The DemixingModule is used for both encoding and decoding, but for decoding, we don't want to depend on protobuf.

This CL moves the conversion from protobuf UserMetadata out of the class and into a new util.

PiperOrigin-RevId: 725680089
  • Loading branch information
trevorknight authored and jwcullen committed Feb 11, 2025
1 parent fa7ec41 commit 7ae60a1
Show file tree
Hide file tree
Showing 11 changed files with 396 additions and 88 deletions.
1 change: 1 addition & 0 deletions iamf/cli/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ cc_library(
":rendering_mix_presentation_finalizer",
"//iamf/cli/proto:test_vector_metadata_cc_proto",
"//iamf/cli/proto:user_metadata_cc_proto",
"//iamf/cli/proto_conversion:downmixing_reconstruction_util",
"//iamf/cli/proto_conversion/proto_to_obu:arbitrary_obu_generator",
"//iamf/cli/proto_conversion/proto_to_obu:audio_element_generator",
"//iamf/cli/proto_conversion/proto_to_obu:audio_frame_generator",
Expand Down
61 changes: 25 additions & 36 deletions iamf/cli/demixing_module.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,8 @@
#include "iamf/cli/audio_element_with_data.h"
#include "iamf/cli/audio_frame_decoder.h"
#include "iamf/cli/audio_frame_with_data.h"
#include "iamf/cli/channel_label.h"
#include "iamf/cli/cli_util.h"
#include "iamf/cli/proto/audio_frame.pb.h"
#include "iamf/cli/proto/user_metadata.pb.h"
#include "iamf/cli/proto_conversion/channel_label_utils.h"
#include "iamf/common/utils/macros.h"
#include "iamf/common/utils/numeric_utils.h"
#include "iamf/obu/audio_element.h"
Expand All @@ -48,8 +46,8 @@ namespace {

using enum ChannelLabel::Label;

using DemxingMetadataForAudioElementId =
DemixingModule::DemxingMetadataForAudioElementId;
using DemixingMetadataForAudioElementId =
DemixingModule::DemixingMetadataForAudioElementId;

absl::Status S7ToS5DownMixer(const DownMixingParams& down_mixing_params,
LabelSamplesMap& label_to_samples) {
Expand Down Expand Up @@ -438,10 +436,12 @@ absl::Status Tf2ToT2Demixer(const DownMixingParams& down_mixing_params,
return absl::OkStatus();
}

// Helper to fill in the fields of `DemixingMetadataForAudioElementId`.
absl::Status FillRequiredDemixingMetadata(
const absl::flat_hash_set<ChannelLabel::Label>& labels_to_demix,
const AudioElementWithData& audio_element_with_data,
DemxingMetadataForAudioElementId& demixing_metadata) {
const SubstreamIdLabelsMap& substream_id_to_labels,
const LabelGainMap& label_to_output_gain,
DemixingMetadataForAudioElementId& demixing_metadata) {
auto& down_mixers = demixing_metadata.down_mixers;
auto& demixers = demixing_metadata.demixers;

Expand All @@ -450,10 +450,8 @@ absl::Status FillRequiredDemixingMetadata(
"`FillRequiredDemixingMetadata()` should only be called once per Audio "
"Element ID");
}
demixing_metadata.substream_id_to_labels =
audio_element_with_data.substream_id_to_labels;
demixing_metadata.label_to_output_gain =
audio_element_with_data.label_to_output_gain;
demixing_metadata.substream_id_to_labels = substream_id_to_labels;
demixing_metadata.label_to_output_gain = label_to_output_gain;

// Find the input surround number.
int input_surround_number = 0;
Expand Down Expand Up @@ -714,9 +712,10 @@ absl::Status ApplyDemixers(const std::list<Demixer>& demixers,

absl::Status GetDemixerMetadata(
const DecodedUleb128 audio_element_id,
const absl::flat_hash_map<DecodedUleb128, DemxingMetadataForAudioElementId>&
const absl::flat_hash_map<DecodedUleb128,
DemixingMetadataForAudioElementId>&
audio_element_id_to_demixing_metadata,
const DemxingMetadataForAudioElementId*& demixing_metadata) {
const DemixingMetadataForAudioElementId*& demixing_metadata) {
const auto iter =
audio_element_id_to_demixing_metadata.find(audio_element_id);
if (iter == audio_element_id_to_demixing_metadata.end()) {
Expand Down Expand Up @@ -802,26 +801,15 @@ absl::Status DemixingModule::FindSamplesOrDemixedSamples(

absl::StatusOr<DemixingModule>
DemixingModule::CreateForDownMixingAndReconstruction(
const iamf_tools_cli_proto::UserMetadata& user_metadata,
const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
audio_elements) {
absl::flat_hash_map<DecodedUleb128, DemxingMetadataForAudioElementId>
const absl::flat_hash_map<
DecodedUleb128, DownmixingAndReconstructionConfig>&& id_to_config_map) {
absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>
audio_element_id_to_demixing_metadata;
for (const auto& audio_frame_metadata :
user_metadata.audio_frame_metadata()) {
const auto audio_element_id = audio_frame_metadata.audio_element_id();
auto audio_element = audio_elements.find(audio_element_id);
if (audio_element == audio_elements.end()) {
return absl::InvalidArgumentError(
absl::StrCat("Audio Element ID= ", audio_element_id, " not found"));
}

absl::flat_hash_set<ChannelLabel::Label> input_channel_labels;
RETURN_IF_NOT_OK(ChannelLabelUtils::SelectConvertAndFillLabels(
audio_frame_metadata, input_channel_labels));

for (const auto& [audio_element_id, config] : id_to_config_map) {
RETURN_IF_NOT_OK(FillRequiredDemixingMetadata(
input_channel_labels, audio_element->second,
config.user_labels, config.substream_id_to_labels,
config.label_to_output_gain,
audio_element_id_to_demixing_metadata[audio_element_id]));
}

Expand All @@ -831,7 +819,7 @@ DemixingModule::CreateForDownMixingAndReconstruction(
absl::StatusOr<DemixingModule> DemixingModule::CreateForReconstruction(
const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
audio_elements) {
absl::flat_hash_map<DecodedUleb128, DemxingMetadataForAudioElementId>
absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>
audio_element_id_to_demixing_metadata;
for (const auto& [audio_element_id, audio_element_with_data] :
audio_elements) {
Expand All @@ -842,11 +830,12 @@ absl::StatusOr<DemixingModule> DemixingModule::CreateForReconstruction(
}

auto [iter, inserted] = audio_element_id_to_demixing_metadata.insert(
{audio_element_id, DemxingMetadataForAudioElementId()});
{audio_element_id, DemixingMetadataForAudioElementId()});
CHECK(inserted) << "The target map was initially empty, iterating over "
"`audio_elements` cannot produce a duplicate key.";
RETURN_IF_NOT_OK(FillRequiredDemixingMetadata(
*labels_to_reconstruct, audio_element_with_data, iter->second));
*labels_to_reconstruct, audio_element_with_data.substream_id_to_labels,
audio_element_with_data.label_to_output_gain, iter->second));
iter->second.down_mixers.clear();
}

Expand All @@ -858,7 +847,7 @@ absl::Status DemixingModule::DownMixSamplesToSubstreams(
LabelSamplesMap& input_label_to_samples,
absl::flat_hash_map<uint32_t, SubstreamData>&
substream_id_to_substream_data) const {
const DemxingMetadataForAudioElementId* demixing_metadata = nullptr;
const DemixingMetadataForAudioElementId* demixing_metadata = nullptr;
RETURN_IF_NOT_OK(GetDemixerMetadata(audio_element_id,
audio_element_id_to_demixing_metadata_,
demixing_metadata));
Expand Down Expand Up @@ -972,7 +961,7 @@ absl::Status DemixingModule::DemixAudioSamples(
absl::Status DemixingModule::GetDownMixers(
DecodedUleb128 audio_element_id,
const std::list<Demixer>*& down_mixers) const {
const DemxingMetadataForAudioElementId* demixing_metadata = nullptr;
const DemixingMetadataForAudioElementId* demixing_metadata = nullptr;
RETURN_IF_NOT_OK(GetDemixerMetadata(audio_element_id,
audio_element_id_to_demixing_metadata_,
demixing_metadata));
Expand All @@ -983,7 +972,7 @@ absl::Status DemixingModule::GetDownMixers(
absl::Status DemixingModule::GetDemixers(
DecodedUleb128 audio_element_id,
const std::list<Demixer>*& demixers) const {
const DemxingMetadataForAudioElementId* demixing_metadata = nullptr;
const DemixingMetadataForAudioElementId* demixing_metadata = nullptr;
RETURN_IF_NOT_OK(GetDemixerMetadata(audio_element_id,
audio_element_id_to_demixing_metadata_,
demixing_metadata));
Expand Down
26 changes: 17 additions & 9 deletions iamf/cli/demixing_module.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/container/node_hash_map.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
Expand Down Expand Up @@ -93,13 +94,19 @@ typedef absl::Status (*Demixer)(const DownMixingParams&, LabelSamplesMap&);
*/
class DemixingModule {
public:
struct DemxingMetadataForAudioElementId {
struct DemixingMetadataForAudioElementId {
std::list<Demixer> demixers;
std::list<Demixer> down_mixers;
SubstreamIdLabelsMap substream_id_to_labels;
LabelGainMap label_to_output_gain;
};

// Per-audio-element input to `CreateForDownMixingAndReconstruction()`. It
// bundles plain (non-protobuf) data so that the module can be configured
// without depending on the proto `UserMetadata`.
struct DownmixingAndReconstructionConfig {
// User-provided channel labels for the audio element; forwarded as the set of
// labels to demix when the demixing metadata is filled in.
absl::flat_hash_set<ChannelLabel::Label> user_labels;
// Copied verbatim into the demixing metadata for the audio element.
SubstreamIdLabelsMap substream_id_to_labels;
// Copied verbatim into the demixing metadata for the audio element.
LabelGainMap label_to_output_gain;
};

/*!\brief Creates a `DemixingModule` for down-mixing and reconstruction.
*
* This is most useful from the context of an encoder. For example, to encode
Expand All @@ -110,15 +117,16 @@ class DemixingModule {
* information about the channels and the specific down-mixers and demixers
* needed for that audio element.
*
* \param user_metadata Input user metadata.
* \param audio_elements Audio elements. Used only for `audio_element_id`,
* `substream_id_to_labels`, and `label_to_output_gain`.
* \param id_to_config_map Map of Audio Element IDs to
* `DownmixingAndReconstructionConfig`, which contains the user-provided
* labels and the `substream_id_to_labels` and `label_to_output_gain` from
* the corresponding `AudioElementWithData`.
* \return `absl::OkStatus()` on success. A specific status on failure.
*/
static absl::StatusOr<DemixingModule> CreateForDownMixingAndReconstruction(
const iamf_tools_cli_proto::UserMetadata& user_metadata,
const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
audio_elements);
const absl::flat_hash_map<DecodedUleb128,
DownmixingAndReconstructionConfig>&&
id_to_config_map);

/*!\brief Initializes for reconstruction (demixing) the input audio elements.
*
Expand Down Expand Up @@ -210,12 +218,12 @@ class DemixingModule {
* to demixing metadata.
*/
DemixingModule(
absl::flat_hash_map<DecodedUleb128, DemxingMetadataForAudioElementId>&&
absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>&&
audio_element_id_to_demixing_metadata)
: audio_element_id_to_demixing_metadata_(
std::move(audio_element_id_to_demixing_metadata)) {}

const absl::flat_hash_map<DecodedUleb128, DemxingMetadataForAudioElementId>
const absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>
audio_element_id_to_demixing_metadata_;
};

Expand Down
10 changes: 9 additions & 1 deletion iamf/cli/iamf_encoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "iamf/cli/parameters_manager.h"
#include "iamf/cli/proto/test_vector_metadata.pb.h"
#include "iamf/cli/proto/user_metadata.pb.h"
#include "iamf/cli/proto_conversion/downmixing_reconstruction_util.h"
#include "iamf/cli/proto_conversion/proto_to_obu/arbitrary_obu_generator.h"
#include "iamf/cli/proto_conversion/proto_to_obu/audio_element_generator.h"
#include "iamf/cli/proto_conversion/proto_to_obu/audio_frame_generator.h"
Expand Down Expand Up @@ -156,8 +157,15 @@ absl::StatusOr<IamfEncoder> IamfEncoder::Create(
// Down-mix the audio samples and then demix audio samples while decoding
// them. This is useful to create multi-layer audio elements, to determine
// the recon gain parameters, and to measure loudness.
const absl::StatusOr<absl::flat_hash_map<
DecodedUleb128, DemixingModule::DownmixingAndReconstructionConfig>>
audio_element_id_to_demixing_metadata =
CreateAudioElementIdToDemixingMetadata(user_metadata, audio_elements);
if (!audio_element_id_to_demixing_metadata.ok()) {
return audio_element_id_to_demixing_metadata.status();
}
auto demixing_module = DemixingModule::CreateForDownMixingAndReconstruction(
user_metadata, audio_elements);
*std::move(audio_element_id_to_demixing_metadata));
if (!demixing_module.ok()) {
return demixing_module.status();
}
Expand Down
24 changes: 24 additions & 0 deletions iamf/cli/proto_conversion/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,30 @@ cc_library(
],
)

# Converts the proto `UserMetadata` into the plain
# `DemixingModule::DownmixingAndReconstructionConfig` map, so that
# `demixing_module` itself does not need to depend on protobuf.
cc_library(
    name = "downmixing_reconstruction_util",
    srcs = ["downmixing_reconstruction_util.cc"],
    hdrs = ["downmixing_reconstruction_util.h"],
    visibility = [
        "//iamf/cli:__pkg__",
        "//iamf/cli/proto_conversion/tests:__pkg__",
        "//iamf/cli/tests:__pkg__",
    ],
    deps = [
        ":channel_label_utils",
        "//iamf/cli:audio_element_with_data",
        "//iamf/cli:channel_label",
        "//iamf/cli:demixing_module",
        # The public header and the source name `iamf_tools_cli_proto` types
        # directly, so depend on the proto library explicitly instead of
        # relying on a transitive dependency.
        "//iamf/cli/proto:user_metadata_cc_proto",
        "//iamf/common/utils:macros",
        "//iamf/obu:types",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
    ],
)

cc_library(
name = "lookup_tables",
hdrs = ["lookup_tables.h"],
Expand Down
63 changes: 63 additions & 0 deletions iamf/cli/proto_conversion/downmixing_reconstruction_util.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Copyright (c) 2025, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 3-Clause Clear License
* and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
* License was not distributed with this source code in the LICENSE file, you
* can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
* Alliance for Open Media Patent License 1.0 was not distributed with this
* source code in the PATENTS file, you can obtain it at
* www.aomedia.org/license/patent.
*/

#include "iamf/cli/proto_conversion/downmixing_reconstruction_util.h"

#include <utility>

#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_cat.h"
#include "iamf/cli/audio_element_with_data.h"
#include "iamf/cli/channel_label.h"
#include "iamf/cli/demixing_module.h"
#include "iamf/cli/proto_conversion/channel_label_utils.h"
#include "iamf/common/utils/macros.h"
#include "iamf/obu/types.h"

namespace iamf_tools {

// Builds one `DownmixingAndReconstructionConfig` per audio frame metadata entry
// in `user_metadata`, keyed by audio element ID.
//
// Returns an `InvalidArgumentError` when an entry refers to an audio element ID
// that is absent from `audio_elements`, and propagates any failure from
// converting the proto channel labels. When several entries share an audio
// element ID, the entry that appears last overwrites the earlier ones.
absl::StatusOr<absl::flat_hash_map<
    DecodedUleb128, DemixingModule::DownmixingAndReconstructionConfig>>
CreateAudioElementIdToDemixingMetadata(
    const iamf_tools_cli_proto::UserMetadata& user_metadata,
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements) {
  absl::flat_hash_map<DecodedUleb128,
                      DemixingModule::DownmixingAndReconstructionConfig>
      result;
  // For each AudioFrameObuMetadata, we pull out the audio element ID, find
  // the matching AudioElementWithData, and convert the proto labels to internal
  // labels, and pair up the converted labels with `substream_id_to_labels` and
  // `label_to_output_gain` from the AudioElementWithData.
  for (const iamf_tools_cli_proto::AudioFrameObuMetadata&
           user_audio_frame_metadata : user_metadata.audio_frame_metadata()) {
    const auto audio_element_id = user_audio_frame_metadata.audio_element_id();
    const auto audio_element = audio_elements.find(audio_element_id);
    if (audio_element == audio_elements.end()) {
      return absl::InvalidArgumentError(
          absl::StrCat("Audio Element ID= ", audio_element_id, " not found"));
    }
    absl::flat_hash_set<ChannelLabel::Label> user_channel_labels;
    RETURN_IF_NOT_OK(ChannelLabelUtils::SelectConvertAndFillLabels(
        user_audio_frame_metadata, user_channel_labels));
    const auto& audio_element_with_data = audio_element->second;
    // The freshly converted label set is not needed afterwards, so move it
    // into the config rather than copying it. The two maps must be copied
    // because `audio_elements` is borrowed as const.
    result[audio_element_id] = {std::move(user_channel_labels),
                                audio_element_with_data.substream_id_to_labels,
                                audio_element_with_data.label_to_output_gain};
  }

  return result;
}

} // namespace iamf_tools
42 changes: 42 additions & 0 deletions iamf/cli/proto_conversion/downmixing_reconstruction_util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Copyright (c) 2025, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 3-Clause Clear License
* and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
* License was not distributed with this source code in the LICENSE file, you
* can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
* Alliance for Open Media Patent License 1.0 was not distributed with this
* source code in the PATENTS file, you can obtain it at
* www.aomedia.org/license/patent.
*/

#ifndef CLI_PROTO_CONVERSION_DOWNMIXING_RECONSTRUCTION_UTIL_H_
#define CLI_PROTO_CONVERSION_DOWNMIXING_RECONSTRUCTION_UTIL_H_

#include "absl/container/flat_hash_map.h"
#include "absl/status/statusor.h"
#include "iamf/cli/audio_element_with_data.h"
#include "iamf/cli/demixing_module.h"
#include "iamf/obu/types.h"

namespace iamf_tools {

/*!\brief Creates a map of Audio Element ID to
 *        `DemixingModule::DownmixingAndReconstructionConfig`.
 *
 * One config is produced per audio frame metadata entry in `user_metadata`.
 * If several entries refer to the same Audio Element ID, the entry that
 * appears last replaces the earlier ones.
 *
 * \param user_metadata Proto UserMetadata, the source of the user-provided
 *        channel labels.
 * \param audio_elements Audio elements keyed by Audio Element ID, the source
 *        of `substream_id_to_labels` and `label_to_output_gain`.
 * \return Map of Audio Element ID to `DownmixingAndReconstructionConfig` on
 *         success. An error if any Audio Element ID is not found in
 *         `audio_elements`. An error if any labels fail to be converted.
 */
absl::StatusOr<absl::flat_hash_map<
DecodedUleb128, DemixingModule::DownmixingAndReconstructionConfig>>
CreateAudioElementIdToDemixingMetadata(
const iamf_tools_cli_proto::UserMetadata& user_metadata,
const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
audio_elements);

} // namespace iamf_tools

#endif // CLI_PROTO_CONVERSION_DOWNMIXING_RECONSTRUCTION_UTIL_H_
Loading

0 comments on commit 7ae60a1

Please sign in to comment.