Optionally disable APM limiter in AudioMixer.
The APM limiter is a component for keeping the audio from clipping by smoothly reducing the amplitude of the audio samples. It can be rather expensive because of band-splitting & merging. Also, experiments indicate that it is of questionable benefit (adding several sources of human speech almost never causes clipping). To optionally disable the limiter, this CL does some refactoring on the (quite large) AudioMixerImpl. Functionality related to actual addition of frames and handling AudioFrame meta-data (sample_rate, num_channels, samples_per_channel, time_stamp, elapsed_time_ms) is broken out in a new sub-component called FrameCombiner. The FrameCombiner is initialized with a 'use_limiter' flag. To create a mixer without using the APM limiter, call AudioMixerImpl::CreateWithOutputRateCalculatorAndLimiter with use_limiter set to false. Inside of FrameCombiner, the meta-data handling and the audio sample addition are kept divided from each other. This also fixes a few minor GN issues so that warnings do not have to be suppressed. BUG=webrtc:7167 Review-Url: https://codereview.webrtc.org/2692333002 Cr-Commit-Position: refs/heads/master@{#16742}
This commit is contained in:
parent
76094ee697
commit
24899e58ec
@ -21,11 +21,15 @@ rtc_static_library("audio_mixer_impl") {
|
||||
"audio_mixer_impl.h",
|
||||
"default_output_rate_calculator.cc",
|
||||
"default_output_rate_calculator.h",
|
||||
"frame_combiner.cc",
|
||||
"frame_combiner.h",
|
||||
"output_rate_calculator.h",
|
||||
]
|
||||
|
||||
public = [
|
||||
"audio_mixer_impl.h",
|
||||
"default_output_rate_calculator.h", # For creating a mixer with limiter disabled.
|
||||
"frame_combiner.h",
|
||||
]
|
||||
|
||||
public_deps = [
|
||||
@ -61,14 +65,11 @@ rtc_static_library("audio_frame_manipulator") {
|
||||
|
||||
if (rtc_include_tests) {
|
||||
rtc_source_set("audio_mixer_unittests") {
|
||||
# When enabled, fails with error audio_mixer_impl_unittest.cc includes a
|
||||
# private header. File default_output_rate_calculator.h is private to the
|
||||
# target :audio_mixer_impl
|
||||
check_includes = false
|
||||
testonly = true
|
||||
sources = [
|
||||
"audio_frame_manipulator_unittest.cc",
|
||||
"audio_mixer_impl_unittest.cc",
|
||||
"frame_combiner_unittest.cc",
|
||||
]
|
||||
deps = [
|
||||
":audio_frame_manipulator",
|
||||
@ -79,12 +80,5 @@ if (rtc_include_tests) {
|
||||
"../../test:test_support",
|
||||
"//testing/gmock",
|
||||
]
|
||||
|
||||
# TODO(jschuh): bugs.webrtc.org/1348: fix this warning.
|
||||
configs += [ "//build/config/compiler:no_size_t_to_int_warning" ]
|
||||
if (!build_with_chromium && is_clang) {
|
||||
# Suppress warnings from the Chromium Clang plugin (bugs.webrtc.org/163).
|
||||
suppressed_configs += [ "//build/config/clang:find_bad_constructs" ]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -15,7 +15,6 @@
|
||||
#include <iterator>
|
||||
#include <utility>
|
||||
|
||||
#include "webrtc/audio/utility/audio_frame_operations.h"
|
||||
#include "webrtc/base/logging.h"
|
||||
#include "webrtc/modules/audio_mixer/audio_frame_manipulator.h"
|
||||
#include "webrtc/modules/audio_mixer/default_output_rate_calculator.h"
|
||||
@ -79,45 +78,6 @@ void RampAndUpdateGain(
|
||||
}
|
||||
}
|
||||
|
||||
// Mix the AudioFrames stored in audioFrameList into mixed_audio.
|
||||
int32_t MixFromList(AudioFrame* mixed_audio,
|
||||
const AudioFrameList& audio_frame_list,
|
||||
bool use_limiter) {
|
||||
if (audio_frame_list.empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (audio_frame_list.size() == 1) {
|
||||
mixed_audio->timestamp_ = audio_frame_list.front()->timestamp_;
|
||||
mixed_audio->elapsed_time_ms_ = audio_frame_list.front()->elapsed_time_ms_;
|
||||
} else {
|
||||
// TODO(wu): Issue 3390.
|
||||
// Audio frame timestamp is only supported in one channel case.
|
||||
mixed_audio->timestamp_ = 0;
|
||||
mixed_audio->elapsed_time_ms_ = -1;
|
||||
}
|
||||
|
||||
for (const auto& frame : audio_frame_list) {
|
||||
RTC_DCHECK_EQ(mixed_audio->sample_rate_hz_, frame->sample_rate_hz_);
|
||||
RTC_DCHECK_EQ(
|
||||
frame->samples_per_channel_,
|
||||
static_cast<size_t>((mixed_audio->sample_rate_hz_ *
|
||||
webrtc::AudioMixerImpl::kFrameDurationInMs) /
|
||||
1000));
|
||||
|
||||
// Mix |f.frame| into |mixed_audio|, with saturation protection.
|
||||
// These effect is applied to |f.frame| itself prior to mixing.
|
||||
if (use_limiter) {
|
||||
// This is to avoid saturation in the mixing. It is only
|
||||
// meaningful if the limiter will be used.
|
||||
AudioFrameOperations::ApplyHalfGain(frame);
|
||||
}
|
||||
RTC_DCHECK_EQ(frame->num_channels_, mixed_audio->num_channels_);
|
||||
AudioFrameOperations::Add(*frame, mixed_audio);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
AudioMixerImpl::SourceStatusList::const_iterator FindSourceInList(
|
||||
AudioMixerImpl::Source const* audio_source,
|
||||
AudioMixerImpl::SourceStatusList const* audio_source_list) {
|
||||
@ -139,68 +99,40 @@ AudioMixerImpl::SourceStatusList::iterator FindSourceInList(
|
||||
});
|
||||
}
|
||||
|
||||
std::unique_ptr<AudioProcessing> CreateLimiter() {
|
||||
Config config;
|
||||
config.Set<ExperimentalAgc>(new ExperimentalAgc(false));
|
||||
std::unique_ptr<AudioProcessing> limiter(AudioProcessing::Create(config));
|
||||
if (!limiter.get()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (limiter->gain_control()->set_mode(GainControl::kFixedDigital) !=
|
||||
limiter->kNoError) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// We smoothly limit the mixed frame to -7 dbFS. -6 would correspond to the
|
||||
// divide-by-2 but -7 is used instead to give a bit of headroom since the
|
||||
// AGC is not a hard limiter.
|
||||
if (limiter->gain_control()->set_target_level_dbfs(7) != limiter->kNoError) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (limiter->gain_control()->set_compression_gain_db(0) !=
|
||||
limiter->kNoError) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (limiter->gain_control()->enable_limiter(true) != limiter->kNoError) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (limiter->gain_control()->Enable(true) != limiter->kNoError) {
|
||||
return nullptr;
|
||||
}
|
||||
return limiter;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
AudioMixerImpl::AudioMixerImpl(
|
||||
std::unique_ptr<AudioProcessing> limiter,
|
||||
std::unique_ptr<OutputRateCalculator> output_rate_calculator)
|
||||
std::unique_ptr<OutputRateCalculator> output_rate_calculator,
|
||||
bool use_limiter)
|
||||
: output_rate_calculator_(std::move(output_rate_calculator)),
|
||||
output_frequency_(0),
|
||||
sample_size_(0),
|
||||
audio_source_list_(),
|
||||
use_limiter_(true),
|
||||
time_stamp_(0),
|
||||
limiter_(std::move(limiter)) {}
|
||||
frame_combiner_(use_limiter) {}
|
||||
|
||||
AudioMixerImpl::~AudioMixerImpl() {}
|
||||
|
||||
rtc::scoped_refptr<AudioMixerImpl> AudioMixerImpl::Create() {
|
||||
return CreateWithOutputRateCalculator(
|
||||
return CreateWithOutputRateCalculatorAndLimiter(
|
||||
std::unique_ptr<DefaultOutputRateCalculator>(
|
||||
new DefaultOutputRateCalculator()));
|
||||
new DefaultOutputRateCalculator()),
|
||||
true);
|
||||
}
|
||||
|
||||
// Convenience factory: forwards to CreateWithOutputRateCalculatorAndLimiter
// with the limiter switched on.
rtc::scoped_refptr<AudioMixerImpl>
AudioMixerImpl::CreateWithOutputRateCalculator(
    std::unique_ptr<OutputRateCalculator> output_rate_calculator) {
  constexpr bool kUseLimiter = true;
  return CreateWithOutputRateCalculatorAndLimiter(
      std::move(output_rate_calculator), kUseLimiter);
}
|
||||
|
||||
rtc::scoped_refptr<AudioMixerImpl>
|
||||
AudioMixerImpl::CreateWithOutputRateCalculatorAndLimiter(
|
||||
std::unique_ptr<OutputRateCalculator> output_rate_calculator,
|
||||
bool use_limiter) {
|
||||
return rtc::scoped_refptr<AudioMixerImpl>(
|
||||
new rtc::RefCountedObject<AudioMixerImpl>(
|
||||
CreateLimiter(), std::move(output_rate_calculator)));
|
||||
std::move(output_rate_calculator), use_limiter));
|
||||
}
|
||||
|
||||
void AudioMixerImpl::Mix(size_t number_of_channels,
|
||||
@ -210,34 +142,10 @@ void AudioMixerImpl::Mix(size_t number_of_channels,
|
||||
|
||||
CalculateOutputFrequency();
|
||||
|
||||
AudioFrameList mix_list;
|
||||
{
|
||||
rtc::CritScope lock(&crit_);
|
||||
mix_list = GetAudioFromSources();
|
||||
|
||||
for (const auto& frame : mix_list) {
|
||||
RemixFrame(number_of_channels, frame);
|
||||
}
|
||||
|
||||
audio_frame_for_mixing->UpdateFrame(
|
||||
-1, time_stamp_, NULL, 0, OutputFrequency(), AudioFrame::kNormalSpeech,
|
||||
AudioFrame::kVadPassive, number_of_channels);
|
||||
|
||||
time_stamp_ += static_cast<uint32_t>(sample_size_);
|
||||
|
||||
use_limiter_ = mix_list.size() > 1;
|
||||
|
||||
// We only use the limiter if we're actually mixing multiple streams.
|
||||
MixFromList(audio_frame_for_mixing, mix_list, use_limiter_);
|
||||
}
|
||||
|
||||
if (audio_frame_for_mixing->samples_per_channel_ == 0) {
|
||||
// Nothing was mixed, set the audio samples to silence.
|
||||
audio_frame_for_mixing->samples_per_channel_ = sample_size_;
|
||||
AudioFrameOperations::Mute(audio_frame_for_mixing);
|
||||
} else {
|
||||
// Only call the limiter if we have something to mix.
|
||||
LimitMixedAudio(audio_frame_for_mixing);
|
||||
frame_combiner_.Combine(GetAudioFromSources(), number_of_channels,
|
||||
OutputFrequency(), audio_frame_for_mixing);
|
||||
}
|
||||
|
||||
return;
|
||||
@ -331,36 +239,6 @@ AudioFrameList AudioMixerImpl::GetAudioFromSources() {
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
// Runs |mixed_audio| through the limiter and then doubles it. Does nothing
// and returns true when the limiter is not in use. Returns false if the
// limiter reports an error; the frame has been doubled in that case too.
bool AudioMixerImpl::LimitMixedAudio(AudioFrame* mixed_audio) const {
  RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
  if (use_limiter_) {
    // Smoothly limit the mixed frame.
    const int status = limiter_->ProcessStream(mixed_audio);

    // Restore the level. Some resolution is lost, which is deemed
    // acceptable.
    //
    // The gain could instead be applied in the AGC (target level 0 dbFS,
    // compression gain 6 dB), but in the transition frame when this is
    // enabled (moving from one to two audio sources) that can create
    // discontinuities in the mixed frame.
    //
    // The frame is doubled by adding it to itself, since left-shifting a
    // negative value is undefined.
    AudioFrameOperations::Add(*mixed_audio, mixed_audio);

    if (status != limiter_->kNoError) {
      LOG_F(LS_ERROR) << "Error from AudioProcessing: " << status;
      RTC_NOTREACHED();
      return false;
    }
  }
  return true;
}
|
||||
|
||||
bool AudioMixerImpl::GetAudioSourceMixabilityStatusForTest(
|
||||
AudioMixerImpl::Source* audio_source) const {
|
||||
RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
|
||||
|
||||
@ -18,6 +18,7 @@
|
||||
#include "webrtc/base/scoped_ref_ptr.h"
|
||||
#include "webrtc/base/thread_annotations.h"
|
||||
#include "webrtc/base/race_checker.h"
|
||||
#include "webrtc/modules/audio_mixer/frame_combiner.h"
|
||||
#include "webrtc/modules/audio_mixer/output_rate_calculator.h"
|
||||
#include "webrtc/modules/audio_processing/include/audio_processing.h"
|
||||
#include "webrtc/modules/include/module_common_types.h"
|
||||
@ -48,9 +49,18 @@ class AudioMixerImpl : public AudioMixer {
|
||||
static const int kMaximumAmountOfMixedAudioSources = 3;
|
||||
|
||||
static rtc::scoped_refptr<AudioMixerImpl> Create();
|
||||
static rtc::scoped_refptr<AudioMixerImpl> CreateWithOutputRateCalculator(
|
||||
|
||||
// TODO(aleloi): remove this when dependencies have updated to
|
||||
// use Create..AndLimiter instead. See bugs.webrtc.org/7167.
|
||||
RTC_DEPRECATED static rtc::scoped_refptr<AudioMixerImpl>
|
||||
CreateWithOutputRateCalculator(
|
||||
std::unique_ptr<OutputRateCalculator> output_rate_calculator);
|
||||
|
||||
static rtc::scoped_refptr<AudioMixerImpl>
|
||||
CreateWithOutputRateCalculatorAndLimiter(
|
||||
std::unique_ptr<OutputRateCalculator> output_rate_calculator,
|
||||
bool use_limiter);
|
||||
|
||||
~AudioMixerImpl() override;
|
||||
|
||||
// AudioMixer functions
|
||||
@ -66,8 +76,8 @@ class AudioMixerImpl : public AudioMixer {
|
||||
bool GetAudioSourceMixabilityStatusForTest(Source* audio_source) const;
|
||||
|
||||
protected:
|
||||
AudioMixerImpl(std::unique_ptr<AudioProcessing> limiter,
|
||||
std::unique_ptr<OutputRateCalculator> output_rate_calculator);
|
||||
AudioMixerImpl(std::unique_ptr<OutputRateCalculator> output_rate_calculator,
|
||||
bool use_limiter);
|
||||
|
||||
private:
|
||||
// Set mixing frequency through OutputFrequencyCalculator.
|
||||
@ -87,8 +97,6 @@ class AudioMixerImpl : public AudioMixer {
|
||||
bool RemoveAudioSourceFromList(Source* remove_audio_source,
|
||||
SourceStatusList* audio_source_list) const;
|
||||
|
||||
bool LimitMixedAudio(AudioFrame* mixed_audio) const;
|
||||
|
||||
// The critical section lock guards audio source insertion and
|
||||
// removal, which can be done from any thread. The race checker
|
||||
// checks that mixing is done sequentially.
|
||||
@ -103,14 +111,8 @@ class AudioMixerImpl : public AudioMixer {
|
||||
// List of all audio sources. Note all lists are disjunct
|
||||
SourceStatusList audio_source_list_ GUARDED_BY(crit_); // May be mixed.
|
||||
|
||||
// Determines if we will use a limiter for clipping protection during
|
||||
// mixing.
|
||||
bool use_limiter_ GUARDED_BY(race_checker_);
|
||||
|
||||
uint32_t time_stamp_ GUARDED_BY(race_checker_);
|
||||
|
||||
// Used for inhibiting saturation in mixing.
|
||||
std::unique_ptr<AudioProcessing> limiter_ GUARDED_BY(race_checker_);
|
||||
// Component that handles actual adding of audio frames.
|
||||
FrameCombiner frame_combiner_ GUARDED_BY(race_checker_);
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(AudioMixerImpl);
|
||||
};
|
||||
|
||||
@ -12,10 +12,13 @@
|
||||
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "webrtc/api/audio/audio_mixer.h"
|
||||
#include "webrtc/base/bind.h"
|
||||
#include "webrtc/base/checks.h"
|
||||
#include "webrtc/base/thread.h"
|
||||
#include "webrtc/modules/audio_mixer/audio_mixer_impl.h"
|
||||
#include "webrtc/modules/audio_mixer/default_output_rate_calculator.h"
|
||||
@ -46,6 +49,16 @@ void ResetFrame(AudioFrame* frame) {
|
||||
frame->speech_type_ = AudioFrame::kNormalSpeech;
|
||||
}
|
||||
|
||||
// Formats the parameters of one test iteration as a single line of text,
// suitable for use as a trace message.
std::string ProduceDebugText(int sample_rate_hz,
                             int number_of_channels,
                             int number_of_sources) {
  std::ostringstream description;
  description << "Sample rate: " << sample_rate_hz << " "
              << "Number of channels: " << number_of_channels << " "
              << "Number of sources: " << number_of_sources;
  return description.str();
}
|
||||
|
||||
AudioFrame frame_for_mixing;
|
||||
|
||||
} // namespace
|
||||
@ -78,7 +91,8 @@ class MockMixerAudioSource : public AudioMixer::Source {
|
||||
AudioFrame* audio_frame) {
|
||||
audio_frame->CopyFrom(fake_frame_);
|
||||
audio_frame->sample_rate_hz_ = sample_rate_hz;
|
||||
audio_frame->samples_per_channel_ = sample_rate_hz / 100;
|
||||
audio_frame->samples_per_channel_ =
|
||||
rtc::CheckedDivExact(sample_rate_hz, 100);
|
||||
return fake_info();
|
||||
}
|
||||
|
||||
@ -89,7 +103,7 @@ class MockMixerAudioSource : public AudioMixer::Source {
|
||||
class CustomRateCalculator : public OutputRateCalculator {
|
||||
public:
|
||||
explicit CustomRateCalculator(int rate) : rate_(rate) {}
|
||||
int CalculateOutputRate(const std::vector<int>& preferred_rates) {
|
||||
int CalculateOutputRate(const std::vector<int>& preferred_rates) override {
|
||||
return rate_;
|
||||
}
|
||||
|
||||
@ -103,19 +117,19 @@ void MixAndCompare(
|
||||
const std::vector<AudioFrame>& frames,
|
||||
const std::vector<AudioMixer::Source::AudioFrameInfo>& frame_info,
|
||||
const std::vector<bool>& expected_status) {
|
||||
int num_audio_sources = frames.size();
|
||||
const size_t num_audio_sources = frames.size();
|
||||
RTC_DCHECK(frames.size() == frame_info.size());
|
||||
RTC_DCHECK(frame_info.size() == expected_status.size());
|
||||
|
||||
const auto mixer = AudioMixerImpl::Create();
|
||||
std::vector<MockMixerAudioSource> participants(num_audio_sources);
|
||||
|
||||
for (int i = 0; i < num_audio_sources; i++) {
|
||||
for (size_t i = 0; i < num_audio_sources; ++i) {
|
||||
participants[i].fake_frame()->CopyFrom(frames[i]);
|
||||
participants[i].set_fake_info(frame_info[i]);
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_audio_sources; i++) {
|
||||
for (size_t i = 0; i < num_audio_sources; ++i) {
|
||||
EXPECT_TRUE(mixer->AddSource(&participants[i]));
|
||||
EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
|
||||
.Times(Exactly(1));
|
||||
@ -123,7 +137,7 @@ void MixAndCompare(
|
||||
|
||||
mixer->Mix(1, &frame_for_mixing);
|
||||
|
||||
for (int i = 0; i < num_audio_sources; i++) {
|
||||
for (size_t i = 0; i < num_audio_sources; ++i) {
|
||||
EXPECT_EQ(expected_status[i],
|
||||
mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]))
|
||||
<< "Mixed status of AudioSource #" << i << " wrong.";
|
||||
@ -191,11 +205,11 @@ TEST(AudioMixer, FrameNotModifiedForSingleParticipant) {
|
||||
MockMixerAudioSource participant;
|
||||
|
||||
ResetFrame(participant.fake_frame());
|
||||
const int n_samples = participant.fake_frame()->samples_per_channel_;
|
||||
const size_t n_samples = participant.fake_frame()->samples_per_channel_;
|
||||
|
||||
// Modify the frame so that it's not zero.
|
||||
for (int j = 0; j < n_samples; j++) {
|
||||
participant.fake_frame()->data_[j] = j;
|
||||
for (size_t j = 0; j < n_samples; ++j) {
|
||||
participant.fake_frame()->data_[j] = static_cast<int16_t>(j);
|
||||
}
|
||||
|
||||
EXPECT_TRUE(mixer->AddSource(&participant));
|
||||
@ -203,7 +217,7 @@ TEST(AudioMixer, FrameNotModifiedForSingleParticipant) {
|
||||
|
||||
AudioFrame audio_frame;
|
||||
// Two mix iteration to compare after the ramp-up step.
|
||||
for (int i = 0; i < 2; i++) {
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
mixer->Mix(1, // number of channels
|
||||
&audio_frame);
|
||||
}
|
||||
@ -310,7 +324,7 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) {
|
||||
const auto mixer = AudioMixerImpl::Create();
|
||||
MockMixerAudioSource participants[kAudioSources];
|
||||
|
||||
for (int i = 0; i < kAudioSources; i++) {
|
||||
for (int i = 0; i < kAudioSources; ++i) {
|
||||
ResetFrame(participants[i].fake_frame());
|
||||
// Set the participant audio energy to increase with the index
|
||||
// |i|.
|
||||
@ -318,7 +332,7 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) {
|
||||
}
|
||||
|
||||
// Add all participants but the loudest for mixing.
|
||||
for (int i = 0; i < kAudioSources - 1; i++) {
|
||||
for (int i = 0; i < kAudioSources - 1; ++i) {
|
||||
EXPECT_TRUE(mixer->AddSource(&participants[i]));
|
||||
EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
|
||||
.Times(Exactly(1));
|
||||
@ -328,14 +342,14 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) {
|
||||
mixer->Mix(1, &frame_for_mixing);
|
||||
|
||||
// All participants but the loudest should have been mixed.
|
||||
for (int i = 0; i < kAudioSources - 1; i++) {
|
||||
for (int i = 0; i < kAudioSources - 1; ++i) {
|
||||
EXPECT_TRUE(mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]))
|
||||
<< "Mixed status of AudioSource #" << i << " wrong.";
|
||||
}
|
||||
|
||||
// Add new participant with higher energy.
|
||||
EXPECT_TRUE(mixer->AddSource(&participants[kAudioSources - 1]));
|
||||
for (int i = 0; i < kAudioSources; i++) {
|
||||
for (int i = 0; i < kAudioSources; ++i) {
|
||||
EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
|
||||
.Times(Exactly(1));
|
||||
}
|
||||
@ -347,7 +361,7 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) {
|
||||
<< "Mixed status of AudioSource #0 wrong.";
|
||||
|
||||
// The loudest participants should have been mixed.
|
||||
for (int i = 1; i < kAudioSources; i++) {
|
||||
for (int i = 1; i < kAudioSources; ++i) {
|
||||
EXPECT_EQ(true,
|
||||
mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]))
|
||||
<< "Mixed status of AudioSource #" << i << " wrong.";
|
||||
@ -456,9 +470,10 @@ TEST(AudioMixer, UnmutedShouldMixBeforeLoud) {
|
||||
|
||||
TEST(AudioMixer, MixingRateShouldBeDecidedByRateCalculator) {
|
||||
constexpr int kOutputRate = 22000;
|
||||
const auto mixer = AudioMixerImpl::CreateWithOutputRateCalculator(
|
||||
const auto mixer = AudioMixerImpl::CreateWithOutputRateCalculatorAndLimiter(
|
||||
std::unique_ptr<OutputRateCalculator>(
|
||||
new CustomRateCalculator(kOutputRate)));
|
||||
new CustomRateCalculator(kOutputRate)),
|
||||
true);
|
||||
MockMixerAudioSource audio_source;
|
||||
mixer->AddSource(&audio_source);
|
||||
ResetFrame(audio_source.fake_frame());
|
||||
@ -471,12 +486,48 @@ TEST(AudioMixer, MixingRateShouldBeDecidedByRateCalculator) {
|
||||
|
||||
TEST(AudioMixer, ZeroSourceRateShouldBeDecidedByRateCalculator) {
|
||||
constexpr int kOutputRate = 8000;
|
||||
const auto mixer = AudioMixerImpl::CreateWithOutputRateCalculator(
|
||||
const auto mixer = AudioMixerImpl::CreateWithOutputRateCalculatorAndLimiter(
|
||||
std::unique_ptr<OutputRateCalculator>(
|
||||
new CustomRateCalculator(kOutputRate)));
|
||||
new CustomRateCalculator(kOutputRate)),
|
||||
true);
|
||||
|
||||
mixer->Mix(1, &frame_for_mixing);
|
||||
|
||||
EXPECT_EQ(kOutputRate, frame_for_mixing.sample_rate_hz_);
|
||||
}
|
||||
|
||||
// Smoke test: a mixer created with the limiter disabled can mix a frame
// when no sources have been added.
TEST(AudioMixer, NoLimiterBasicApiCalls) {
  std::unique_ptr<OutputRateCalculator> rate_calculator(
      new DefaultOutputRateCalculator());
  const auto mixer = AudioMixerImpl::CreateWithOutputRateCalculatorAndLimiter(
      std::move(rate_calculator), false);
  mixer->Mix(1, &frame_for_mixing);
}
|
||||
|
||||
// Mixes with the limiter disabled for every combination of output rate,
// channel count and source count, and checks that the mixed frame carries
// the requested rate and channel count.
TEST(AudioMixer, AnyRateIsPossibleWithNoLimiter) {
  // No APM limiter means no AudioProcessing::NativeRate restriction
  // on mixing rate. The rate has to be divisible by 100 since we use
  // 10 ms frames, though.
  for (const auto rate : {8000, 20000, 24000, 32000, 44100}) {
    for (const size_t number_of_channels : {1, 2}) {
      for (const auto number_of_sources : {0, 1, 2, 3, 4}) {
        // The trace must name the channel count of this iteration; the
        // explicit cast avoids an implicit size_t-to-int conversion.
        SCOPED_TRACE(ProduceDebugText(rate,
                                      static_cast<int>(number_of_channels),
                                      number_of_sources));
        const auto mixer =
            AudioMixerImpl::CreateWithOutputRateCalculatorAndLimiter(
                std::unique_ptr<OutputRateCalculator>(
                    new CustomRateCalculator(rate)),
                false);

        std::vector<MockMixerAudioSource> sources(number_of_sources);
        for (auto& source : sources) {
          mixer->AddSource(&source);
        }

        mixer->Mix(number_of_channels, &frame_for_mixing);
        EXPECT_EQ(rate, frame_for_mixing.sample_rate_hz_);
        EXPECT_EQ(number_of_channels, frame_for_mixing.num_channels_);
      }
    }
  }
}
|
||||
} // namespace webrtc
|
||||
|
||||
172
webrtc/modules/audio_mixer/frame_combiner.cc
Normal file
172
webrtc/modules/audio_mixer/frame_combiner.cc
Normal file
@ -0,0 +1,172 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_mixer/frame_combiner.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
|
||||
#include "webrtc/audio/utility/audio_frame_operations.h"
|
||||
#include "webrtc/base/logging.h"
|
||||
#include "webrtc/modules/audio_mixer/audio_frame_manipulator.h"
|
||||
#include "webrtc/modules/audio_mixer/audio_mixer_impl.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// Stereo, 48 kHz, 10 ms.
|
||||
constexpr int kMaximalFrameSize = 2 * 48 * 10;
|
||||
|
||||
// Handles the case of no input frames: marks the elapsed time as unknown
// (-1) and mutes |audio_frame_for_mixing|.
void CombineZeroFrames(AudioFrame* audio_frame_for_mixing) {
  AudioFrame* const output = audio_frame_for_mixing;
  output->elapsed_time_ms_ = -1;
  AudioFrameOperations::Mute(output);
}
|
||||
|
||||
void CombineOneFrame(const AudioFrame* input_frame,
|
||||
AudioFrame* audio_frame_for_mixing) {
|
||||
audio_frame_for_mixing->timestamp_ = input_frame->timestamp_;
|
||||
audio_frame_for_mixing->elapsed_time_ms_ = input_frame->elapsed_time_ms_;
|
||||
std::copy(input_frame->data_,
|
||||
input_frame->data_ +
|
||||
input_frame->num_channels_ * input_frame->samples_per_channel_,
|
||||
audio_frame_for_mixing->data_);
|
||||
}
|
||||
|
||||
std::unique_ptr<AudioProcessing> CreateLimiter() {
|
||||
Config config;
|
||||
config.Set<ExperimentalAgc>(new ExperimentalAgc(false));
|
||||
std::unique_ptr<AudioProcessing> limiter(AudioProcessing::Create(config));
|
||||
RTC_DCHECK(limiter);
|
||||
|
||||
const auto check_no_error = [](int x) {
|
||||
RTC_DCHECK_EQ(x, AudioProcessing::kNoError);
|
||||
};
|
||||
auto* const gain_control = limiter->gain_control();
|
||||
check_no_error(gain_control->set_mode(GainControl::kFixedDigital));
|
||||
|
||||
// We smoothly limit the mixed frame to -7 dbFS. -6 would correspond to the
|
||||
// divide-by-2 but -7 is used instead to give a bit of headroom since the
|
||||
// AGC is not a hard limiter.
|
||||
check_no_error(gain_control->set_target_level_dbfs(7));
|
||||
|
||||
check_no_error(gain_control->set_compression_gain_db(0));
|
||||
check_no_error(gain_control->enable_limiter(true));
|
||||
check_no_error(gain_control->Enable(true));
|
||||
return limiter;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Stores the limiter setting and creates the limiter only when it is
// requested; otherwise |limiter_| stays null.
FrameCombiner::FrameCombiner(bool use_apm_limiter)
    : use_apm_limiter_(use_apm_limiter) {
  if (use_apm_limiter_) {
    limiter_ = CreateLimiter();
  }
}

FrameCombiner::~FrameCombiner() = default;
|
||||
|
||||
// Combines the frames in |mix_list| into |audio_frame_for_mixing|.
//
// |mix_list|               Input frames; may be empty. Each frame is remixed
//                          in place to |number_of_channels| channels.
// |number_of_channels|     Channel count of the output frame.
// |sample_rate|            Rate in Hz of the output; every input frame is
//                          expected to use it too (debug-checked).
// |audio_frame_for_mixing| Output frame; must not be null.
void FrameCombiner::Combine(const std::vector<AudioFrame*>& mix_list,
                            size_t number_of_channels,
                            int sample_rate,
                            AudioFrame* audio_frame_for_mixing) const {
  RTC_DCHECK(audio_frame_for_mixing);
  const size_t samples_per_channel = static_cast<size_t>(
      (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000);

  for (const auto* frame : mix_list) {
    RTC_DCHECK_EQ(samples_per_channel, frame->samples_per_channel_);
    RTC_DCHECK_EQ(sample_rate, frame->sample_rate_hz_);
  }

  // Frames could be both stereo and mono.
  for (auto* frame : mix_list) {
    RemixFrame(number_of_channels, frame);
  }

  // TODO(aleloi): Issue bugs.webrtc.org/3390.
  // Audio frame timestamp. The 'timestamp_' field is set to dummy
  // value '0', because it is only supported in the one channel case and
  // is then updated in the helper functions.
  audio_frame_for_mixing->UpdateFrame(
      -1, 0, nullptr, samples_per_channel, sample_rate, AudioFrame::kUndefined,
      AudioFrame::kVadUnknown, number_of_channels);

  if (mix_list.empty()) {
    CombineZeroFrames(audio_frame_for_mixing);
  } else if (mix_list.size() == 1) {
    CombineOneFrame(mix_list.front(), audio_frame_for_mixing);
  } else {
    // One view per input frame. The final size is known, so the storage is
    // reserved once instead of growing while the views are appended.
    std::vector<rtc::ArrayView<const int16_t>> input_frames;
    input_frames.reserve(mix_list.size());
    for (const AudioFrame* frame : mix_list) {
      input_frames.push_back(rtc::ArrayView<const int16_t>(
          frame->data_, samples_per_channel * number_of_channels));
    }
    CombineMultipleFrames(input_frames, audio_frame_for_mixing);
  }
}
|
||||
|
||||
void FrameCombiner::CombineMultipleFrames(
|
||||
const std::vector<rtc::ArrayView<const int16_t>>& input_frames,
|
||||
AudioFrame* audio_frame_for_mixing) const {
|
||||
RTC_DCHECK(!input_frames.empty());
|
||||
RTC_DCHECK(audio_frame_for_mixing);
|
||||
|
||||
const size_t frame_length = input_frames.front().size();
|
||||
for (const auto& frame : input_frames) {
|
||||
RTC_DCHECK_EQ(frame_length, frame.size());
|
||||
}
|
||||
|
||||
// Algorithm: int16 frames are added to a sufficiently large
|
||||
// statically allocated int32 buffer. For > 2 participants this is
|
||||
// more efficient than addition in place in the int16 audio
|
||||
// frame. The audio quality loss due to halving the samples is
|
||||
// smaller than 16-bit addition in place.
|
||||
RTC_DCHECK_GE(kMaximalFrameSize, frame_length);
|
||||
std::array<int32_t, kMaximalFrameSize> add_buffer;
|
||||
|
||||
add_buffer.fill(0);
|
||||
|
||||
for (const auto& frame : input_frames) {
|
||||
std::transform(frame.begin(), frame.end(), add_buffer.begin(),
|
||||
add_buffer.begin(), std::plus<int32_t>());
|
||||
}
|
||||
|
||||
if (use_apm_limiter_) {
|
||||
// Halve all samples to avoid saturation before limiting.
|
||||
std::transform(add_buffer.begin(), add_buffer.begin() + frame_length,
|
||||
audio_frame_for_mixing->data_, [](int32_t a) {
|
||||
return rtc::saturated_cast<int16_t>(a / 2);
|
||||
});
|
||||
|
||||
// Smoothly limit the audio.
|
||||
RTC_DCHECK(limiter_);
|
||||
const int error = limiter_->ProcessStream(audio_frame_for_mixing);
|
||||
if (error != limiter_->kNoError) {
|
||||
LOG_F(LS_ERROR) << "Error from AudioProcessing: " << error;
|
||||
RTC_NOTREACHED();
|
||||
}
|
||||
|
||||
// And now we can safely restore the level. This procedure results in
|
||||
// some loss of resolution, deemed acceptable.
|
||||
//
|
||||
// It's possible to apply the gain in the AGC (with a target level of 0 dbFS
|
||||
// and compression gain of 6 dB). However, in the transition frame when this
|
||||
// is enabled (moving from one to two audio sources) it has the potential to
|
||||
// create discontinuities in the mixed frame.
|
||||
//
|
||||
// Instead we double the frame (with addition since left-shifting a
|
||||
// negative value is undefined).
|
||||
AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing);
|
||||
} else {
|
||||
std::transform(add_buffer.begin(), add_buffer.begin() + frame_length,
|
||||
audio_frame_for_mixing->data_,
|
||||
[](int32_t a) { return rtc::saturated_cast<int16_t>(a); });
|
||||
}
|
||||
}
|
||||
} // namespace webrtc
|
||||
54
webrtc/modules/audio_mixer/frame_combiner.h
Normal file
54
webrtc/modules/audio_mixer/frame_combiner.h
Normal file
@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_MIXER_FRAME_COMBINER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_MIXER_FRAME_COMBINER_H_
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "webrtc/modules/audio_processing/include/audio_processing.h"
|
||||
#include "webrtc/modules/include/module_common_types.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class FrameCombiner {
|
||||
public:
|
||||
explicit FrameCombiner(bool use_apm_limiter);
|
||||
~FrameCombiner();
|
||||
|
||||
// Combine several frames into one. Assumes sample_rate,
|
||||
// samples_per_channel of the input frames match the parameters. The
|
||||
// extra parameters are needed because 'mix_list' can be empty.
|
||||
void Combine(const std::vector<AudioFrame*>& mix_list,
|
||||
size_t number_of_channels,
|
||||
int sample_rate,
|
||||
AudioFrame* audio_frame_for_mixing) const;
|
||||
|
||||
private:
|
||||
// Lower-level helper function called from Combine(...) when there
|
||||
// are several input frames.
|
||||
//
|
||||
// TODO(aleloi): change interface to ArrayView<int16_t> output_frame
|
||||
// once we have gotten rid of the APM limiter.
|
||||
//
|
||||
// Only the 'data' field of output_frame should be modified. The
|
||||
// rest are used for potentially sending the output to the APM
|
||||
// limiter.
|
||||
void CombineMultipleFrames(
|
||||
const std::vector<rtc::ArrayView<const int16_t>>& input_frames,
|
||||
AudioFrame* audio_frame_for_mixing) const;
|
||||
|
||||
const bool use_apm_limiter_;
|
||||
std::unique_ptr<AudioProcessing> limiter_;
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_MIXER_FRAME_COMBINER_H_
|
||||
132
webrtc/modules/audio_mixer/frame_combiner_unittest.cc
Normal file
132
webrtc/modules/audio_mixer/frame_combiner_unittest.cc
Normal file
@ -0,0 +1,132 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_mixer/frame_combiner.h"
|
||||
|
||||
#include <numeric>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "webrtc/base/checks.h"
|
||||
#include "webrtc/test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
// Builds the one-line description of a test configuration that the tests
// hand to SCOPED_TRACE, e.g.
// "Sample rate: 8000 Number of channels: 1 Number of sources: 2".
std::string ProduceDebugText(int sample_rate_hz,
                             int number_of_channels,
                             int number_of_sources) {
  std::ostringstream description;
  description << "Sample rate: " << sample_rate_hz << " "
              << "Number of channels: " << number_of_channels << " "
              << "Number of sources: " << number_of_sources;
  return description.str();
}
|
||||
|
||||
AudioFrame frame1;
|
||||
AudioFrame frame2;
|
||||
AudioFrame audio_frame_for_mixing;
|
||||
|
||||
void SetUpFrames(int sample_rate_hz, int number_of_channels) {
|
||||
for (auto* frame : {&frame1, &frame2}) {
|
||||
frame->UpdateFrame(-1, 0, nullptr,
|
||||
rtc::CheckedDivExact(sample_rate_hz, 100),
|
||||
sample_rate_hz, AudioFrame::kNormalSpeech,
|
||||
AudioFrame::kVadActive, number_of_channels);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Smoke test with the APM limiter enabled: for every rate / channel-count
// pair below, Combine() is called with zero, one and two input frames.
// Nothing is verified about the produced audio; the calls just have to
// complete.
TEST(FrameCombiner, BasicApiCallsLimiter) {
  FrameCombiner combiner(true);
  for (const int rate : {8000, 16000, 32000, 48000}) {
    for (const int number_of_channels : {1, 2}) {
      SetUpFrames(rate, number_of_channels);
      const std::vector<AudioFrame*> available_frames = {&frame1, &frame2};

      for (int number_of_frames = 0; number_of_frames <= 2;
           ++number_of_frames) {
        SCOPED_TRACE(
            ProduceDebugText(rate, number_of_channels, number_of_frames));
        // Take the first |number_of_frames| frames as the mix list.
        const auto first = available_frames.begin();
        const std::vector<AudioFrame*> frames_to_combine(
            first, first + number_of_frames);
        combiner.Combine(frames_to_combine, number_of_channels, rate,
                         &audio_frame_for_mixing);
      }
    }
  }
}
|
||||
|
||||
// No APM limiter means no AudioProcessing::NativeRate restriction
|
||||
// on rate. The rate has to be divisible by 100 since we use
|
||||
// 10 ms frames, though.
|
||||
TEST(FrameCombiner, BasicApiCallsNoLimiter) {
|
||||
FrameCombiner combiner(false);
|
||||
for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
|
||||
for (const int number_of_channels : {1, 2}) {
|
||||
const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
|
||||
SetUpFrames(rate, number_of_channels);
|
||||
|
||||
for (const int number_of_frames : {0, 1, 2}) {
|
||||
SCOPED_TRACE(
|
||||
ProduceDebugText(rate, number_of_channels, number_of_frames));
|
||||
const std::vector<AudioFrame*> frames_to_combine(
|
||||
all_frames.begin(), all_frames.begin() + number_of_frames);
|
||||
combiner.Combine(frames_to_combine, number_of_channels, rate,
|
||||
&audio_frame_for_mixing);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Combining an empty mix list (limiter disabled) must leave only zero-valued
// samples in the first 10 ms of the output frame, for every rate /
// channel-count pair below.
TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) {
  FrameCombiner combiner(false);
  const std::vector<AudioFrame*> frames_to_combine;  // Intentionally empty.
  for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
    for (const int number_of_channels : {1, 2}) {
      SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 0));

      combiner.Combine(frames_to_combine, number_of_channels, rate,
                       &audio_frame_for_mixing);

      // Number of interleaved samples in one 10 ms frame.
      const int total_samples = number_of_channels * rate / 100;
      const std::vector<int16_t> mixed_data(
          audio_frame_for_mixing.data_,
          audio_frame_for_mixing.data_ + total_samples);
      const std::vector<int16_t> expected(total_samples, 0);
      EXPECT_EQ(mixed_data, expected);
    }
  }
}
|
||||
|
||||
// Combining a single frame (limiter disabled) must reproduce that frame's
// samples unchanged in the output. The input is filled with the ramp
// 0, 1, 2, ... so that any modification shows up in the comparison.
TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) {
  FrameCombiner combiner(false);
  for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
    for (const int number_of_channels : {1, 2}) {
      SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1));

      SetUpFrames(rate, number_of_channels);
      // Number of interleaved samples in one 10 ms frame.
      const int total_samples = number_of_channels * rate / 100;
      std::iota(frame1.data_, frame1.data_ + total_samples, 0);

      const std::vector<AudioFrame*> frames_to_combine = {&frame1};
      combiner.Combine(frames_to_combine, number_of_channels, rate,
                       &audio_frame_for_mixing);

      const std::vector<int16_t> mixed_data(
          audio_frame_for_mixing.data_,
          audio_frame_for_mixing.data_ + total_samples);

      // The same ramp that was written into |frame1|.
      std::vector<int16_t> expected(total_samples);
      std::iota(expected.begin(), expected.end(), 0);
      EXPECT_EQ(mixed_data, expected);
    }
  }
}
|
||||
|
||||
} // namespace webrtc
|
||||
Loading…
x
Reference in New Issue
Block a user