webrtc_m130/modules/audio_mixer/frame_combiner.cc

/*
 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_mixer/frame_combiner.h"

#include <algorithm>
#include <array>
#include <functional>

#include "api/array_view.h"
#include "audio/utility/audio_frame_operations.h"
#include "common_audio/include/audio_util.h"
#include "modules/audio_mixer/audio_frame_manipulator.h"
#include "modules/audio_mixer/audio_mixer_impl.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/arraysize.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "system_wrappers/include/metrics.h"

namespace webrtc {
namespace {

// Stereo, 48 kHz, 10 ms.
constexpr int kMaximumAmountOfChannels = 2;
constexpr int kMaximumChannelSize = 48 * AudioMixerImpl::kFrameDurationInMs;

using OneChannelBuffer = std::array<float, kMaximumChannelSize>;

std::unique_ptr<AudioProcessing> CreateLimiter() {
  Config config;
  config.Set<ExperimentalAgc>(new ExperimentalAgc(false));

  std::unique_ptr<AudioProcessing> limiter(
      AudioProcessingBuilder().Create(config));
  RTC_DCHECK(limiter);
  webrtc::AudioProcessing::Config apm_config;
  apm_config.residual_echo_detector.enabled = false;
  limiter->ApplyConfig(apm_config);

  const auto check_no_error = [](int x) {
    RTC_DCHECK_EQ(x, AudioProcessing::kNoError);
  };
  auto* const gain_control = limiter->gain_control();
  check_no_error(gain_control->set_mode(GainControl::kFixedDigital));

  // We smoothly limit the mixed frame to -7 dbFS. -6 would correspond to the
  // divide-by-2 but -7 is used instead to give a bit of headroom since the
  // AGC is not a hard limiter.
  check_no_error(gain_control->set_target_level_dbfs(7));

  check_no_error(gain_control->set_compression_gain_db(0));
  check_no_error(gain_control->enable_limiter(true));
  check_no_error(gain_control->Enable(true));

  return limiter;
}

void SetAudioFrameFields(const std::vector<AudioFrame*>& mix_list,
                         size_t number_of_channels,
                         int sample_rate,
                         size_t number_of_streams,
                         AudioFrame* audio_frame_for_mixing) {
  const size_t samples_per_channel = static_cast<size_t>(
      (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000);

  // TODO(minyue): Issue bugs.webrtc.org/3390.
  // Audio frame timestamp. The 'timestamp_' field is set to dummy
  // value '0', because it is only supported in the one channel case and
  // is then updated in the helper functions.
  audio_frame_for_mixing->UpdateFrame(
      0, nullptr, samples_per_channel, sample_rate, AudioFrame::kUndefined,
      AudioFrame::kVadUnknown, number_of_channels);

  if (mix_list.empty()) {
    audio_frame_for_mixing->elapsed_time_ms_ = -1;
  } else if (mix_list.size() == 1) {
    audio_frame_for_mixing->timestamp_ = mix_list[0]->timestamp_;
    audio_frame_for_mixing->elapsed_time_ms_ = mix_list[0]->elapsed_time_ms_;
  }
}

void MixFewFramesWithNoLimiter(const std::vector<AudioFrame*>& mix_list,
                               AudioFrame* audio_frame_for_mixing) {
  if (mix_list.empty()) {
    audio_frame_for_mixing->Mute();
    return;
  }
  RTC_DCHECK_LE(mix_list.size(), 1);
  std::copy(mix_list[0]->data(),
            mix_list[0]->data() +
                mix_list[0]->num_channels_ * mix_list[0]->samples_per_channel_,
            audio_frame_for_mixing->mutable_data());
}

std::array<OneChannelBuffer, kMaximumAmountOfChannels> MixToFloatFrame(
    const std::vector<AudioFrame*>& mix_list,
    size_t samples_per_channel,
    size_t number_of_channels) {
  // Convert to FloatS16 and mix.
  using OneChannelBuffer = std::array<float, kMaximumChannelSize>;
  std::array<OneChannelBuffer, kMaximumAmountOfChannels> mixing_buffer{};

  for (size_t i = 0; i < mix_list.size(); ++i) {
    const AudioFrame* const frame = mix_list[i];
    for (size_t j = 0; j < number_of_channels; ++j) {
      for (size_t k = 0; k < samples_per_channel; ++k) {
        mixing_buffer[j][k] += frame->data()[number_of_channels * k + j];
      }
    }
  }
  return mixing_buffer;
}

void RunApmAgcLimiter(AudioFrameView<float> mixing_buffer_view,
                      AudioProcessing* apm_agc_limiter) {
  // Halve all samples to avoid saturation before limiting. The input
  // format of APM is Float. Convert the samples from FloatS16 to
  // Float.
  for (size_t i = 0; i < mixing_buffer_view.num_channels(); ++i) {
    std::transform(mixing_buffer_view.channel(i).begin(),
                   mixing_buffer_view.channel(i).end(),
                   mixing_buffer_view.channel(i).begin(),
                   [](float a) { return FloatS16ToFloat(a / 2); });
  }

  const int sample_rate =
      static_cast<int>(mixing_buffer_view.samples_per_channel()) * 1000 /
      AudioMixerImpl::kFrameDurationInMs;
  StreamConfig processing_config(sample_rate,
                                 mixing_buffer_view.num_channels());

  // Smoothly limit the audio.
  apm_agc_limiter->ProcessStream(mixing_buffer_view.data(), processing_config,
                                 processing_config, mixing_buffer_view.data());

  // And now we can safely restore the level. This procedure results in
  // some loss of resolution, deemed acceptable.
  //
  // It's possible to apply the gain in the AGC (with a target level of 0 dbFS
  // and compression gain of 6 dB). However, in the transition frame when this
  // is enabled (moving from one to two audio sources) it has the potential to
  // create discontinuities in the mixed frame.
  //
  // Instead we double the samples in the frame..
  // Also convert the samples back to FloatS16.
  for (size_t i = 0; i < mixing_buffer_view.num_channels(); ++i) {
    std::transform(mixing_buffer_view.channel(i).begin(),
                   mixing_buffer_view.channel(i).end(),
                   mixing_buffer_view.channel(i).begin(),
                   [](float a) { return FloatToFloatS16(a * 2); });
  }
}

void RunApmAgc2Limiter(AudioFrameView<float> mixing_buffer_view,
                       FixedGainController* apm_agc2_limiter) {
  const size_t sample_rate = mixing_buffer_view.samples_per_channel() * 1000 /
                             AudioMixerImpl::kFrameDurationInMs;
  apm_agc2_limiter->SetSampleRate(sample_rate);
  apm_agc2_limiter->Process(mixing_buffer_view);
}

// Both interleaves and rounds.
void InterleaveToAudioFrame(AudioFrameView<const float> mixing_buffer_view,
                            AudioFrame* audio_frame_for_mixing) {
  const size_t number_of_channels = mixing_buffer_view.num_channels();
  const size_t samples_per_channel = mixing_buffer_view.samples_per_channel();
  // Put data in the result frame.
  for (size_t i = 0; i < number_of_channels; ++i) {
    for (size_t j = 0; j < samples_per_channel; ++j) {
      audio_frame_for_mixing->mutable_data()[number_of_channels * j + i] =
          FloatS16ToS16(mixing_buffer_view.channel(i)[j]);
    }
  }
}
}  // namespace

FrameCombiner::FrameCombiner(LimiterType limiter_type)
    : limiter_type_(limiter_type),
      apm_agc_limiter_(limiter_type_ == LimiterType::kApmAgcLimiter
                           ? CreateLimiter()
                           : nullptr),
      data_dumper_(new ApmDataDumper(0)),
      apm_agc2_limiter_(data_dumper_.get()) {
  apm_agc2_limiter_.SetGain(0.f);
}

FrameCombiner::FrameCombiner(bool use_limiter)
    : FrameCombiner(use_limiter ? LimiterType::kApmAgcLimiter
                                : LimiterType::kNoLimiter) {}

FrameCombiner::~FrameCombiner() = default;

void FrameCombiner::SetLimiterType(LimiterType limiter_type) {
  // TODO(aleloi): remove this method and make limiter_type_ const
  // when we have finished moved to APM-AGC2.
  limiter_type_ = limiter_type;
  if (limiter_type_ == LimiterType::kApmAgcLimiter &&
      apm_agc_limiter_ == nullptr) {
    apm_agc_limiter_ = CreateLimiter();
  }
}

void FrameCombiner::Combine(const std::vector<AudioFrame*>& mix_list,
                            size_t number_of_channels,
                            int sample_rate,
                            size_t number_of_streams,
                            AudioFrame* audio_frame_for_mixing) {
  RTC_DCHECK(audio_frame_for_mixing);

  LogMixingStats(mix_list, sample_rate, number_of_streams);

  SetAudioFrameFields(mix_list, number_of_channels, sample_rate,
                      number_of_streams, audio_frame_for_mixing);

  const size_t samples_per_channel = static_cast<size_t>(
      (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000);

  for (const auto* frame : mix_list) {
    RTC_DCHECK_EQ(samples_per_channel, frame->samples_per_channel_);
    RTC_DCHECK_EQ(sample_rate, frame->sample_rate_hz_);
  }

  // The 'num_channels_' field of frames in 'mix_list' could be
  // different from 'number_of_channels'.
  for (auto* frame : mix_list) {
    RemixFrame(number_of_channels, frame);
  }

  if (number_of_streams <= 1) {
    MixFewFramesWithNoLimiter(mix_list, audio_frame_for_mixing);
    return;
  }

  std::array<OneChannelBuffer, kMaximumAmountOfChannels> mixing_buffer =
      MixToFloatFrame(mix_list, samples_per_channel, number_of_channels);

  // Put float data in an AudioFrameView.
  std::array<float*, kMaximumAmountOfChannels> channel_pointers{};
  for (size_t i = 0; i < number_of_channels; ++i) {
    channel_pointers[i] = &mixing_buffer[i][0];
  }
  AudioFrameView<float> mixing_buffer_view(
      &channel_pointers[0], number_of_channels, samples_per_channel);

  if (limiter_type_ == LimiterType::kApmAgcLimiter) {
    RunApmAgcLimiter(mixing_buffer_view, apm_agc_limiter_.get());
  } else if (limiter_type_ == LimiterType::kApmAgc2Limiter) {
    RunApmAgc2Limiter(mixing_buffer_view, &apm_agc2_limiter_);
  }

  InterleaveToAudioFrame(mixing_buffer_view, audio_frame_for_mixing);
}

void FrameCombiner::LogMixingStats(const std::vector<AudioFrame*>& mix_list,
                                   int sample_rate,
                                   size_t number_of_streams) const {
  // Log every second.
  uma_logging_counter_++;
  if (uma_logging_counter_ > 1000 / AudioMixerImpl::kFrameDurationInMs) {
    uma_logging_counter_ = 0;
    RTC_HISTOGRAM_COUNTS_100("WebRTC.Audio.AudioMixer.NumIncomingStreams",
                             static_cast<int>(number_of_streams));
    RTC_HISTOGRAM_ENUMERATION(
        "WebRTC.Audio.AudioMixer.NumIncomingActiveStreams",
        static_cast<int>(mix_list.size()),
        AudioMixerImpl::kMaximumAmountOfMixedAudioSources);

    using NativeRate = AudioProcessing::NativeRate;
    static constexpr NativeRate native_rates[] = {
        NativeRate::kSampleRate8kHz, NativeRate::kSampleRate16kHz,
        NativeRate::kSampleRate32kHz, NativeRate::kSampleRate48kHz};
    const auto* rate_position = std::lower_bound(
        std::begin(native_rates), std::end(native_rates), sample_rate);

    RTC_HISTOGRAM_ENUMERATION(
        "WebRTC.Audio.AudioMixer.MixingRate",
        std::distance(std::begin(native_rates), rate_position),
        arraysize(native_rates));
  }
}

}  // namespace webrtc