From 46ea5d7f8206d2d64d6a5596721e422ad3810fb7 Mon Sep 17 00:00:00 2001 From: Gustaf Ullberg Date: Tue, 15 Dec 2020 15:12:16 +0100 Subject: [PATCH] Surface the number of encoded channels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two audio channels going into the AudioSource::Sink can either be down-mixed to mono or encoded as stereo. This change enables WebRTC users (such as Chromium) to query the number of audio channels actually encoded. That information can in turn be used to tailor the audio processing to the number of channels actually encoded. This change fixes webrtc:8133 from a WebRTC perspective and will be followed up with the necessary Chromium changes. Bug: webrtc:8133 Change-Id: I8e8a08292002919784c05a5aacb21707918809c8 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/197426 Reviewed-by: Per Åhgren Reviewed-by: Magnus Flodman Commit-Queue: Gustaf Ullberg Cr-Commit-Position: refs/heads/master@{#32836} --- api/media_stream_interface.h | 5 +++++ media/base/audio_source.h | 5 +++++ media/base/fake_media_engine.h | 1 + media/engine/webrtc_voice_engine.cc | 11 +++++++++++ pc/rtp_sender.cc | 1 + pc/rtp_sender.h | 4 ++++ 6 files changed, 27 insertions(+) diff --git a/api/media_stream_interface.h b/api/media_stream_interface.h index bd4a2c0292..8892ee5a0b 100644 --- a/api/media_stream_interface.h +++ b/api/media_stream_interface.h @@ -216,6 +216,11 @@ class AudioTrackSinkInterface { number_of_frames); } + // Returns the number of channels encoded by the sink. This can be less than + // the number_of_channels if down-mixing occur. A value of -1 means an unknown + // number. 
+ virtual int NumPreferredChannels() const { return -1; } + protected: virtual ~AudioTrackSinkInterface() {} }; diff --git a/media/base/audio_source.h b/media/base/audio_source.h index 8a8796800b..51fe0e13e1 100644 --- a/media/base/audio_source.h +++ b/media/base/audio_source.h @@ -36,6 +36,11 @@ class AudioSource { // Called when the AudioSource is going away. virtual void OnClose() = 0; + // Returns the number of channels encoded by the sink. This can be less than + // the number_of_channels if down-mixing occur. A value of -1 means an + // unknown number. + virtual int NumPreferredChannels() const = 0; + protected: virtual ~Sink() {} }; diff --git a/media/base/fake_media_engine.h b/media/base/fake_media_engine.h index 1751dd8bfe..42940bf1b4 100644 --- a/media/base/fake_media_engine.h +++ b/media/base/fake_media_engine.h @@ -371,6 +371,7 @@ class FakeVoiceMediaChannel : public RtpHelper { size_t number_of_frames, absl::optional absolute_capture_timestamp_ms) override; void OnClose() override; + int NumPreferredChannels() const override { return -1; } AudioSource* source() const; private: diff --git a/media/engine/webrtc_voice_engine.cc b/media/engine/webrtc_voice_engine.cc index bc382d03e3..2ed78b429b 100644 --- a/media/engine/webrtc_voice_engine.cc +++ b/media/engine/webrtc_voice_engine.cc @@ -1114,6 +1114,14 @@ class WebRtcVoiceMediaChannel::WebRtcAudioSendStream *audio_codec_spec_); UpdateAllowedBitrateRange(); + + // Encoder will only use two channels if the stereo parameter is set. 
+ const auto& it = send_codec_spec.format.parameters.find("stereo"); + if (it != send_codec_spec.format.parameters.end() && it->second == "1") { + num_encoded_channels_ = 2; + } else { + num_encoded_channels_ = 1; + } } void UpdateAudioNetworkAdaptorConfig() { @@ -1133,6 +1141,8 @@ class WebRtcVoiceMediaChannel::WebRtcAudioSendStream stream_->Reconfigure(config_); } + int NumPreferredChannels() const override { return num_encoded_channels_; } + const AdaptivePtimeConfig adaptive_ptime_config_; rtc::ThreadChecker worker_thread_checker_; rtc::RaceChecker audio_capture_race_checker_; @@ -1154,6 +1164,7 @@ class WebRtcVoiceMediaChannel::WebRtcAudioSendStream // TODO(webrtc:11717): Remove this once audio_network_adaptor in AudioOptions // has been removed. absl::optional audio_network_adaptor_config_from_options_; + int num_encoded_channels_ = -1; }; class WebRtcVoiceMediaChannel::WebRtcAudioReceiveStream { diff --git a/pc/rtp_sender.cc b/pc/rtp_sender.cc index 0da6dfca80..5a7e237c90 100644 --- a/pc/rtp_sender.cc +++ b/pc/rtp_sender.cc @@ -405,6 +405,7 @@ void LocalAudioSinkAdapter::OnData( if (sink_) { sink_->OnData(audio_data, bits_per_sample, sample_rate, number_of_channels, number_of_frames, absolute_capture_timestamp_ms); + num_preferred_channels_ = sink_->NumPreferredChannels(); } } diff --git a/pc/rtp_sender.h b/pc/rtp_sender.h index c2fe91f01d..51ae1e978b 100644 --- a/pc/rtp_sender.h +++ b/pc/rtp_sender.h @@ -237,12 +237,16 @@ class LocalAudioSinkAdapter : public AudioTrackSinkInterface, /*absolute_capture_timestamp_ms=*/absl::nullopt); } + // AudioSinkInterface implementation. + int NumPreferredChannels() const override { return num_preferred_channels_; } + // cricket::AudioSource implementation. void SetSink(cricket::AudioSource::Sink* sink) override; cricket::AudioSource::Sink* sink_; // Critical section protecting |sink_|. Mutex lock_; + int num_preferred_channels_ = -1; }; class AudioRtpSender : public DtmfProviderInterface, public RtpSenderBase {