diff --git a/audio/BUILD.gn b/audio/BUILD.gn index 65e3050b34..84593858e7 100644 --- a/audio/BUILD.gn +++ b/audio/BUILD.gn @@ -20,8 +20,8 @@ rtc_static_library("audio") { "audio_send_stream.h", "audio_state.cc", "audio_state.h", - "audio_transport_proxy.cc", - "audio_transport_proxy.h", + "audio_transport_impl.cc", + "audio_transport_impl.h", "conversion.h", "null_audio_poller.cc", "null_audio_poller.h", @@ -61,6 +61,8 @@ rtc_static_library("audio") { "../system_wrappers", "../system_wrappers:field_trial_api", "../voice_engine", + "../voice_engine:audio_level", + "utility:audio_frame_operations", ] } if (rtc_include_tests) { @@ -99,6 +101,7 @@ if (rtc_include_tests) { ":audio", ":audio_end_to_end_test", "../api:mock_audio_mixer", + "../call:mock_call_interfaces", "../call:mock_rtp_interfaces", "../call:rtp_interfaces", "../call:rtp_receiver", diff --git a/audio/DEPS b/audio/DEPS index 0f952a3e7a..70e33469df 100644 --- a/audio/DEPS +++ b/audio/DEPS @@ -19,10 +19,7 @@ specific_include_rules = { "audio_send_stream.cc": [ "+modules/audio_coding/codecs/cng/audio_encoder_cng.h", ], - # TODO(ossu): Remove this exception when builtin_audio_encoder_factory.h - # has moved to api/, or when the proper mocks have been made. - "audio_send_stream_unittest.cc": [ - "+modules/audio_coding/codecs/builtin_audio_encoder_factory.h", - ], + "audio_transport_impl.h": [ + "+modules/audio_processing/typing_detection.h", + ] } - diff --git a/audio/audio_receive_stream_unittest.cc b/audio/audio_receive_stream_unittest.cc index d6c2dbe69b..d24bed5299 100644 --- a/audio/audio_receive_stream_unittest.cc +++ b/audio/audio_receive_stream_unittest.cc @@ -17,6 +17,7 @@ #include "audio/conversion.h" #include "call/rtp_stream_receiver_controller.h" #include "logging/rtc_event_log/mock/mock_rtc_event_log.h" +#include "modules/audio_device/include/mock_audio_device.h" #include "modules/audio_processing/include/mock_audio_processing.h" #include "modules/bitrate_controller/include/mock/mock_bitrate_controller.h" #include "modules/pacing/packet_router.h" @@ -75,12 +76,12 @@ struct ConfigHelper { audio_mixer_(new rtc::RefCountedObject()) { using testing::Invoke; - EXPECT_CALL(voice_engine_, audio_transport()); - AudioState::Config config; config.voice_engine = &voice_engine_; config.audio_mixer = audio_mixer_; config.audio_processing = new rtc::RefCountedObject(); + config.audio_device_module = + new rtc::RefCountedObject(); audio_state_ = AudioState::Create(config); EXPECT_CALL(voice_engine_, ChannelProxyFactory(kChannelId)) diff --git a/audio/audio_send_stream.cc b/audio/audio_send_stream.cc index 1596c96085..6aa469d6ca 100644 --- a/audio/audio_send_stream.cc +++ b/audio/audio_send_stream.cc @@ -31,7 +31,6 @@ #include "system_wrappers/include/field_trial.h" #include "voice_engine/channel_proxy.h" #include "voice_engine/include/voe_base.h" -#include "voice_engine/transmit_mixer.h" #include "voice_engine/voice_engine_impl.h" namespace webrtc { @@ -121,6 +120,7 @@ AudioSendStream::AudioSendStream( AudioSendStream::~AudioSendStream() { RTC_DCHECK(worker_thread_checker_.CalledOnValidThread()); RTC_LOG(LS_INFO) << "~AudioSendStream: " << config_.ToString(); + RTC_DCHECK(!sending_); transport_->send_side_cc()->DeRegisterPacketFeedbackObserver(this); channel_proxy_->RegisterTransport(nullptr); channel_proxy_->ResetSenderCongestionControlObjects(); @@ -135,6 +135,7 @@ const webrtc::AudioSendStream::Config& AudioSendStream::GetConfig() const { void AudioSendStream::Reconfigure( const webrtc::AudioSendStream::Config& new_config) { + RTC_DCHECK(worker_thread_checker_.CalledOnValidThread()); ConfigureStream(this, new_config, false); } @@ -232,6 +233,10 @@ void AudioSendStream::ConfigureStream( void AudioSendStream::Start() { RTC_DCHECK(worker_thread_checker_.CalledOnValidThread()); + if (sending_) { + return; + } + if (config_.min_bitrate_bps != -1 && config_.max_bitrate_bps != -1 && (FindExtensionIds(config_.rtp.extensions).transport_sequence_number != 0 || @@ -246,10 +251,17 @@ void AudioSendStream::Start() { if (error != 0) { RTC_LOG(LS_ERROR) << "AudioSendStream::Start failed with error: " << error; } + sending_ = true; + audio_state()->AddSendingStream(this, encoder_sample_rate_hz_, + encoder_num_channels_); } void AudioSendStream::Stop() { RTC_DCHECK(worker_thread_checker_.CalledOnValidThread()); + if (!sending_) { + return; + } + RemoveBitrateObserver(); ScopedVoEInterface base(voice_engine()); @@ -257,6 +269,13 @@ void AudioSendStream::Stop() { if (error != 0) { RTC_LOG(LS_ERROR) << "AudioSendStream::Stop failed with error: " << error; } + sending_ = false; + audio_state()->RemoveSendingStream(this); +} + +void AudioSendStream::SendAudioData(std::unique_ptr audio_frame) { + RTC_CHECK_RUNS_SERIALIZED(&audio_capture_race_checker_); + channel_proxy_->ProcessAndEncodeAudio(std::move(audio_frame)); } bool AudioSendStream::SendTelephoneEvent(int payload_type, @@ -313,17 +332,12 @@ webrtc::AudioSendStream::Stats AudioSendStream::GetStats( } } - ScopedVoEInterface base(voice_engine()); - RTC_DCHECK(base->transmit_mixer()); - stats.audio_level = base->transmit_mixer()->AudioLevelFullRange(); - RTC_DCHECK_LE(0, stats.audio_level); + AudioState::Stats input_stats = audio_state()->GetAudioInputStats(); + stats.audio_level = input_stats.audio_level; + stats.total_input_energy = input_stats.total_energy; + stats.total_input_duration = input_stats.total_duration; - stats.total_input_energy = base->transmit_mixer()->GetTotalInputEnergy(); - stats.total_input_duration = base->transmit_mixer()->GetTotalInputDuration(); - - internal::AudioState* audio_state = - static_cast(audio_state_.get()); - stats.typing_noise_detected = audio_state->typing_noise_detected(); + stats.typing_noise_detected = audio_state()->typing_noise_detected(); stats.ana_statistics = channel_proxy_->GetANAStatistics(); RTC_DCHECK(audio_state_->audio_processing()); stats.apm_statistics = @@ -418,6 +432,20 @@ const TimeInterval& AudioSendStream::GetActiveLifetime() const { return active_lifetime_; } +internal::AudioState* AudioSendStream::audio_state() { + internal::AudioState* audio_state = + static_cast(audio_state_.get()); + RTC_DCHECK(audio_state); + return audio_state; +} + +const internal::AudioState* AudioSendStream::audio_state() const { + internal::AudioState* audio_state = + static_cast(audio_state_.get()); + RTC_DCHECK(audio_state); + return audio_state; +} + VoiceEngine* AudioSendStream::voice_engine() const { internal::AudioState* audio_state = static_cast(audio_state_.get()); @@ -426,6 +454,17 @@ VoiceEngine* AudioSendStream::voice_engine() const { return voice_engine; } +void AudioSendStream::StoreEncoderProperties(int sample_rate_hz, + size_t num_channels) { + RTC_DCHECK(worker_thread_checker_.CalledOnValidThread()); + encoder_sample_rate_hz_ = sample_rate_hz; + encoder_num_channels_ = num_channels; + if (sending_) { + // Update AudioState's information about the stream. + audio_state()->AddSendingStream(this, sample_rate_hz, num_channels); + } +} + // Apply current codec settings to a single voe::Channel used for sending. bool AudioSendStream::SetupSendCodec(AudioSendStream* stream, const Config& new_config) { @@ -472,6 +511,8 @@ bool AudioSendStream::SetupSendCodec(AudioSendStream* stream, new_config.send_codec_spec->format.clockrate_hz); } + stream->StoreEncoderProperties(encoder->SampleRateHz(), + encoder->NumChannels()); stream->channel_proxy_->SetEncoder(new_config.send_codec_spec->payload_type, std::move(encoder)); return true; diff --git a/audio/audio_send_stream.h b/audio/audio_send_stream.h index 1414e39c64..37717674e2 100644 --- a/audio/audio_send_stream.h +++ b/audio/audio_send_stream.h @@ -20,6 +20,7 @@ #include "call/bitrate_allocator.h" #include "modules/rtp_rtcp/include/rtp_rtcp.h" #include "rtc_base/constructormagic.h" +#include "rtc_base/race_checker.h" #include "rtc_base/thread_checker.h" #include "voice_engine/transport_feedback_packet_loss_tracker.h" @@ -35,6 +36,8 @@ class ChannelProxy; } // namespace voe namespace internal { +class AudioState; + class AudioSendStream final : public webrtc::AudioSendStream, public webrtc::BitrateAllocatorObserver, public webrtc::PacketFeedbackObserver { @@ -54,6 +57,7 @@ class AudioSendStream final : public webrtc::AudioSendStream, void Reconfigure(const webrtc::AudioSendStream::Config& config) override; void Start() override; void Stop() override; + void SendAudioData(std::unique_ptr audio_frame) override; bool SendTelephoneEvent(int payload_type, int payload_frequency, int event, int duration_ms) override; void SetMuted(bool muted) override; @@ -83,8 +87,12 @@ class AudioSendStream final : public webrtc::AudioSendStream, private: class TimedTransport; + internal::AudioState* audio_state(); + const internal::AudioState* audio_state() const; VoiceEngine* voice_engine() const; + void StoreEncoderProperties(int sample_rate_hz, size_t num_channels); + // These are all static to make it less likely that (the old) config_ is // accessed unintentionally. static void ConfigureStream(AudioSendStream* stream, @@ -105,12 +113,17 @@ class AudioSendStream final : public webrtc::AudioSendStream, rtc::ThreadChecker worker_thread_checker_; rtc::ThreadChecker pacer_thread_checker_; + rtc::RaceChecker audio_capture_race_checker_; rtc::TaskQueue* worker_queue_; webrtc::AudioSendStream::Config config_; rtc::scoped_refptr audio_state_; std::unique_ptr channel_proxy_; RtcEventLog* const event_log_; + int encoder_sample_rate_hz_ = 0; + size_t encoder_num_channels_ = 0; + bool sending_ = false; + BitrateAllocator* const bitrate_allocator_; RtpTransportControllerSendInterface* const transport_; diff --git a/audio/audio_send_stream_unittest.cc b/audio/audio_send_stream_unittest.cc index 145a8e2419..f6056378fe 100644 --- a/audio/audio_send_stream_unittest.cc +++ b/audio/audio_send_stream_unittest.cc @@ -18,6 +18,7 @@ #include "call/fake_rtp_transport_controller_send.h" #include "call/rtp_transport_controller_send_interface.h" #include "logging/rtc_event_log/mock/mock_rtc_event_log.h" +#include "modules/audio_device/include/mock_audio_device.h" #include "modules/audio_mixer/audio_mixer_impl.h" #include "modules/audio_processing/include/audio_processing_statistics.h" #include "modules/audio_processing/include/mock_audio_processing.h" @@ -33,7 +34,6 @@ #include "test/mock_audio_encoder_factory.h" #include "test/mock_voe_channel_proxy.h" #include "test/mock_voice_engine.h" -#include "voice_engine/transmit_mixer.h" namespace webrtc { namespace test { @@ -58,9 +58,6 @@ const double kEchoReturnLoss = -65; const double kEchoReturnLossEnhancement = 101; const double kResidualEchoLikelihood = -1.0f; const double kResidualEchoLikelihoodMax = 23.0f; -const int32_t kSpeechInputLevel = 96; -const double kTotalInputEnergy = 0.25; -const double kTotalInputDuration = 0.5; const CallStatistics kCallStats = { 1345, 1678, 1901, 1234, 112, 13456, 17890, 1567, -1890, -1123}; const ReportBlock kReportBlock = {456, 780, 123, 567, 890, 132, 143, 13354}; @@ -85,14 +82,6 @@ class MockLimitObserver : public BitrateAllocator::LimitObserver { uint32_t max_padding_bitrate_bps)); }; -class MockTransmitMixer : public voe::TransmitMixer { - public: - MOCK_CONST_METHOD0(AudioLevelFullRange, int16_t()); - MOCK_CONST_METHOD0(GetTotalInputEnergy, double()); - MOCK_CONST_METHOD0(GetTotalInputDuration, double()); - MOCK_CONST_METHOD0(typing_noise_detected, bool()); -}; - std::unique_ptr SetupAudioEncoderMock( int payload_type, const SdpAudioFormat& format) { @@ -151,12 +140,12 @@ struct ConfigHelper { audio_encoder_(nullptr) { using testing::Invoke; - EXPECT_CALL(voice_engine_, audio_transport()); - AudioState::Config config; config.voice_engine = &voice_engine_; config.audio_mixer = AudioMixerImpl::Create(); config.audio_processing = audio_processing_; + config.audio_device_module = + new rtc::RefCountedObject(); audio_state_ = AudioState::Create(config); SetupDefaultChannelProxy(audio_bwe_enabled); @@ -301,17 +290,6 @@ struct ConfigHelper { .WillRepeatedly(Return(report_blocks)); EXPECT_CALL(*channel_proxy_, GetANAStatistics()) .WillRepeatedly(Return(ANAStats())); - EXPECT_CALL(voice_engine_, transmit_mixer()) - .WillRepeatedly(Return(&transmit_mixer_)); - - EXPECT_CALL(transmit_mixer_, AudioLevelFullRange()) - .WillRepeatedly(Return(kSpeechInputLevel)); - EXPECT_CALL(transmit_mixer_, GetTotalInputEnergy()) - .WillRepeatedly(Return(kTotalInputEnergy)); - EXPECT_CALL(transmit_mixer_, GetTotalInputDuration()) - .WillRepeatedly(Return(kTotalInputDuration)); - EXPECT_CALL(transmit_mixer_, typing_noise_detected()) - .WillRepeatedly(Return(true)); audio_processing_stats_.echo_return_loss = kEchoReturnLoss; audio_processing_stats_.echo_return_loss_enhancement = @@ -334,7 +312,6 @@ struct ConfigHelper { AudioSendStream::Config stream_config_; testing::StrictMock* channel_proxy_ = nullptr; rtc::scoped_refptr audio_processing_; - MockTransmitMixer transmit_mixer_; AudioProcessingStats audio_processing_stats_; SimulatedClock simulated_clock_; PacketRouter packet_router_; @@ -447,9 +424,9 @@ TEST(AudioSendStreamTest, GetStats) { (kIsacCodec.plfreq / 1000)), stats.jitter_ms); EXPECT_EQ(kCallStats.rttMs, stats.rtt_ms); - EXPECT_EQ(static_cast(kSpeechInputLevel), stats.audio_level); - EXPECT_EQ(kTotalInputEnergy, stats.total_input_energy); - EXPECT_EQ(kTotalInputDuration, stats.total_input_duration); + EXPECT_EQ(0, stats.audio_level); + EXPECT_EQ(0, stats.total_input_energy); + EXPECT_EQ(0, stats.total_input_duration); EXPECT_EQ(kEchoDelayMedian, stats.apm_statistics.delay_median_ms); EXPECT_EQ(kEchoDelayStdDev, stats.apm_statistics.delay_standard_deviation_ms); EXPECT_EQ(kEchoReturnLoss, stats.apm_statistics.echo_return_loss); @@ -461,7 +438,7 @@ TEST(AudioSendStreamTest, GetStats) { stats.apm_statistics.residual_echo_likelihood); EXPECT_EQ(kResidualEchoLikelihoodMax, stats.apm_statistics.residual_echo_likelihood_recent_max); - EXPECT_TRUE(stats.typing_noise_detected); + EXPECT_FALSE(stats.typing_noise_detected); } TEST(AudioSendStreamTest, SendCodecAppliesAudioNetworkAdaptor) { @@ -594,7 +571,5 @@ TEST(AudioSendStreamTest, ReconfigureTransportCcResetsFirst) { } send_stream.Reconfigure(new_config); } - - } // namespace test } // namespace webrtc diff --git a/audio/audio_state.cc b/audio/audio_state.cc index 5a30c53b3d..a83b681252 100644 --- a/audio/audio_state.cc +++ b/audio/audio_state.cc @@ -10,13 +10,16 @@ #include "audio/audio_state.h" +#include +#include +#include + #include "modules/audio_device/include/audio_device.h" #include "rtc_base/atomicops.h" #include "rtc_base/checks.h" #include "rtc_base/logging.h" #include "rtc_base/ptr_util.h" #include "rtc_base/thread.h" -#include "voice_engine/transmit_mixer.h" namespace webrtc { namespace internal { @@ -24,15 +27,16 @@ namespace internal { AudioState::AudioState(const AudioState::Config& config) : config_(config), voe_base_(config.voice_engine), - audio_transport_proxy_(voe_base_->audio_transport(), - config_.audio_processing.get(), - config_.audio_mixer) { + audio_transport_(config_.audio_mixer, + config_.audio_processing.get(), + config_.audio_device_module.get()) { process_thread_checker_.DetachFromThread(); RTC_DCHECK(config_.audio_mixer); } AudioState::~AudioState() { RTC_DCHECK(thread_checker_.CalledOnValidThread()); + RTC_DCHECK(sending_streams_.empty()); } VoiceEngine* AudioState::voice_engine() { @@ -47,11 +51,23 @@ rtc::scoped_refptr AudioState::mixer() { bool AudioState::typing_noise_detected() const { RTC_DCHECK(thread_checker_.CalledOnValidThread()); - // TODO(solenberg): Remove const_cast once AudioState owns transmit mixer - // functionality. - voe::TransmitMixer* transmit_mixer = - const_cast(this)->voe_base_->transmit_mixer(); - return transmit_mixer->typing_noise_detected(); + return audio_transport_.typing_noise_detected(); +} + +void AudioState::AddSendingStream(webrtc::AudioSendStream* stream, + int sample_rate_hz, size_t num_channels) { + RTC_DCHECK(thread_checker_.CalledOnValidThread()); + auto& properties = sending_streams_[stream]; + properties.sample_rate_hz = sample_rate_hz; + properties.num_channels = num_channels; + UpdateAudioTransportWithSendingStreams(); +} + +void AudioState::RemoveSendingStream(webrtc::AudioSendStream* stream) { + RTC_DCHECK(thread_checker_.CalledOnValidThread()); + auto count = sending_streams_.erase(stream); + RTC_DCHECK_EQ(1, count); + UpdateAudioTransportWithSendingStreams(); } void AudioState::SetPlayout(bool enabled) { @@ -61,33 +77,47 @@ void AudioState::SetPlayout(bool enabled) { if (enabled == currently_enabled) { return; } - VoEBase* const voe = VoEBase::GetInterface(voice_engine()); - RTC_DCHECK(voe); if (enabled) { null_audio_poller_.reset(); } // Will stop/start playout of the underlying device, if necessary, and // remember the setting for when it receives subsequent calls of // StartPlayout. - voe->SetPlayout(enabled); + voe_base_->SetPlayout(enabled); if (!enabled) { null_audio_poller_ = - rtc::MakeUnique(&audio_transport_proxy_); + rtc::MakeUnique(&audio_transport_); } - voe->Release(); } void AudioState::SetRecording(bool enabled) { RTC_LOG(INFO) << "SetRecording(" << enabled << ")"; RTC_DCHECK(thread_checker_.CalledOnValidThread()); // TODO(henrika): keep track of state as in SetPlayout(). - VoEBase* const voe = VoEBase::GetInterface(voice_engine()); - RTC_DCHECK(voe); // Will stop/start recording of the underlying device, if necessary, and // remember the setting for when it receives subsequent calls of // StartPlayout. - voe->SetRecording(enabled); - voe->Release(); + voe_base_->SetRecording(enabled); +} + +AudioState::Stats AudioState::GetAudioInputStats() const { + RTC_DCHECK(thread_checker_.CalledOnValidThread()); + const voe::AudioLevel& audio_level = audio_transport_.audio_level(); + Stats result; + result.audio_level = audio_level.LevelFullRange(); + RTC_DCHECK_LE(0, result.audio_level); + RTC_DCHECK_GE(32767, result.audio_level); + result.quantized_audio_level = audio_level.Level(); + RTC_DCHECK_LE(0, result.quantized_audio_level); + RTC_DCHECK_GE(9, result.quantized_audio_level); + result.total_energy = audio_level.TotalEnergy(); + result.total_duration = audio_level.TotalDuration(); + return result; +} + +void AudioState::SetStereoChannelSwapping(bool enable) { + RTC_DCHECK(thread_checker_.CalledOnValidThread()); + audio_transport_.SetStereoChannelSwapping(enable); } // Reference count; implementation copied from rtc::RefCountedObject. @@ -103,6 +133,20 @@ rtc::RefCountReleaseStatus AudioState::Release() const { } return rtc::RefCountReleaseStatus::kOtherRefsRemained; } + +void AudioState::UpdateAudioTransportWithSendingStreams() { + RTC_DCHECK(thread_checker_.CalledOnValidThread()); + std::vector sending_streams; + int max_sample_rate_hz = 8000; + size_t max_num_channels = 1; + for (const auto& kv : sending_streams_) { + sending_streams.push_back(kv.first); + max_sample_rate_hz = std::max(max_sample_rate_hz, kv.second.sample_rate_hz); + max_num_channels = std::max(max_num_channels, kv.second.num_channels); + } + audio_transport_.UpdateSendingStreams(std::move(sending_streams), + max_sample_rate_hz, max_num_channels); +} } // namespace internal rtc::scoped_refptr AudioState::Create( diff --git a/audio/audio_state.h b/audio/audio_state.h index f4bddbfa85..14dc78891b 100644 --- a/audio/audio_state.h +++ b/audio/audio_state.h @@ -11,9 +11,10 @@ #ifndef AUDIO_AUDIO_STATE_H_ #define AUDIO_AUDIO_STATE_H_ +#include #include -#include "audio/audio_transport_proxy.h" +#include "audio/audio_transport_impl.h" #include "audio/null_audio_poller.h" #include "audio/scoped_voe_interface.h" #include "call/audio_state.h" @@ -24,6 +25,9 @@ #include "voice_engine/include/voe_base.h" namespace webrtc { + +class AudioSendStream; + namespace internal { class AudioState final : public webrtc::AudioState { @@ -36,21 +40,30 @@ class AudioState final : public webrtc::AudioState { return config_.audio_processing.get(); } AudioTransport* audio_transport() override { - return &audio_transport_proxy_; + return &audio_transport_; } void SetPlayout(bool enabled) override; void SetRecording(bool enabled) override; + Stats GetAudioInputStats() const override; + void SetStereoChannelSwapping(bool enable) override; + VoiceEngine* voice_engine(); rtc::scoped_refptr mixer(); bool typing_noise_detected() const; + void AddSendingStream(webrtc::AudioSendStream* stream, + int sample_rate_hz, size_t num_channels); + void RemoveSendingStream(webrtc::AudioSendStream* stream); + private: // rtc::RefCountInterface implementation. void AddRef() const override; rtc::RefCountReleaseStatus Release() const override; + void UpdateAudioTransportWithSendingStreams(); + rtc::ThreadChecker thread_checker_; rtc::ThreadChecker process_thread_checker_; const webrtc::AudioState::Config config_; @@ -63,14 +76,20 @@ class AudioState final : public webrtc::AudioState { mutable volatile int ref_count_ = 0; // Transports mixed audio from the mixer to the audio device and - // recorded audio to the VoE AudioTransport. - AudioTransportProxy audio_transport_proxy_; + // recorded audio to the sending streams. + AudioTransportImpl audio_transport_; // Null audio poller is used to continue polling the audio streams if audio // playout is disabled so that audio processing still happens and the audio // stats are still updated. std::unique_ptr null_audio_poller_; + struct StreamProperties { + int sample_rate_hz = 0; + size_t num_channels = 0; + }; + std::map sending_streams_; + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(AudioState); }; } // namespace internal diff --git a/audio/audio_state_unittest.cc b/audio/audio_state_unittest.cc index 28b0a715f6..abae9857b7 100644 --- a/audio/audio_state_unittest.cc +++ b/audio/audio_state_unittest.cc @@ -9,8 +9,11 @@ */ #include +#include #include "audio/audio_state.h" +#include "call/test/mock_audio_send_stream.h" +#include "modules/audio_device/include/mock_audio_device.h" #include "modules/audio_mixer/audio_mixer_impl.h" #include "modules/audio_processing/include/mock_audio_processing.h" #include "test/gtest.h" @@ -20,30 +23,26 @@ namespace webrtc { namespace test { namespace { -const int kSampleRate = 8000; -const int kNumberOfChannels = 1; -const int kBytesPerSample = 2; +constexpr int kSampleRate = 16000; +constexpr int kNumberOfChannels = 1; struct ConfigHelper { ConfigHelper() : audio_mixer(AudioMixerImpl::Create()) { - EXPECT_CALL(mock_voice_engine, audio_transport()) - .WillRepeatedly(testing::Return(&audio_transport)); - audio_state_config.voice_engine = &mock_voice_engine; audio_state_config.audio_mixer = audio_mixer; audio_state_config.audio_processing = - new rtc::RefCountedObject(); + new rtc::RefCountedObject>(); + audio_state_config.audio_device_module = + new rtc::RefCountedObject(); } AudioState::Config& config() { return audio_state_config; } MockVoiceEngine& voice_engine() { return mock_voice_engine; } rtc::scoped_refptr mixer() { return audio_mixer; } - MockAudioTransport& original_audio_transport() { return audio_transport; } private: testing::StrictMock mock_voice_engine; AudioState::Config audio_state_config; rtc::scoped_refptr audio_mixer; - MockAudioTransport audio_transport; }; class FakeAudioSource : public AudioMixer::Source { @@ -60,12 +59,44 @@ class FakeAudioSource : public AudioMixer::Source { AudioFrameInfo(int sample_rate_hz, AudioFrame* audio_frame)); }; +std::vector Create10msSilentTestData(int sample_rate_hz, + size_t num_channels) { + const int samples_per_channel = sample_rate_hz / 100; + std::vector audio_data(samples_per_channel * num_channels, 0); + return audio_data; +} + +std::vector Create10msTestData(int sample_rate_hz, + size_t num_channels) { + const int samples_per_channel = sample_rate_hz / 100; + std::vector audio_data(samples_per_channel * num_channels, 0); + // Fill the first channel with a 1kHz sine wave. + const float inc = (2 * 3.14159265f * 1000) / sample_rate_hz; + float w = 0.f; + for (int i = 0; i < samples_per_channel; ++i) { + audio_data[i * num_channels] = + static_cast(32767.f * std::sin(w)); + w += inc; + } + return audio_data; +} + +std::vector ComputeChannelLevels(AudioFrame* audio_frame) { + const size_t num_channels = audio_frame->num_channels_; + const size_t samples_per_channel = audio_frame->samples_per_channel_; + std::vector levels(num_channels, 0); + for (size_t i = 0; i < samples_per_channel; ++i) { + for (size_t j = 0; j < num_channels; ++j) { + levels[j] += std::abs(audio_frame->data()[i * num_channels + j]); + } + } + return levels; +} } // namespace TEST(AudioStateTest, Create) { ConfigHelper helper; - rtc::scoped_refptr audio_state = - AudioState::Create(helper.config()); + auto audio_state = AudioState::Create(helper.config()); EXPECT_TRUE(audio_state.get()); } @@ -82,35 +113,165 @@ TEST(AudioStateTest, GetVoiceEngine) { EXPECT_EQ(audio_state->voice_engine(), &helper.voice_engine()); } -// Test that RecordedDataIsAvailable calls get to the original transport. -TEST(AudioStateAudioPathTest, RecordedAudioArrivesAtOriginalTransport) { +TEST(AudioStateTest, RecordedAudioArrivesAtSingleStream) { ConfigHelper helper; + std::unique_ptr audio_state( + new internal::AudioState(helper.config())); - rtc::scoped_refptr audio_state = - AudioState::Create(helper.config()); + MockAudioSendStream stream; + audio_state->AddSendingStream(&stream, 8000, 2); - // Setup completed. Ensure call of original transport is forwarded to new. - uint32_t new_mic_level; - EXPECT_CALL( - helper.original_audio_transport(), - RecordedDataIsAvailable(nullptr, kSampleRate / 100, kBytesPerSample, - kNumberOfChannels, kSampleRate, 0, 0, 0, false, - testing::Ref(new_mic_level))); + EXPECT_CALL(stream, SendAudioDataForMock(testing::AllOf( + testing::Field(&AudioFrame::sample_rate_hz_, testing::Eq(8000)), + testing::Field(&AudioFrame::num_channels_, testing::Eq(2u))))) + .WillOnce( + // Verify that channels are not swapped by default. + testing::Invoke([](AudioFrame* audio_frame) { + auto levels = ComputeChannelLevels(audio_frame); + EXPECT_LT(0u, levels[0]); + EXPECT_EQ(0u, levels[1]); + })); + MockAudioProcessing* ap = + static_cast(audio_state->audio_processing()); + EXPECT_CALL(*ap, set_stream_delay_ms(0)); + EXPECT_CALL(*ap, set_stream_key_pressed(false)); + EXPECT_CALL(*ap, ProcessStream(testing::_)); + constexpr int kSampleRate = 16000; + constexpr size_t kNumChannels = 2; + auto audio_data = Create10msTestData(kSampleRate, kNumChannels); + uint32_t new_mic_level = 667; audio_state->audio_transport()->RecordedDataIsAvailable( - nullptr, kSampleRate / 100, kBytesPerSample, kNumberOfChannels, - kSampleRate, 0, 0, 0, false, new_mic_level); + &audio_data[0], kSampleRate / 100, kNumChannels * 2, + kNumChannels, kSampleRate, 0, 0, 0, false, new_mic_level); + EXPECT_EQ(667u, new_mic_level); + + audio_state->RemoveSendingStream(&stream); } -TEST(AudioStateAudioPathTest, - QueryingProxyForAudioShouldResultInGetAudioCallOnMixerSource) { +TEST(AudioStateTest, RecordedAudioArrivesAtMultipleStreams) { ConfigHelper helper; + std::unique_ptr audio_state( + new internal::AudioState(helper.config())); - rtc::scoped_refptr audio_state = - AudioState::Create(helper.config()); + MockAudioSendStream stream_1; + MockAudioSendStream stream_2; + audio_state->AddSendingStream(&stream_1, 8001, 2); + audio_state->AddSendingStream(&stream_2, 32000, 1); + + EXPECT_CALL(stream_1, SendAudioDataForMock(testing::AllOf( + testing::Field(&AudioFrame::sample_rate_hz_, testing::Eq(16000)), + testing::Field(&AudioFrame::num_channels_, testing::Eq(1u))))) + .WillOnce( + // Verify that there is output signal. + testing::Invoke([](AudioFrame* audio_frame) { + auto levels = ComputeChannelLevels(audio_frame); + EXPECT_LT(0u, levels[0]); + })); + EXPECT_CALL(stream_2, SendAudioDataForMock(testing::AllOf( + testing::Field(&AudioFrame::sample_rate_hz_, testing::Eq(16000)), + testing::Field(&AudioFrame::num_channels_, testing::Eq(1u))))) + .WillOnce( + // Verify that there is output signal. + testing::Invoke([](AudioFrame* audio_frame) { + auto levels = ComputeChannelLevels(audio_frame); + EXPECT_LT(0u, levels[0]); + })); + MockAudioProcessing* ap = + static_cast(audio_state->audio_processing()); + EXPECT_CALL(*ap, set_stream_delay_ms(5)); + EXPECT_CALL(*ap, set_stream_key_pressed(true)); + EXPECT_CALL(*ap, ProcessStream(testing::_)); + + constexpr int kSampleRate = 16000; + constexpr size_t kNumChannels = 1; + auto audio_data = Create10msTestData(kSampleRate, kNumChannels); + uint32_t new_mic_level = 667; + audio_state->audio_transport()->RecordedDataIsAvailable( + &audio_data[0], kSampleRate / 100, kNumChannels * 2, + kNumChannels, kSampleRate, 5, 0, 0, true, new_mic_level); + EXPECT_EQ(667u, new_mic_level); + + audio_state->RemoveSendingStream(&stream_1); + audio_state->RemoveSendingStream(&stream_2); +} + +TEST(AudioStateTest, EnableChannelSwap) { + constexpr int kSampleRate = 16000; + constexpr size_t kNumChannels = 2; + + ConfigHelper helper; + std::unique_ptr audio_state( + new internal::AudioState(helper.config())); + audio_state->SetStereoChannelSwapping(true); + + MockAudioSendStream stream; + audio_state->AddSendingStream(&stream, kSampleRate, kNumChannels); + + EXPECT_CALL(stream, SendAudioDataForMock(testing::_)) + .WillOnce( + // Verify that channels are swapped. + testing::Invoke([](AudioFrame* audio_frame) { + auto levels = ComputeChannelLevels(audio_frame); + EXPECT_EQ(0u, levels[0]); + EXPECT_LT(0u, levels[1]); + })); + + auto audio_data = Create10msTestData(kSampleRate, kNumChannels); + uint32_t new_mic_level = 667; + audio_state->audio_transport()->RecordedDataIsAvailable( + &audio_data[0], kSampleRate / 100, kNumChannels * 2, + kNumChannels, kSampleRate, 0, 0, 0, false, new_mic_level); + EXPECT_EQ(667u, new_mic_level); + + audio_state->RemoveSendingStream(&stream); +} + +TEST(AudioStateTest, InputLevelStats) { + constexpr int kSampleRate = 16000; + constexpr size_t kNumChannels = 1; + + ConfigHelper helper; + std::unique_ptr audio_state( + new internal::AudioState(helper.config())); + + // Push a silent buffer -> Level stats should be zeros except for duration. + { + auto audio_data = Create10msSilentTestData(kSampleRate, kNumChannels); + uint32_t new_mic_level = 667; + audio_state->audio_transport()->RecordedDataIsAvailable( + &audio_data[0], kSampleRate / 100, kNumChannels * 2, + kNumChannels, kSampleRate, 0, 0, 0, false, new_mic_level); + auto stats = audio_state->GetAudioInputStats(); + EXPECT_EQ(0, stats.audio_level); + EXPECT_EQ(0, stats.quantized_audio_level); + EXPECT_THAT(stats.total_energy, testing::DoubleEq(0.0)); + EXPECT_THAT(stats.total_duration, testing::DoubleEq(0.01)); + } + + // Push 10 non-silent buffers -> Level stats should be non-zero. + { + auto audio_data = Create10msTestData(kSampleRate, kNumChannels); + uint32_t new_mic_level = 667; + for (int i = 0; i < 10; ++i) { + audio_state->audio_transport()->RecordedDataIsAvailable( + &audio_data[0], kSampleRate / 100, kNumChannels * 2, + kNumChannels, kSampleRate, 0, 0, 0, false, new_mic_level); + } + auto stats = audio_state->GetAudioInputStats(); + EXPECT_EQ(32767, stats.audio_level); + EXPECT_EQ(9, stats.quantized_audio_level); + EXPECT_THAT(stats.total_energy, testing::DoubleEq(0.01)); + EXPECT_THAT(stats.total_duration, testing::DoubleEq(0.11)); + } +} + +TEST(AudioStateTest, + QueryingTransportForAudioShouldResultInGetAudioCallOnMixerSource) { + ConfigHelper helper; + auto audio_state = AudioState::Create(helper.config()); FakeAudioSource fake_source; - helper.mixer()->AddSource(&fake_source); EXPECT_CALL(fake_source, GetAudioFrameWithInfo(testing::_, testing::_)) @@ -127,7 +288,7 @@ TEST(AudioStateAudioPathTest, int64_t elapsed_time_ms; int64_t ntp_time_ms; audio_state->audio_transport()->NeedMorePlayData( - kSampleRate / 100, kBytesPerSample, kNumberOfChannels, kSampleRate, + kSampleRate / 100, kNumberOfChannels * 2, kNumberOfChannels, kSampleRate, audio_buffer, n_samples_out, &elapsed_time_ms, &ntp_time_ms); } } // namespace test diff --git a/audio/audio_transport_impl.cc b/audio/audio_transport_impl.cc new file mode 100644 index 0000000000..586fc4660b --- /dev/null +++ b/audio/audio_transport_impl.cc @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio/audio_transport_impl.h" + +#include +#include +#include + +#include "audio/utility/audio_frame_operations.h" +#include "call/audio_send_stream.h" +#include "rtc_base/logging.h" +#include "voice_engine/utility.h" + +namespace webrtc { + +namespace { + +// We want to process at the lowest sample rate and channel count possible +// without losing information. Choose the lowest native rate at least equal to +// the minimum of input and codec rates, choose lowest channel count, and +// configure the audio frame. +void InitializeCaptureFrame(int input_sample_rate, + int send_sample_rate_hz, + size_t input_num_channels, + size_t send_num_channels, + AudioFrame* audio_frame) { + RTC_DCHECK(audio_frame); + int min_processing_rate_hz = std::min(input_sample_rate, send_sample_rate_hz); + for (int native_rate_hz : AudioProcessing::kNativeSampleRatesHz) { + audio_frame->sample_rate_hz_ = native_rate_hz; + if (audio_frame->sample_rate_hz_ >= min_processing_rate_hz) { + break; + } + } + audio_frame->num_channels_ = std::min(input_num_channels, send_num_channels); +} + +void ProcessCaptureFrame(int analog_level, + uint32_t delay_ms, + bool key_pressed, + bool swap_stereo_channels, + AudioProcessing* audio_processing, + AudioFrame* audio_frame) { + RTC_DCHECK(audio_processing); + RTC_DCHECK(audio_frame); + RTC_DCHECK( + !audio_processing->echo_cancellation()->is_drift_compensation_enabled()); + GainControl* agc = audio_processing->gain_control(); + int error = agc->set_stream_analog_level(analog_level); + RTC_DCHECK_EQ(0, error) << + "set_stream_analog_level failed: analog_level = " << analog_level; + audio_processing->set_stream_delay_ms(delay_ms); + audio_processing->set_stream_key_pressed(key_pressed); + error = audio_processing->ProcessStream(audio_frame); + RTC_DCHECK_EQ(0, error) << "ProcessStream() error: " << error; + if (swap_stereo_channels) { + AudioFrameOperations::SwapStereoChannels(audio_frame); + } +} + +// Resample audio in |frame| to given sample rate preserving the +// channel count and place the result in |destination|. +int Resample(const AudioFrame& frame, + const int destination_sample_rate, + PushResampler* resampler, + int16_t* destination) { + const int number_of_channels = static_cast(frame.num_channels_); + const int target_number_of_samples_per_channel = + destination_sample_rate / 100; + resampler->InitializeIfNeeded(frame.sample_rate_hz_, destination_sample_rate, + number_of_channels); + + // TODO(yujo): make resampler take an AudioFrame, and add special case + // handling of muted frames. + return resampler->Resample( + frame.data(), frame.samples_per_channel_ * number_of_channels, + destination, number_of_channels * target_number_of_samples_per_channel); +} +} // namespace + +AudioTransportImpl::AudioTransportImpl(AudioMixer* mixer, + AudioProcessing* audio_processing, + AudioDeviceModule* audio_device_module) + : audio_processing_(audio_processing), + audio_device_module_(audio_device_module), + mixer_(mixer) { + RTC_DCHECK(mixer); + RTC_DCHECK(audio_processing); + RTC_DCHECK(audio_device_module); +} + +AudioTransportImpl::~AudioTransportImpl() {} + +// Not used in Chromium. Process captured audio and distribute to all sending +// streams, and try to do this at the lowest possible sample rate. +int32_t AudioTransportImpl::RecordedDataIsAvailable( + const void* audio_data, + const size_t number_of_frames, + const size_t bytes_per_sample, + const size_t number_of_channels, + const uint32_t sample_rate, + const uint32_t audio_delay_milliseconds, + const int32_t /*clock_drift*/, + const uint32_t volume, + const bool key_pressed, + uint32_t& /*new_mic_volume*/) { // NOLINT: to avoid changing APIs + RTC_DCHECK(audio_data); + RTC_DCHECK_GE(number_of_channels, 1); + RTC_DCHECK_LE(number_of_channels, 2); + RTC_DCHECK_EQ(2 * number_of_channels, bytes_per_sample); + RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz); + // 100 = 1 second / data duration (10 ms). + RTC_DCHECK_EQ(number_of_frames * 100, sample_rate); + RTC_DCHECK_LE(bytes_per_sample * number_of_frames * number_of_channels, + AudioFrame::kMaxDataSizeBytes); + + // TODO(solenberg): Remove volume handling since it is now always 0. + uint16_t voe_mic_level = 0; + { + constexpr uint32_t kMaxVolumeLevel = 255; + uint32_t max_volume = 0; + + // Check for zero to skip this calculation; the consumer may use this to + // indicate no volume is available. + if (volume != 0) { + // Scale from ADM to VoE level range + if (audio_device_module_->MaxMicrophoneVolume(&max_volume) == 0) { + if (max_volume != 0) { + voe_mic_level = static_cast( + (volume * kMaxVolumeLevel + static_cast(max_volume / 2)) / + max_volume); + } + } + // We learned that on certain systems (e.g Linux) the voe_mic_level + // can be greater than the maxVolumeLevel therefore + // we are going to cap the voe_mic_level to the maxVolumeLevel + // and change the maxVolume to volume if it turns out that + // the voe_mic_level is indeed greater than the maxVolumeLevel. + if (voe_mic_level > kMaxVolumeLevel) { + voe_mic_level = kMaxVolumeLevel; + max_volume = volume; + } + } + } + + int send_sample_rate_hz = 0; + size_t send_num_channels = 0; + bool swap_stereo_channels = false; + { + rtc::CritScope lock(&capture_lock_); + send_sample_rate_hz = send_sample_rate_hz_; + send_num_channels = send_num_channels_; + swap_stereo_channels = swap_stereo_channels_; + } + + std::unique_ptr audio_frame(new AudioFrame()); + InitializeCaptureFrame(sample_rate, send_sample_rate_hz, + number_of_channels, send_num_channels, + audio_frame.get()); + voe::RemixAndResample(static_cast(audio_data), + number_of_frames, number_of_channels, sample_rate, + &capture_resampler_, audio_frame.get()); + ProcessCaptureFrame(voe_mic_level, audio_delay_milliseconds, key_pressed, + swap_stereo_channels, audio_processing_, + audio_frame.get()); + + // Typing detection (utilizes the APM/VAD decision). We let the VAD determine + // if we're using this feature or not. + // TODO(solenberg): is_enabled() takes a lock. Work around that. + bool typing_detected = false; + if (audio_processing_->voice_detection()->is_enabled()) { + if (audio_frame->vad_activity_ != AudioFrame::kVadUnknown) { + bool vad_active = audio_frame->vad_activity_ == AudioFrame::kVadActive; + typing_detected = typing_detection_.Process(key_pressed, vad_active); + } + } + + // Measure audio level of speech after all processing. + double sample_duration = static_cast(number_of_frames) / sample_rate; + audio_level_.ComputeLevel(*audio_frame.get(), sample_duration); + + // Copy frame and push to each sending stream. The copy is required since an + // encoding task will be posted internally to each stream. + { + rtc::CritScope lock(&capture_lock_); + typing_noise_detected_ = typing_detected; + + RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0); + if (!sending_streams_.empty()) { + auto it = sending_streams_.begin(); + while (++it != sending_streams_.end()) { + std::unique_ptr audio_frame_copy(new AudioFrame()); + audio_frame_copy->CopyFrom(*audio_frame.get()); + (*it)->SendAudioData(std::move(audio_frame_copy)); + } + // Send the original frame to the first stream w/o copying. + (*sending_streams_.begin())->SendAudioData(std::move(audio_frame)); + } + } + + return 0; +} + +// Mix all received streams, feed the result to the AudioProcessing module, then +// resample the result to the requested output rate. +int32_t AudioTransportImpl::NeedMorePlayData(const size_t nSamples, + const size_t nBytesPerSample, + const size_t nChannels, + const uint32_t samplesPerSec, + void* audioSamples, + size_t& nSamplesOut, + int64_t* elapsed_time_ms, + int64_t* ntp_time_ms) { + RTC_DCHECK_EQ(sizeof(int16_t) * nChannels, nBytesPerSample); + RTC_DCHECK_GE(nChannels, 1); + RTC_DCHECK_LE(nChannels, 2); + RTC_DCHECK_GE( + samplesPerSec, + static_cast(AudioProcessing::NativeRate::kSampleRate8kHz)); + + // 100 = 1 second / data duration (10 ms). + RTC_DCHECK_EQ(nSamples * 100, samplesPerSec); + RTC_DCHECK_LE(nBytesPerSample * nSamples * nChannels, + AudioFrame::kMaxDataSizeBytes); + + mixer_->Mix(nChannels, &mixed_frame_); + *elapsed_time_ms = mixed_frame_.elapsed_time_ms_; + *ntp_time_ms = mixed_frame_.ntp_time_ms_; + + const auto error = audio_processing_->ProcessReverseStream(&mixed_frame_); + RTC_DCHECK_EQ(error, AudioProcessing::kNoError); + + nSamplesOut = Resample(mixed_frame_, samplesPerSec, &render_resampler_, + static_cast(audioSamples)); + RTC_DCHECK_EQ(nSamplesOut, nChannels * nSamples); + return 0; +} + +// Used by Chromium - same as NeedMorePlayData() but because Chrome has its +// own APM instance, does not call audio_processing_->ProcessReverseStream(). +void AudioTransportImpl::PullRenderData(int bits_per_sample, + int sample_rate, + size_t number_of_channels, + size_t number_of_frames, + void* audio_data, + int64_t* elapsed_time_ms, + int64_t* ntp_time_ms) { + RTC_DCHECK_EQ(bits_per_sample, 16); + RTC_DCHECK_GE(number_of_channels, 1); + RTC_DCHECK_LE(number_of_channels, 2); + RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz); + + // 100 = 1 second / data duration (10 ms). + RTC_DCHECK_EQ(number_of_frames * 100, sample_rate); + + // 8 = bits per byte. + RTC_DCHECK_LE(bits_per_sample / 8 * number_of_frames * number_of_channels, + AudioFrame::kMaxDataSizeBytes); + mixer_->Mix(number_of_channels, &mixed_frame_); + *elapsed_time_ms = mixed_frame_.elapsed_time_ms_; + *ntp_time_ms = mixed_frame_.ntp_time_ms_; + + auto output_samples = Resample(mixed_frame_, sample_rate, &render_resampler_, + static_cast(audio_data)); + RTC_DCHECK_EQ(output_samples, number_of_channels * number_of_frames); +} + +void AudioTransportImpl::UpdateSendingStreams( + std::vector streams, int send_sample_rate_hz, + size_t send_num_channels) { + rtc::CritScope lock(&capture_lock_); + sending_streams_ = std::move(streams); + send_sample_rate_hz_ = send_sample_rate_hz; + send_num_channels_ = send_num_channels; +} + +void AudioTransportImpl::SetStereoChannelSwapping(bool enable) { + rtc::CritScope lock(&capture_lock_); + swap_stereo_channels_ = enable; +} + +bool AudioTransportImpl::typing_noise_detected() const { + rtc::CritScope lock(&capture_lock_); + return typing_noise_detected_; +} +} // namespace webrtc diff --git a/audio/audio_transport_proxy.h b/audio/audio_transport_impl.h similarity index 56% rename from audio/audio_transport_proxy.h rename to audio/audio_transport_impl.h index a51a7dba31..e7de7e9f48 100644 --- a/audio/audio_transport_proxy.h +++ b/audio/audio_transport_impl.h @@ -8,25 +8,32 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef AUDIO_AUDIO_TRANSPORT_PROXY_H_ -#define AUDIO_AUDIO_TRANSPORT_PROXY_H_ +#ifndef AUDIO_AUDIO_TRANSPORT_IMPL_H_ +#define AUDIO_AUDIO_TRANSPORT_IMPL_H_ + +#include #include "api/audio/audio_mixer.h" #include "common_audio/resampler/include/push_resampler.h" -#include "modules/audio_device/include/audio_device_defines.h" +#include "modules/audio_device/include/audio_device.h" #include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/typing_detection.h" #include "rtc_base/constructormagic.h" +#include "rtc_base/criticalsection.h" #include "rtc_base/scoped_ref_ptr.h" +#include "rtc_base/thread_annotations.h" +#include "voice_engine/audio_level.h" namespace webrtc { -class AudioTransportProxy : public AudioTransport { - public: - AudioTransportProxy(AudioTransport* voe_audio_transport, - AudioProcessing* audio_processing, - AudioMixer* mixer); +class AudioSendStream; - ~AudioTransportProxy() override; +class AudioTransportImpl : public AudioTransport { + public: + AudioTransportImpl(AudioMixer* mixer, + AudioProcessing* audio_processing, + AudioDeviceModule* audio_device_module); + ~AudioTransportImpl() override; int32_t RecordedDataIsAvailable(const void* audioSamples, const size_t nSamples, @@ -48,13 +55,6 @@ class AudioTransportProxy : public AudioTransport { int64_t* elapsed_time_ms, int64_t* ntp_time_ms) override; - void PushCaptureData(int voe_channel, - const void* audio_data, - int bits_per_sample, - int sample_rate, - size_t number_of_channels, - size_t number_of_frames) override; - void PullRenderData(int bits_per_sample, int sample_rate, size_t number_of_channels, @@ -63,16 +63,38 @@ class AudioTransportProxy : public AudioTransport { int64_t* elapsed_time_ms, int64_t* ntp_time_ms) override; + void UpdateSendingStreams(std::vector streams, + int send_sample_rate_hz, size_t send_num_channels); + void SetStereoChannelSwapping(bool enable); + bool typing_noise_detected() const; + const voe::AudioLevel& audio_level() const { + return audio_level_; + } + private: - AudioTransport* voe_audio_transport_; - AudioProcessing* audio_processing_; + // Shared. + AudioProcessing* audio_processing_ = nullptr; + + // Capture side. + rtc::CriticalSection capture_lock_; + std::vector sending_streams_ RTC_GUARDED_BY(capture_lock_); + int send_sample_rate_hz_ RTC_GUARDED_BY(capture_lock_) = 8000; + size_t send_num_channels_ RTC_GUARDED_BY(capture_lock_) = 1; + bool typing_noise_detected_ RTC_GUARDED_BY(capture_lock_) = false; + bool swap_stereo_channels_ RTC_GUARDED_BY(capture_lock_) = false; + AudioDeviceModule* audio_device_module_ = nullptr; + PushResampler capture_resampler_; + voe::AudioLevel audio_level_; + TypingDetection typing_detection_; + + // Render side. rtc::scoped_refptr mixer_; AudioFrame mixed_frame_; // Converts mixed audio to the audio device output rate. - PushResampler resampler_; + PushResampler render_resampler_; - RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(AudioTransportProxy); + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(AudioTransportImpl); }; } // namespace webrtc -#endif // AUDIO_AUDIO_TRANSPORT_PROXY_H_ +#endif // AUDIO_AUDIO_TRANSPORT_IMPL_H_ diff --git a/audio/audio_transport_proxy.cc b/audio/audio_transport_proxy.cc deleted file mode 100644 index e3201ea79b..0000000000 --- a/audio/audio_transport_proxy.cc +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "audio/audio_transport_proxy.h" - -namespace webrtc { - -namespace { -// Resample audio in |frame| to given sample rate preserving the -// channel count and place the result in |destination|. -int Resample(const AudioFrame& frame, - const int destination_sample_rate, - PushResampler* resampler, - int16_t* destination) { - const int number_of_channels = static_cast(frame.num_channels_); - const int target_number_of_samples_per_channel = - destination_sample_rate / 100; - resampler->InitializeIfNeeded(frame.sample_rate_hz_, destination_sample_rate, - number_of_channels); - - // TODO(yujo): make resampler take an AudioFrame, and add special case - // handling of muted frames. - return resampler->Resample( - frame.data(), frame.samples_per_channel_ * number_of_channels, - destination, number_of_channels * target_number_of_samples_per_channel); -} -} // namespace - -AudioTransportProxy::AudioTransportProxy(AudioTransport* voe_audio_transport, - AudioProcessing* audio_processing, - AudioMixer* mixer) - : voe_audio_transport_(voe_audio_transport), - audio_processing_(audio_processing), - mixer_(mixer) { - RTC_DCHECK(voe_audio_transport); - RTC_DCHECK(audio_processing); - RTC_DCHECK(mixer); -} - -AudioTransportProxy::~AudioTransportProxy() {} - -int32_t AudioTransportProxy::RecordedDataIsAvailable( - const void* audioSamples, - const size_t nSamples, - const size_t nBytesPerSample, - const size_t nChannels, - const uint32_t samplesPerSec, - const uint32_t totalDelayMS, - const int32_t clockDrift, - const uint32_t currentMicLevel, - const bool keyPressed, - uint32_t& newMicLevel) { // NOLINT: to avoid changing APIs - // Pass call through to original audio transport instance. - return voe_audio_transport_->RecordedDataIsAvailable( - audioSamples, nSamples, nBytesPerSample, nChannels, samplesPerSec, - totalDelayMS, clockDrift, currentMicLevel, keyPressed, newMicLevel); -} - -int32_t AudioTransportProxy::NeedMorePlayData(const size_t nSamples, - const size_t nBytesPerSample, - const size_t nChannels, - const uint32_t samplesPerSec, - void* audioSamples, - size_t& nSamplesOut, - int64_t* elapsed_time_ms, - int64_t* ntp_time_ms) { - RTC_DCHECK_EQ(sizeof(int16_t) * nChannels, nBytesPerSample); - RTC_DCHECK_GE(nChannels, 1); - RTC_DCHECK_LE(nChannels, 2); - RTC_DCHECK_GE( - samplesPerSec, - static_cast(AudioProcessing::NativeRate::kSampleRate8kHz)); - - // 100 = 1 second / data duration (10 ms). - RTC_DCHECK_EQ(nSamples * 100, samplesPerSec); - RTC_DCHECK_LE(nBytesPerSample * nSamples * nChannels, - AudioFrame::kMaxDataSizeBytes); - - mixer_->Mix(nChannels, &mixed_frame_); - *elapsed_time_ms = mixed_frame_.elapsed_time_ms_; - *ntp_time_ms = mixed_frame_.ntp_time_ms_; - - const auto error = audio_processing_->ProcessReverseStream(&mixed_frame_); - RTC_DCHECK_EQ(error, AudioProcessing::kNoError); - - nSamplesOut = Resample(mixed_frame_, samplesPerSec, &resampler_, - static_cast(audioSamples)); - RTC_DCHECK_EQ(nSamplesOut, nChannels * nSamples); - return 0; -} - -void AudioTransportProxy::PushCaptureData(int voe_channel, - const void* audio_data, - int bits_per_sample, - int sample_rate, - size_t number_of_channels, - size_t number_of_frames) { - // This is part of deprecated VoE interface operating on specific - // VoE channels. It should not be used. - RTC_NOTREACHED(); -} - -void AudioTransportProxy::PullRenderData(int bits_per_sample, - int sample_rate, - size_t number_of_channels, - size_t number_of_frames, - void* audio_data, - int64_t* elapsed_time_ms, - int64_t* ntp_time_ms) { - RTC_DCHECK_EQ(bits_per_sample, 16); - RTC_DCHECK_GE(number_of_channels, 1); - RTC_DCHECK_LE(number_of_channels, 2); - RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz); - - // 100 = 1 second / data duration (10 ms). - RTC_DCHECK_EQ(number_of_frames * 100, sample_rate); - - // 8 = bits per byte. - RTC_DCHECK_LE(bits_per_sample / 8 * number_of_frames * number_of_channels, - AudioFrame::kMaxDataSizeBytes); - mixer_->Mix(number_of_channels, &mixed_frame_); - *elapsed_time_ms = mixed_frame_.elapsed_time_ms_; - *ntp_time_ms = mixed_frame_.ntp_time_ms_; - - const auto output_samples = Resample(mixed_frame_, sample_rate, &resampler_, - static_cast(audio_data)); - RTC_DCHECK_EQ(output_samples, number_of_channels * number_of_frames); -} - -} // namespace webrtc diff --git a/call/BUILD.gn b/call/BUILD.gn index 6f936b853e..903df3ba78 100644 --- a/call/BUILD.gn +++ b/call/BUILD.gn @@ -294,4 +294,16 @@ if (rtc_include_tests) { "//testing/gmock", ] } + + rtc_source_set("mock_call_interfaces") { + testonly = true + + sources = [ + "test/mock_audio_send_stream.h", + ] + deps = [ + ":call_interfaces", + "//test:test_support", + ] + } } diff --git a/call/audio_send_stream.h b/call/audio_send_stream.h index 4912182c12..55f4e2fee5 100644 --- a/call/audio_send_stream.h +++ b/call/audio_send_stream.h @@ -28,6 +28,8 @@ namespace webrtc { +class AudioFrame; + // WORK IN PROGRESS // This class is under development and is not yet intended for for use outside // of WebRtc/Libjingle. Please use the VoiceEngine API instead. @@ -146,6 +148,10 @@ class AudioSendStream { // When a stream is stopped, it can't receive, process or deliver packets. virtual void Stop() = 0; + // Encode and send audio. + virtual void SendAudioData( + std::unique_ptr audio_frame) = 0; + // TODO(solenberg): Make payload_type a config property instead. virtual bool SendTelephoneEvent(int payload_type, int payload_frequency, int event, int duration_ms) = 0; diff --git a/call/audio_state.h b/call/audio_state.h index 3f96da1d55..56d5d44075 100644 --- a/call/audio_state.h +++ b/call/audio_state.h @@ -47,6 +47,16 @@ class AudioState : public rtc::RefCountInterface { rtc::scoped_refptr audio_device_module; }; + struct Stats { + // Audio peak level (max(abs())), linearly on the interval [0,32767]. + int32_t audio_level = -1; + // Audio peak level (max(abs())), logarithmically on the interval [0,9]. + int8_t quantized_audio_level = -1; + // See: https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy + double total_energy = 0.0f; + double total_duration = 0.0f; + }; + virtual AudioProcessing* audio_processing() = 0; virtual AudioTransport* audio_transport() = 0; @@ -61,6 +71,9 @@ class AudioState : public rtc::RefCountInterface { // packets will be encoded or transmitted. virtual void SetRecording(bool enabled) = 0; + virtual Stats GetAudioInputStats() const = 0; + virtual void SetStereoChannelSwapping(bool enable) = 0; + // TODO(solenberg): Replace scoped_refptr with shared_ptr once we can use it. static rtc::scoped_refptr Create( const AudioState::Config& config); diff --git a/call/call_perf_tests.cc b/call/call_perf_tests.cc index 90eaa5a4d5..db3b5e4c04 100644 --- a/call/call_perf_tests.cc +++ b/call/call_perf_tests.cc @@ -158,10 +158,8 @@ void CallPerfTest::TestAudioVideoSync(FecMode fec, audio_net_config.queue_delay_ms = 500; audio_net_config.loss_percent = 5; - rtc::scoped_refptr audio_processing; VoiceEngine* voice_engine; VoEBase* voe_base; - std::unique_ptr fake_audio_device; VideoRtcpAndSyncObserver observer(Clock::GetRealTimeClock()); std::map audio_pt_map; @@ -177,14 +175,14 @@ void CallPerfTest::TestAudioVideoSync(FecMode fec, task_queue_.SendTask([&]() { metrics::Reset(); - audio_processing = AudioProcessing::Create(); voice_engine = VoiceEngine::Create(); voe_base = VoEBase::GetInterface(voice_engine); - fake_audio_device = rtc::MakeUnique( - FakeAudioDevice::CreatePulsedNoiseCapturer(256, 48000), - FakeAudioDevice::CreateDiscardRenderer(48000), audio_rtp_speed); + rtc::scoped_refptr fake_audio_device = + new rtc::RefCountedObject( + FakeAudioDevice::CreatePulsedNoiseCapturer(256, 48000), + FakeAudioDevice::CreateDiscardRenderer(48000), audio_rtp_speed); EXPECT_EQ(0, fake_audio_device->Init()); - EXPECT_EQ(0, voe_base->Init(fake_audio_device.get(), audio_processing.get(), + EXPECT_EQ(0, voe_base->Init(fake_audio_device.get(), nullptr, decoder_factory_)); VoEBase::ChannelConfig config; config.enable_voice_pacing = true; @@ -194,7 +192,8 @@ void CallPerfTest::TestAudioVideoSync(FecMode fec, AudioState::Config send_audio_state_config; send_audio_state_config.voice_engine = voice_engine; send_audio_state_config.audio_mixer = AudioMixerImpl::Create(); - send_audio_state_config.audio_processing = audio_processing; + send_audio_state_config.audio_processing = AudioProcessing::Create(); + send_audio_state_config.audio_device_module = fake_audio_device; Call::Config sender_config(event_log_.get()); auto audio_state = AudioState::Create(send_audio_state_config); @@ -311,8 +310,6 @@ void CallPerfTest::TestAudioVideoSync(FecMode fec, DestroyCalls(); VoiceEngine::Delete(voice_engine); - - fake_audio_device.reset(); }); observer.PrintResults(); diff --git a/call/call_unittest.cc b/call/call_unittest.cc index 7b16271584..bf1fe53543 100644 --- a/call/call_unittest.cc +++ b/call/call_unittest.cc @@ -20,7 +20,7 @@ #include "call/fake_rtp_transport_controller_send.h" #include "logging/rtc_event_log/rtc_event_log.h" #include "modules/audio_device/include/mock_audio_device.h" -#include "modules/audio_mixer/audio_mixer_impl.h" +#include "modules/audio_processing/include/mock_audio_processing.h" #include "modules/congestion_controller/include/mock/mock_send_side_congestion_controller.h" #include "modules/pacing/mock/mock_paced_sender.h" #include "modules/rtp_rtcp/include/rtp_rtcp.h" @@ -39,9 +39,12 @@ struct CallHelper { : voice_engine_(decoder_factory) { webrtc::AudioState::Config audio_state_config; audio_state_config.voice_engine = &voice_engine_; - audio_state_config.audio_mixer = webrtc::AudioMixerImpl::Create(); - audio_state_config.audio_processing = webrtc::AudioProcessing::Create(); - EXPECT_CALL(voice_engine_, audio_transport()); + audio_state_config.audio_mixer = + new rtc::RefCountedObject(); + audio_state_config.audio_processing = + new rtc::RefCountedObject(); + audio_state_config.audio_device_module = + new rtc::RefCountedObject(); webrtc::Call::Config config(&event_log_); config.audio_state = webrtc::AudioState::Create(audio_state_config); call_.reset(webrtc::Call::Create(config)); @@ -431,9 +434,6 @@ TEST(CallBitrateTest, TEST(CallTest, RecreatingAudioStreamWithSameSsrcReusesRtpState) { constexpr uint32_t kSSRC = 12345; - testing::NiceMock mock_adm; - rtc::scoped_refptr mock_mixer( - new rtc::RefCountedObject); // There's similar functionality in cricket::VoEWrapper but it's not reachable // from here. Since we're working on removing VoE interfaces, I doubt it's @@ -454,9 +454,13 @@ TEST(CallTest, RecreatingAudioStreamWithSameSsrcReusesRtpState) { AudioState::Config audio_state_config; audio_state_config.voice_engine = voice_engine.voe; - audio_state_config.audio_mixer = mock_mixer; - audio_state_config.audio_processing = AudioProcessing::Create(); - voice_engine.base->Init(&mock_adm, audio_state_config.audio_processing.get(), + audio_state_config.audio_mixer = + new rtc::RefCountedObject(); + audio_state_config.audio_processing = + new rtc::RefCountedObject(); + audio_state_config.audio_device_module = + new rtc::RefCountedObject(); + voice_engine.base->Init(audio_state_config.audio_device_module, nullptr, CreateBuiltinAudioDecoderFactory()); auto audio_state = AudioState::Create(audio_state_config); diff --git a/call/test/mock_audio_send_stream.h b/call/test/mock_audio_send_stream.h new file mode 100644 index 0000000000..4eb1166316 --- /dev/null +++ b/call/test/mock_audio_send_stream.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef CALL_TEST_MOCK_AUDIO_SEND_STREAM_H_ +#define CALL_TEST_MOCK_AUDIO_SEND_STREAM_H_ + +#include + +#include "call/audio_send_stream.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { + +class MockAudioSendStream : public AudioSendStream { + public: + MOCK_CONST_METHOD0(GetConfig, const webrtc::AudioSendStream::Config&()); + MOCK_METHOD1(Reconfigure, void(const Config& config)); + MOCK_METHOD0(Start, void()); + MOCK_METHOD0(Stop, void()); + // GMock doesn't like move-only types, such as std::unique_ptr. + virtual void SendAudioData( + std::unique_ptr audio_frame) { + SendAudioDataForMock(audio_frame.get()); + } + MOCK_METHOD1(SendAudioDataForMock, + void(webrtc::AudioFrame* audio_frame)); + MOCK_METHOD4(SendTelephoneEvent, + bool(int payload_type, int payload_frequency, int event, + int duration_ms)); + MOCK_METHOD1(SetMuted, void(bool muted)); + MOCK_CONST_METHOD0(GetStats, Stats()); + MOCK_CONST_METHOD1(GetStats, Stats(bool has_remote_tracks)); +}; +} // namespace test +} // namespace webrtc + +#endif // CALL_TEST_MOCK_AUDIO_SEND_STREAM_H_ diff --git a/media/engine/apm_helpers.cc b/media/engine/apm_helpers.cc index eca3e20c4f..cf5dde27dd 100644 --- a/media/engine/apm_helpers.cc +++ b/media/engine/apm_helpers.cc @@ -13,7 +13,6 @@ #include "modules/audio_device/include/audio_device.h" #include "modules/audio_processing/include/audio_processing.h" #include "rtc_base/logging.h" -#include "voice_engine/transmit_mixer.h" namespace webrtc { namespace apm_helpers { @@ -156,17 +155,6 @@ void SetNsStatus(AudioProcessing* apm, bool enable) { void SetTypingDetectionStatus(AudioProcessing* apm, bool enable) { RTC_DCHECK(apm); -#if WEBRTC_VOICE_ENGINE_TYPING_DETECTION - // Typing detection takes place in TransmitMixer::PrepareDemux() and - // TransmitMixer::TypingDetection(). The typing detection algorithm takes as - // input two booleans: - // 1. A signal whether a key was pressed during the audio frame. - // 2. Whether VAD is active or not. - // TransmitMixer will not even call the detector if APM has set kVadUnknown in - // the audio frame after near end processing, so enabling/disabling VAD is - // sufficient for turning typing detection on/off. - // TODO(solenberg): Rather than relying on a side effect, consider forcing the - // feature on/off in TransmitMixer. VoiceDetection* vd = apm->voice_detection(); if (vd->Enable(enable)) { RTC_LOG(LS_ERROR) << "Failed to enable/disable VAD: " << enable; @@ -177,7 +165,6 @@ void SetTypingDetectionStatus(AudioProcessing* apm, bool enable) { return; } RTC_LOG(LS_INFO) << "VAD set to " << enable << " for typing detection."; -#endif } } // namespace apm_helpers } // namespace webrtc diff --git a/media/engine/apm_helpers_unittest.cc b/media/engine/apm_helpers_unittest.cc index 70edce23fe..e1139b236a 100644 --- a/media/engine/apm_helpers_unittest.cc +++ b/media/engine/apm_helpers_unittest.cc @@ -10,13 +10,10 @@ #include "media/engine/apm_helpers.h" -#include "media/engine/webrtcvoe.h" -#include "modules/audio_device/include/mock_audio_device.h" #include "modules/audio_processing/include/audio_processing.h" +#include "rtc_base/scoped_ref_ptr.h" #include "test/gmock.h" #include "test/gtest.h" -#include "test/mock_audio_decoder_factory.h" -#include "voice_engine/transmit_mixer.h" namespace webrtc { namespace { @@ -30,19 +27,12 @@ struct TestHelper { config.Set(new ExperimentalAgc(false)); apm_ = rtc::scoped_refptr(AudioProcessing::Create(config)); apm_helpers::Init(apm()); - EXPECT_EQ(0, voe_wrapper_.base()->Init( - &mock_audio_device_, apm_, - MockAudioDecoderFactory::CreateEmptyFactory())); } AudioProcessing* apm() { return apm_.get(); } const AudioProcessing* apm() const { return apm_.get(); } - voe::TransmitMixer* transmit_mixer() { - return voe_wrapper_.base()->transmit_mixer(); - } - bool GetEcMetricsStatus() const { EchoCancellation* ec = apm()->echo_cancellation(); bool metrics_enabled = ec->are_metrics_enabled(); @@ -63,8 +53,6 @@ struct TestHelper { } private: - testing::NiceMock mock_audio_device_; - cricket::VoEWrapper voe_wrapper_; rtc::scoped_refptr apm_; }; } // namespace @@ -236,21 +224,14 @@ TEST(ApmHelpersTest, TypingDetectionStatus_DefaultMode) { EXPECT_FALSE(vd->is_enabled()); } -// TODO(kthelgason): Reenable this test on simulator. -// See bugs.webrtc.org/5569 -#if defined(TARGET_IPHONE_SIMULATOR) && TARGET_IPHONE_SIMULATOR -#define MAYBE_TypingDetectionStatus_EnableDisable \ - DISABLED_TypingDetectionStatus_EnableDisable -#else -#define MAYBE_TypingDetectionStatus_EnableDisable \ - TypingDetectionStatus_EnableDisable -#endif -TEST(ApmHelpersTest, MAYBE_TypingDetectionStatus_EnableDisable) { +TEST(ApmHelpersTest, TypingDetectionStatus_EnableDisable) { TestHelper helper; VoiceDetection* vd = helper.apm()->voice_detection(); apm_helpers::SetTypingDetectionStatus(helper.apm(), true); + EXPECT_EQ(VoiceDetection::kVeryLowLikelihood, vd->likelihood()); EXPECT_TRUE(vd->is_enabled()); apm_helpers::SetTypingDetectionStatus(helper.apm(), false); + EXPECT_EQ(VoiceDetection::kVeryLowLikelihood, vd->likelihood()); EXPECT_FALSE(vd->is_enabled()); } @@ -260,21 +241,4 @@ TEST(ApmHelpersTest, HighPassFilter_DefaultMode) { TestHelper helper; EXPECT_FALSE(helper.apm()->high_pass_filter()->is_enabled()); } - -// TODO(solenberg): Move this test to a better place - added here for the sake -// of duplicating all relevant tests from audio_processing_test.cc. -TEST(ApmHelpersTest, StereoSwapping_DefaultMode) { - TestHelper helper; - EXPECT_FALSE(helper.transmit_mixer()->IsStereoChannelSwappingEnabled()); -} - -// TODO(solenberg): Move this test to a better place - added here for the sake -// of duplicating all relevant tests from audio_processing_test.cc. -TEST(ApmHelpersTest, StereoSwapping_EnableDisable) { - TestHelper helper; - helper.transmit_mixer()->EnableStereoChannelSwapping(true); - EXPECT_TRUE(helper.transmit_mixer()->IsStereoChannelSwappingEnabled()); - helper.transmit_mixer()->EnableStereoChannelSwapping(false); - EXPECT_FALSE(helper.transmit_mixer()->IsStereoChannelSwappingEnabled()); -} } // namespace webrtc diff --git a/media/engine/fakewebrtccall.h b/media/engine/fakewebrtccall.h index 4544ff1903..6fa71dbca9 100644 --- a/media/engine/fakewebrtccall.h +++ b/media/engine/fakewebrtccall.h @@ -60,7 +60,8 @@ class FakeAudioSendStream final : public webrtc::AudioSendStream { void Start() override { sending_ = true; } void Stop() override { sending_ = false; } - + void SendAudioData(std::unique_ptr audio_frame) override { + } bool SendTelephoneEvent(int payload_type, int payload_frequency, int event, int duration_ms) override; void SetMuted(bool muted) override; diff --git a/media/engine/fakewebrtcvoiceengine.h b/media/engine/fakewebrtcvoiceengine.h index 444afdd69f..163edd8c05 100644 --- a/media/engine/fakewebrtcvoiceengine.h +++ b/media/engine/fakewebrtcvoiceengine.h @@ -17,12 +17,6 @@ #include "media/engine/webrtcvoe.h" #include "rtc_base/checks.h" -namespace webrtc { -namespace voe { -class TransmitMixer; -} // namespace voe -} // namespace webrtc - namespace cricket { #define WEBRTC_CHECK_CHANNEL(channel) \ @@ -41,8 +35,7 @@ class FakeWebRtcVoiceEngine : public webrtc::VoEBase { bool neteq_fast_accelerate = false; }; - explicit FakeWebRtcVoiceEngine(webrtc::voe::TransmitMixer* transmit_mixer) - : transmit_mixer_(transmit_mixer) {} + FakeWebRtcVoiceEngine() {} ~FakeWebRtcVoiceEngine() override { RTC_CHECK(channels_.empty()); } @@ -68,9 +61,6 @@ class FakeWebRtcVoiceEngine : public webrtc::VoEBase { void Terminate() override { inited_ = false; } - webrtc::voe::TransmitMixer* transmit_mixer() override { - return transmit_mixer_; - } WEBRTC_FUNC(CreateChannel, ()) { return CreateChannel(webrtc::VoEBase::ChannelConfig()); } @@ -114,9 +104,8 @@ class FakeWebRtcVoiceEngine : public webrtc::VoEBase { int last_channel_ = -1; std::map channels_; bool fail_create_channel_ = false; - webrtc::voe::TransmitMixer* transmit_mixer_ = nullptr; - RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(FakeWebRtcVoiceEngine); + RTC_DISALLOW_COPY_AND_ASSIGN(FakeWebRtcVoiceEngine); }; } // namespace cricket diff --git a/media/engine/webrtcvoe.h b/media/engine/webrtcvoe.h index d303d30ccd..5d5342906e 100644 --- a/media/engine/webrtcvoe.h +++ b/media/engine/webrtcvoe.h @@ -74,7 +74,8 @@ class VoEWrapper { VoEWrapper() : engine_(webrtc::VoiceEngine::Create()), base_(engine_) { } - explicit VoEWrapper(webrtc::VoEBase* base) : engine_(NULL), base_(base) {} + explicit VoEWrapper(webrtc::VoEBase* base) + : engine_(webrtc::VoiceEngine::Create()), base_(base) {} ~VoEWrapper() {} webrtc::VoiceEngine* engine() const { return engine_.get(); } webrtc::VoEBase* base() const { return base_.get(); } diff --git a/media/engine/webrtcvoiceengine.cc b/media/engine/webrtcvoiceengine.cc index 061a16b442..98f0aa49ca 100644 --- a/media/engine/webrtcvoiceengine.cc +++ b/media/engine/webrtcvoiceengine.cc @@ -44,7 +44,6 @@ #include "rtc_base/trace_event.h" #include "system_wrappers/include/field_trial.h" #include "system_wrappers/include/metrics.h" -#include "voice_engine/transmit_mixer.h" namespace cricket { namespace { @@ -158,21 +157,6 @@ rtc::Optional GetAudioNetworkAdaptorConfig( return rtc::nullopt; } -webrtc::AudioState::Config MakeAudioStateConfig( - VoEWrapper* voe_wrapper, - rtc::scoped_refptr audio_mixer, - rtc::scoped_refptr audio_processing) { - webrtc::AudioState::Config config; - config.voice_engine = voe_wrapper->engine(); - if (audio_mixer) { - config.audio_mixer = audio_mixer; - } else { - config.audio_mixer = webrtc::AudioMixerImpl::Create(); - } - config.audio_processing = audio_processing; - return config; -} - // |max_send_bitrate_bps| is the bitrate from "b=" in SDP. // |rtp_max_bitrate_bps| is the bitrate from RtpSender::SetParameters. rtc::Optional ComputeSendBitrate(int max_send_bitrate_bps, @@ -299,9 +283,24 @@ void WebRtcVoiceEngine::Init() { RTC_CHECK(adm()); webrtc::adm_helpers::Init(adm()); webrtc::apm_helpers::Init(apm()); - RTC_CHECK_EQ(0, voe_wrapper_->base()->Init(adm(), apm(), decoder_factory_)); - transmit_mixer_ = voe_wrapper_->base()->transmit_mixer(); - RTC_DCHECK(transmit_mixer_); + RTC_CHECK_EQ(0, voe_wrapper_->base()->Init(adm(), nullptr, decoder_factory_)); + + // Set up AudioState. + { + webrtc::AudioState::Config config; + config.voice_engine = voe()->engine(); + if (audio_mixer_) { + config.audio_mixer = audio_mixer_; + } else { + config.audio_mixer = webrtc::AudioMixerImpl::Create(); + } + config.audio_processing = apm_; + config.audio_device_module = adm_; + audio_state_ = webrtc::AudioState::Create(config); + } + + // Connect the ADM to our audio path. + adm()->RegisterAudioCallback(audio_state()->audio_transport()); // Save the default AGC configuration settings. This must happen before // calling ApplyOptions or the default will be overwritten. @@ -329,15 +328,6 @@ void WebRtcVoiceEngine::Init() { RTC_DCHECK(error); } - // May be null for VoE injected for testing. - if (voe()->engine()) { - audio_state_ = webrtc::AudioState::Create( - MakeAudioStateConfig(voe(), audio_mixer_, apm_)); - - // Connect the ADM to our audio path. - adm()->RegisterAudioCallback(audio_state_->audio_transport()); - } - initialized_ = true; } @@ -531,7 +521,7 @@ bool WebRtcVoiceEngine::ApplyOptions(const AudioOptions& options_in) { if (options.stereo_swapping) { RTC_LOG(LS_INFO) << "Stereo swapping enabled? " << *options.stereo_swapping; - transmit_mixer()->EnableStereoChannelSwapping(*options.stereo_swapping); + audio_state()->SetStereoChannelSwapping(*options.stereo_swapping); } if (options.audio_jitter_buffer_max_packets) { @@ -623,9 +613,7 @@ bool WebRtcVoiceEngine::ApplyOptions(const AudioOptions& options_in) { // TODO(solenberg): Remove, once AudioMonitor is gone. int WebRtcVoiceEngine::GetInputLevel() { RTC_DCHECK(worker_thread_checker_.CalledOnValidThread()); - int8_t level = transmit_mixer()->AudioLevel(); - RTC_DCHECK_LE(0, level); - return level; + return audio_state()->GetAudioInputStats().quantized_audio_level; } const std::vector& WebRtcVoiceEngine::send_codecs() const { @@ -709,10 +697,10 @@ webrtc::AudioProcessing* WebRtcVoiceEngine::apm() const { return apm_.get(); } -webrtc::voe::TransmitMixer* WebRtcVoiceEngine::transmit_mixer() { +webrtc::AudioState* WebRtcVoiceEngine::audio_state() { RTC_DCHECK(worker_thread_checker_.CalledOnValidThread()); - RTC_DCHECK(transmit_mixer_); - return transmit_mixer_; + RTC_DCHECK(audio_state_); + return audio_state_.get(); } AudioCodecs WebRtcVoiceEngine::CollectCodecs( @@ -796,7 +784,6 @@ class WebRtcVoiceMediaChannel::WebRtcAudioSendStream public: WebRtcAudioSendStream( int ch, - webrtc::AudioTransport* voe_audio_transport, uint32_t ssrc, const std::string& c_name, const std::string track_id, @@ -808,16 +795,13 @@ class WebRtcVoiceMediaChannel::WebRtcAudioSendStream webrtc::Call* call, webrtc::Transport* send_transport, const rtc::scoped_refptr& encoder_factory) - : voe_audio_transport_(voe_audio_transport), - call_(call), + : call_(call), config_(send_transport), send_side_bwe_with_overhead_( webrtc::field_trial::IsEnabled("WebRTC-SendSideBwe-WithOverhead")), max_send_bitrate_bps_(max_send_bitrate_bps), rtp_parameters_(CreateRtpParametersWithOneEncoding()) { RTC_DCHECK_GE(ch, 0); - // TODO(solenberg): Once we're not using FakeWebRtcVoiceEngine anymore: - // RTC_DCHECK(voe_audio_transport); RTC_DCHECK(call); RTC_DCHECK(encoder_factory); config_.rtp.ssrc = ssrc; @@ -952,11 +936,18 @@ class WebRtcVoiceMediaChannel::WebRtcAudioSendStream int sample_rate, size_t number_of_channels, size_t number_of_frames) override { + RTC_DCHECK_EQ(16, bits_per_sample); RTC_CHECK_RUNS_SERIALIZED(&audio_capture_race_checker_); - RTC_DCHECK(voe_audio_transport_); - voe_audio_transport_->PushCaptureData(config_.voe_channel_id, audio_data, - bits_per_sample, sample_rate, - number_of_channels, number_of_frames); + RTC_DCHECK(stream_); + std::unique_ptr audio_frame(new webrtc::AudioFrame()); + audio_frame->UpdateFrame(audio_frame->timestamp_, + static_cast(audio_data), + number_of_frames, + sample_rate, + audio_frame->speech_type_, + audio_frame->vad_activity_, + number_of_channels); + stream_->SendAudioData(std::move(audio_frame)); } // Callback from the |source_| when it is going away. In case Start() has @@ -1119,7 +1110,6 @@ class WebRtcVoiceMediaChannel::WebRtcAudioSendStream rtc::ThreadChecker worker_thread_checker_; rtc::RaceChecker audio_capture_race_checker_; - webrtc::AudioTransport* const voe_audio_transport_ = nullptr; webrtc::Call* call_ = nullptr; webrtc::AudioSendStream::Config config_; const bool send_side_bwe_with_overhead_; @@ -1821,17 +1811,12 @@ bool WebRtcVoiceMediaChannel::AddSendStream(const StreamParams& sp) { return false; } - // Save the channel to send_streams_, so that RemoveSendStream() can still - // delete the channel in case failure happens below. - webrtc::AudioTransport* audio_transport = - engine()->voe()->base()->audio_transport(); - rtc::Optional audio_network_adaptor_config = GetAudioNetworkAdaptorConfig(options_); WebRtcAudioSendStream* stream = new WebRtcAudioSendStream( - channel, audio_transport, ssrc, sp.cname, sp.id, send_codec_spec_, - send_rtp_extensions_, max_send_bitrate_bps_, audio_network_adaptor_config, - call_, this, engine()->encoder_factory_); + channel, ssrc, sp.cname, sp.id, send_codec_spec_, send_rtp_extensions_, + max_send_bitrate_bps_, audio_network_adaptor_config, call_, this, + engine()->encoder_factory_); send_streams_.insert(std::make_pair(ssrc, stream)); // At this point the stream's local SSRC has been updated. If it is the first diff --git a/media/engine/webrtcvoiceengine.h b/media/engine/webrtcvoiceengine.h index 8984299638..e5880e29da 100644 --- a/media/engine/webrtcvoiceengine.h +++ b/media/engine/webrtcvoiceengine.h @@ -32,12 +32,6 @@ #include "rtc_base/task_queue.h" #include "rtc_base/thread_checker.h" -namespace webrtc { -namespace voe { -class TransmitMixer; -} // namespace voe -} // namespace webrtc - namespace cricket { class AudioDeviceModule; @@ -115,7 +109,7 @@ class WebRtcVoiceEngine final { webrtc::AudioDeviceModule* adm(); webrtc::AudioProcessing* apm() const; - webrtc::voe::TransmitMixer* transmit_mixer(); + webrtc::AudioState* audio_state(); AudioCodecs CollectCodecs( const std::vector& specs) const; @@ -123,15 +117,13 @@ class WebRtcVoiceEngine final { rtc::ThreadChecker signal_thread_checker_; rtc::ThreadChecker worker_thread_checker_; - // The audio device manager. + // The audio device module. rtc::scoped_refptr adm_; rtc::scoped_refptr encoder_factory_; rtc::scoped_refptr decoder_factory_; rtc::scoped_refptr audio_mixer_; - // Reference to the APM, owned by VoE. + // The audio processing module. rtc::scoped_refptr apm_; - // Reference to the TransmitMixer, owned by VoE. - webrtc::voe::TransmitMixer* transmit_mixer_ = nullptr; // The primary instance of WebRtc VoiceEngine. std::unique_ptr voe_wrapper_; rtc::scoped_refptr audio_state_; diff --git a/media/engine/webrtcvoiceengine_unittest.cc b/media/engine/webrtcvoiceengine_unittest.cc index 4fd087ac1a..4df63b398c 100644 --- a/media/engine/webrtcvoiceengine_unittest.cc +++ b/media/engine/webrtcvoiceengine_unittest.cc @@ -33,7 +33,6 @@ #include "test/gtest.h" #include "test/mock_audio_decoder_factory.h" #include "test/mock_audio_encoder_factory.h" -#include "voice_engine/transmit_mixer.h" using testing::_; using testing::ContainerEq; @@ -78,27 +77,13 @@ constexpr webrtc::GainControl::Mode kDefaultAgcMode = constexpr webrtc::NoiseSuppression::Level kDefaultNsLevel = webrtc::NoiseSuppression::kHigh; -class FakeVoEWrapper : public cricket::VoEWrapper { - public: - explicit FakeVoEWrapper(cricket::FakeWebRtcVoiceEngine* engine) - : cricket::VoEWrapper(engine) { - } -}; - -class MockTransmitMixer : public webrtc::voe::TransmitMixer { - public: - MockTransmitMixer() = default; - virtual ~MockTransmitMixer() = default; - - MOCK_METHOD1(EnableStereoChannelSwapping, void(bool enable)); -}; - void AdmSetupExpectations(webrtc::test::MockAudioDeviceModule* adm) { RTC_DCHECK(adm); // Setup. - EXPECT_CALL(*adm, AddRef()).Times(1); + EXPECT_CALL(*adm, AddRef()).Times(3); EXPECT_CALL(*adm, Init()).WillOnce(Return(0)); + EXPECT_CALL(*adm, RegisterAudioCallback(_)).WillOnce(Return(0)); #if defined(WEBRTC_WIN) EXPECT_CALL(*adm, SetPlayoutDevice( testing::Matcher( @@ -130,8 +115,8 @@ void AdmSetupExpectations(webrtc::test::MockAudioDeviceModule* adm) { EXPECT_CALL(*adm, StopRecording()).WillOnce(Return(0)); EXPECT_CALL(*adm, RegisterAudioCallback(nullptr)).WillOnce(Return(0)); EXPECT_CALL(*adm, Terminate()).WillOnce(Return(0)); - EXPECT_CALL(*adm, Release()) - .WillOnce(Return(rtc::RefCountReleaseStatus::kDroppedLastRef)); + EXPECT_CALL(*adm, Release()).Times(3) + .WillRepeatedly(Return(rtc::RefCountReleaseStatus::kDroppedLastRef)); } } // namespace @@ -147,15 +132,13 @@ TEST(WebRtcVoiceEngineTestStubLibrary, StartupShutdown) { EXPECT_CALL(*apm, ApplyConfig(_)).WillRepeatedly(SaveArg<0>(&apm_config)); EXPECT_CALL(*apm, SetExtraOptions(testing::_)); EXPECT_CALL(*apm, DetachAecDump()); - StrictMock transmit_mixer; - EXPECT_CALL(transmit_mixer, EnableStereoChannelSwapping(false)); - cricket::FakeWebRtcVoiceEngine voe(&transmit_mixer); + cricket::FakeWebRtcVoiceEngine voe; EXPECT_FALSE(voe.IsInited()); { cricket::WebRtcVoiceEngine engine( &adm, webrtc::MockAudioEncoderFactory::CreateUnusedFactory(), webrtc::MockAudioDecoderFactory::CreateUnusedFactory(), nullptr, apm, - new FakeVoEWrapper(&voe)); + new cricket::VoEWrapper(&voe)); engine.Init(); EXPECT_TRUE(voe.IsInited()); } @@ -183,7 +166,7 @@ class WebRtcVoiceEngineTestFake : public testing::Test { apm_ns_(*apm_->noise_suppression()), apm_vd_(*apm_->voice_detection()), call_(webrtc::Call::Config(&event_log_)), - voe_(&transmit_mixer_), + voe_(), override_field_trials_(field_trials) { // AudioDeviceModule. AdmSetupExpectations(&adm_); @@ -202,7 +185,6 @@ class WebRtcVoiceEngineTestFake : public testing::Test { EXPECT_CALL(apm_ns_, set_level(kDefaultNsLevel)).WillOnce(Return(0)); EXPECT_CALL(apm_ns_, Enable(true)).WillOnce(Return(0)); EXPECT_CALL(apm_vd_, Enable(true)).WillOnce(Return(0)); - EXPECT_CALL(transmit_mixer_, EnableStereoChannelSwapping(false)); // Init does not overwrite default AGC config. EXPECT_CALL(apm_gc_, target_level_dbfs()).WillOnce(Return(1)); EXPECT_CALL(apm_gc_, compression_gain_db()).WillRepeatedly(Return(5)); @@ -214,9 +196,9 @@ class WebRtcVoiceEngineTestFake : public testing::Test { // factories. Those tests should probably be moved elsewhere. auto encoder_factory = webrtc::CreateBuiltinAudioEncoderFactory(); auto decoder_factory = webrtc::CreateBuiltinAudioDecoderFactory(); - engine_.reset(new cricket::WebRtcVoiceEngine(&adm_, encoder_factory, - decoder_factory, nullptr, apm_, - new FakeVoEWrapper(&voe_))); + engine_.reset(new cricket::WebRtcVoiceEngine( + &adm_, encoder_factory, decoder_factory, nullptr, apm_, + new cricket::VoEWrapper(&voe_))); engine_->Init(); send_parameters_.codecs.push_back(kPcmuCodec); recv_parameters_.codecs.push_back(kPcmuCodec); @@ -723,7 +705,6 @@ class WebRtcVoiceEngineTestFake : public testing::Test { webrtc::test::MockEchoCancellation& apm_ec_; webrtc::test::MockNoiseSuppression& apm_ns_; webrtc::test::MockVoiceDetection& apm_vd_; - StrictMock transmit_mixer_; webrtc::RtcEventLogNullImpl event_log_; cricket::FakeCall call_; cricket::FakeWebRtcVoiceEngine voe_; @@ -2781,7 +2762,7 @@ TEST_F(WebRtcVoiceEngineTestFake, SetAudioOptions) { send_parameters_.options.auto_gain_control = true; SetSendParameters(send_parameters_); - // Turn off other options (and stereo swapping on). + // Turn off other options. EXPECT_CALL(apm_ec_, Enable(true)).WillOnce(Return(0)); EXPECT_CALL(apm_ec_, enable_metrics(true)).WillOnce(Return(0)); EXPECT_CALL(apm_gc_, set_mode(kDefaultAgcMode)).WillOnce(Return(0)); @@ -2789,7 +2770,6 @@ TEST_F(WebRtcVoiceEngineTestFake, SetAudioOptions) { EXPECT_CALL(apm_ns_, set_level(kDefaultNsLevel)).WillOnce(Return(0)); EXPECT_CALL(apm_ns_, Enable(false)).WillOnce(Return(0)); EXPECT_CALL(apm_vd_, Enable(false)).WillOnce(Return(0)); - EXPECT_CALL(transmit_mixer_, EnableStereoChannelSwapping(true)); send_parameters_.options.noise_suppression = false; send_parameters_.options.highpass_filter = false; send_parameters_.options.typing_detection = false; @@ -2805,7 +2785,6 @@ TEST_F(WebRtcVoiceEngineTestFake, SetAudioOptions) { EXPECT_CALL(apm_ns_, set_level(kDefaultNsLevel)).WillOnce(Return(0)); EXPECT_CALL(apm_ns_, Enable(false)).WillOnce(Return(0)); EXPECT_CALL(apm_vd_, Enable(false)).WillOnce(Return(0)); - EXPECT_CALL(transmit_mixer_, EnableStereoChannelSwapping(true)); SetSendParameters(send_parameters_); } @@ -3335,9 +3314,9 @@ TEST(WebRtcVoiceEngineTest, StartupShutdown) { // Tests that reference counting on the external ADM is correct. TEST(WebRtcVoiceEngineTest, StartupShutdownWithExternalADM) { testing::NiceMock adm; - EXPECT_CALL(adm, AddRef()).Times(3); + EXPECT_CALL(adm, AddRef()).Times(5); EXPECT_CALL(adm, Release()) - .Times(3) + .Times(5) .WillRepeatedly(Return(rtc::RefCountReleaseStatus::kDroppedLastRef)); { rtc::scoped_refptr apm = diff --git a/modules/audio_device/BUILD.gn b/modules/audio_device/BUILD.gn index 6c1914619f..38ce742f90 100644 --- a/modules/audio_device/BUILD.gn +++ b/modules/audio_device/BUILD.gn @@ -104,6 +104,7 @@ rtc_source_set("audio_device_generic") { "../../api:array_view", "../../common_audio", "../../rtc_base:checks", + "../../rtc_base:deprecation", "../../rtc_base:rtc_base_approved", "../../rtc_base:rtc_task_queue", "../../system_wrappers", diff --git a/modules/audio_device/include/audio_device_defines.h b/modules/audio_device/include/audio_device_defines.h index 510b07c049..04119f6511 100644 --- a/modules/audio_device/include/audio_device_defines.h +++ b/modules/audio_device/include/audio_device_defines.h @@ -13,6 +13,8 @@ #include +#include "rtc_base/checks.h" +#include "rtc_base/deprecation.h" #include "typedefs.h" // NOLINT(build/include) namespace webrtc { @@ -54,17 +56,19 @@ class AudioTransport { // The data will not undergo audio processing. // |voe_channel| is the id of the VoE channel which is the sink to the // capture data. - virtual void PushCaptureData(int voe_channel, - const void* audio_data, - int bits_per_sample, - int sample_rate, - size_t number_of_channels, - size_t number_of_frames) = 0; + // TODO(bugs.webrtc.org/8659): Remove this method once clients updated. + RTC_DEPRECATED virtual void PushCaptureData( + int voe_channel, + const void* audio_data, + int bits_per_sample, + int sample_rate, + size_t number_of_channels, + size_t number_of_frames) { + RTC_NOTREACHED(); + } // Method to pull mixed render audio data from all active VoE channels. // The data will not be passed as reference for audio processing internally. - // TODO(xians): Support getting the unmixed render data from specific VoE - // channel. virtual void PullRenderData(int bits_per_sample, int sample_rate, size_t number_of_channels, diff --git a/test/call_test.cc b/test/call_test.cc index 3210e6460d..ceee841c15 100644 --- a/test/call_test.cc +++ b/test/call_test.cc @@ -44,14 +44,12 @@ CallTest::CallTest() num_flexfec_streams_(0), decoder_factory_(CreateBuiltinAudioDecoderFactory()), encoder_factory_(CreateBuiltinAudioEncoderFactory()), - task_queue_("CallTestTaskQueue"), - fake_send_audio_device_(nullptr), - fake_recv_audio_device_(nullptr) {} + task_queue_("CallTestTaskQueue") {} CallTest::~CallTest() { task_queue_.SendTask([this]() { - fake_send_audio_device_.reset(); - fake_recv_audio_device_.reset(); + fake_send_audio_device_ = nullptr; + fake_recv_audio_device_ = nullptr; frame_generator_capturer_.reset(); }); } @@ -74,6 +72,7 @@ void CallTest::RunBaseTest(BaseTest* test) { audio_state_config.voice_engine = voe_send_.voice_engine; audio_state_config.audio_mixer = AudioMixerImpl::Create(); audio_state_config.audio_processing = apm_send_; + audio_state_config.audio_device_module = fake_send_audio_device_; send_config.audio_state = AudioState::Create(audio_state_config); fake_send_audio_device_->RegisterAudioCallback( send_config.audio_state->audio_transport()); @@ -90,6 +89,7 @@ void CallTest::RunBaseTest(BaseTest* test) { audio_state_config.voice_engine = voe_recv_.voice_engine; audio_state_config.audio_mixer = AudioMixerImpl::Create(); audio_state_config.audio_processing = apm_recv_; + audio_state_config.audio_device_module = fake_recv_audio_device_; recv_config.audio_state = AudioState::Create(audio_state_config); fake_recv_audio_device_->RegisterAudioCallback( recv_config.audio_state->audio_transport()); } @@ -315,10 +315,10 @@ void CallTest::CreateFrameGeneratorCapturer(int framerate, void CallTest::CreateFakeAudioDevices( std::unique_ptr capturer, std::unique_ptr renderer) { - fake_send_audio_device_.reset(new FakeAudioDevice( - std::move(capturer), nullptr, 1.f)); - fake_recv_audio_device_.reset(new FakeAudioDevice( - nullptr, std::move(renderer), 1.f)); + fake_send_audio_device_ = new rtc::RefCountedObject( + std::move(capturer), nullptr, 1.f); + fake_recv_audio_device_ = new rtc::RefCountedObject( + nullptr, std::move(renderer), 1.f); } void CallTest::CreateVideoStreams() { @@ -432,7 +432,7 @@ void CallTest::CreateVoiceEngines() { voe_send_.base = VoEBase::GetInterface(voe_send_.voice_engine); EXPECT_EQ(0, fake_send_audio_device_->Init()); EXPECT_EQ(0, voe_send_.base->Init(fake_send_audio_device_.get(), - apm_send_.get(), decoder_factory_)); + nullptr, decoder_factory_)); VoEBase::ChannelConfig config; config.enable_voice_pacing = true; voe_send_.channel_id = voe_send_.base->CreateChannel(config); @@ -442,7 +442,7 @@ void CallTest::CreateVoiceEngines() { voe_recv_.base = VoEBase::GetInterface(voe_recv_.voice_engine); EXPECT_EQ(0, fake_recv_audio_device_->Init()); EXPECT_EQ(0, voe_recv_.base->Init(fake_recv_audio_device_.get(), - apm_recv_.get(), decoder_factory_)); + nullptr, decoder_factory_)); voe_recv_.channel_id = voe_recv_.base->CreateChannel(); EXPECT_GE(voe_recv_.channel_id, 0); } diff --git a/test/call_test.h b/test/call_test.h index cf15aacf11..11ded0b893 100644 --- a/test/call_test.h +++ b/test/call_test.h @@ -161,10 +161,8 @@ class CallTest : public ::testing::Test { VoiceEngineState voe_recv_; rtc::scoped_refptr apm_send_; rtc::scoped_refptr apm_recv_; - - // The audio devices must outlive the voice engines. - std::unique_ptr fake_send_audio_device_; - std::unique_ptr fake_recv_audio_device_; + rtc::scoped_refptr fake_send_audio_device_; + rtc::scoped_refptr fake_recv_audio_device_; }; class BaseTest : public RtpRtcpObserver { diff --git a/test/mock_voe_channel_proxy.h b/test/mock_voe_channel_proxy.h index 9bc48b10a0..e075124552 100644 --- a/test/mock_voe_channel_proxy.h +++ b/test/mock_voe_channel_proxy.h @@ -22,9 +22,9 @@ namespace test { class MockVoEChannelProxy : public voe::ChannelProxy { public: - // GTest doesn't like move-only types, like std::unique_ptr - bool SetEncoder(int payload_type, - std::unique_ptr encoder) { + // GMock doesn't like move-only types, like std::unique_ptr. + virtual bool SetEncoder(int payload_type, + std::unique_ptr encoder) { return SetEncoderForMock(payload_type, &encoder); } MOCK_METHOD2(SetEncoderForMock, @@ -77,6 +77,12 @@ class MockVoEChannelProxy : public voe::ChannelProxy { AudioMixer::Source::AudioFrameInfo(int sample_rate_hz, AudioFrame* audio_frame)); MOCK_CONST_METHOD0(PreferredSampleRate, int()); + // GMock doesn't like move-only types, like std::unique_ptr. + virtual void ProcessAndEncodeAudio(std::unique_ptr audio_frame) { + ProcessAndEncodeAudioForMock(&audio_frame); + } + MOCK_METHOD1(ProcessAndEncodeAudioForMock, + void(std::unique_ptr* audio_frame)); MOCK_METHOD1(SetTransportOverhead, void(int transport_overhead_per_packet)); MOCK_METHOD1(AssociateSendChannel, void(const ChannelProxy& send_channel_proxy)); diff --git a/test/mock_voice_engine.h b/test/mock_voice_engine.h index 57c6b58c67..5c966bd76c 100644 --- a/test/mock_voice_engine.h +++ b/test/mock_voice_engine.h @@ -13,17 +13,12 @@ #include -#include "modules/audio_device/include/mock_audio_transport.h" #include "modules/rtp_rtcp/mocks/mock_rtp_rtcp.h" #include "test/gmock.h" #include "test/mock_voe_channel_proxy.h" #include "voice_engine/voice_engine_impl.h" namespace webrtc { -namespace voe { -class TransmitMixer; -} // namespace voe - namespace test { // NOTE: This class inherits from VoiceEngineImpl so that its clients will be @@ -61,9 +56,6 @@ class MockVoiceEngine : public VoiceEngineImpl { testing::SetArgPointee<0>(GetMockRtpRtcp(channel_id))); return proxy; })); - - ON_CALL(*this, audio_transport()) - .WillByDefault(testing::Return(&mock_audio_transport_)); } virtual ~MockVoiceEngine() /* override */ { // Decrease ref count before base class d-tor is called; otherwise it will @@ -94,7 +86,6 @@ class MockVoiceEngine : public VoiceEngineImpl { int(AudioDeviceModule* external_adm, AudioProcessing* external_apm, const rtc::scoped_refptr& decoder_factory)); - MOCK_METHOD0(transmit_mixer, voe::TransmitMixer*()); MOCK_METHOD0(Terminate, void()); MOCK_METHOD0(CreateChannel, int()); MOCK_METHOD1(CreateChannel, int(const ChannelConfig& config)); @@ -103,7 +94,6 @@ class MockVoiceEngine : public VoiceEngineImpl { MOCK_METHOD1(StopPlayout, int(int channel)); MOCK_METHOD1(StartSend, int(int channel)); MOCK_METHOD1(StopSend, int(int channel)); - MOCK_METHOD0(audio_transport, AudioTransport*()); private: // TODO(ossu): I'm not particularly happy about keeping the decoder factory @@ -115,8 +105,6 @@ class MockVoiceEngine : public VoiceEngineImpl { rtc::scoped_refptr decoder_factory_; std::map> mock_rtp_rtcps_; - - MockAudioTransport mock_audio_transport_; }; } // namespace test } // namespace webrtc diff --git a/video/video_quality_test.cc b/video/video_quality_test.cc index 30a49d48fd..0aa12bb905 100644 --- a/video/video_quality_test.cc +++ b/video/video_quality_test.cc @@ -93,12 +93,11 @@ struct VoiceEngineState { void CreateVoiceEngine( VoiceEngineState* voe, webrtc::AudioDeviceModule* adm, - webrtc::AudioProcessing* apm, rtc::scoped_refptr decoder_factory) { voe->voice_engine = webrtc::VoiceEngine::Create(); voe->base = webrtc::VoEBase::GetInterface(voe->voice_engine); EXPECT_EQ(0, adm->Init()); - EXPECT_EQ(0, voe->base->Init(adm, apm, decoder_factory)); + EXPECT_EQ(0, voe->base->Init(adm, nullptr, decoder_factory)); webrtc::VoEBase::ChannelConfig config; config.enable_voice_pacing = true; voe->send_channel_id = voe->base->CreateChannel(config); @@ -1961,7 +1960,6 @@ void VideoQualityTest::SetupAudio(int send_channel_id, void VideoQualityTest::RunWithRenderers(const Params& params) { std::unique_ptr send_transport; std::unique_ptr recv_transport; - std::unique_ptr fake_audio_device; ::VoiceEngineState voe; std::unique_ptr local_preview; std::vector> loopback_renderers; @@ -1976,21 +1974,19 @@ void VideoQualityTest::RunWithRenderers(const Params& params) { Call::Config call_config(event_log_.get()); call_config.bitrate_config = params_.call.call_bitrate_config; - fake_audio_device.reset(new test::FakeAudioDevice( - test::FakeAudioDevice::CreatePulsedNoiseCapturer(32000, 48000), - test::FakeAudioDevice::CreateDiscardRenderer(48000), - 1.f)); - - rtc::scoped_refptr audio_processing( - webrtc::AudioProcessing::Create()); + rtc::scoped_refptr fake_audio_device = + new rtc::RefCountedObject( + test::FakeAudioDevice::CreatePulsedNoiseCapturer(32000, 48000), + test::FakeAudioDevice::CreateDiscardRenderer(48000), + 1.f); if (params_.audio.enabled) { - CreateVoiceEngine(&voe, fake_audio_device.get(), audio_processing.get(), - decoder_factory_); + CreateVoiceEngine(&voe, fake_audio_device.get(), decoder_factory_); AudioState::Config audio_state_config; audio_state_config.voice_engine = voe.voice_engine; audio_state_config.audio_mixer = AudioMixerImpl::Create(); - audio_state_config.audio_processing = audio_processing; + audio_state_config.audio_processing = AudioProcessing::Create(); + audio_state_config.audio_device_module = fake_audio_device; call_config.audio_state = AudioState::Create(audio_state_config); fake_audio_device->RegisterAudioCallback( call_config.audio_state->audio_transport()); diff --git a/voice_engine/BUILD.gn b/voice_engine/BUILD.gn index e99393172f..a10b792677 100644 --- a/voice_engine/BUILD.gn +++ b/voice_engine/BUILD.gn @@ -23,8 +23,6 @@ rtc_static_library("voice_engine") { "include/voe_errors.h", "shared_data.cc", "shared_data.h", - "transmit_mixer.cc", - "transmit_mixer.h", "transport_feedback_packet_loss_tracker.cc", "transport_feedback_packet_loss_tracker.h", "utility.cc", @@ -36,8 +34,6 @@ rtc_static_library("voice_engine") { ] if (is_win) { - defines = [ "WEBRTC_DRIFT_COMPENSATION_SUPPORTED" ] - cflags = [ # TODO(kjellander): Bug 261: fix this warning. "/wd4373", # Virtual function override. @@ -88,6 +84,10 @@ rtc_static_library("voice_engine") { } rtc_static_library("audio_level") { + visibility += [ + ":voice_engine", + "../audio:audio", + ] sources = [ "audio_level.cc", "audio_level.h", @@ -146,8 +146,6 @@ if (rtc_include_tests) { ] if (is_win) { - defines = [ "WEBRTC_DRIFT_COMPENSATION_SUPPORTED" ] - cflags = [ # TODO(kjellander): Bug 261: fix this warning. "/wd4373", # Virtual function override. diff --git a/voice_engine/channel.cc b/voice_engine/channel.cc index 5bc66ff941..de2d4a7626 100644 --- a/voice_engine/channel.cc +++ b/voice_engine/channel.cc @@ -46,7 +46,6 @@ #include "rtc_base/timeutils.h" #include "system_wrappers/include/field_trial.h" #include "system_wrappers/include/metrics.h" -#include "voice_engine/utility.h" namespace webrtc { namespace voe { @@ -489,9 +488,6 @@ AudioMixer::Source::AudioFrameInfo Channel::GetAudioFrameWithInfo( AudioFrameOperations::Mute(audio_frame); } - // Store speech type for dead-or-alive detection - _outputSpeechType = audio_frame->speech_type_; - { // Pass the audio buffers to an optional sink callback, before applying // scaling/panning, as that applies to the mix operation. @@ -626,7 +622,6 @@ Channel::Channel(int32_t channelId, _includeAudioLevelIndication(false), transport_overhead_per_packet_(0), rtp_overhead_per_packet_(0), - _outputSpeechType(AudioFrame::kNormalSpeech), rtcp_observer_(new VoERtcpObserver(this)), associate_send_channel_(ChannelOwner(nullptr)), pacing_enabled_(config.enable_voice_pacing), @@ -785,11 +780,7 @@ int32_t Channel::StartSend() { return 0; } channel_state_.SetSending(true); - { - // It is now OK to start posting tasks to the encoder task queue. - rtc::CritScope cs(&encoder_queue_lock_); - encoder_queue_is_active_ = true; - } + // Resume the previous sequence number which was reset by StopSend(). This // needs to be done before |sending| is set to true on the RTP/RTCP module. if (send_sequence_number_) { @@ -803,7 +794,11 @@ int32_t Channel::StartSend() { channel_state_.SetSending(false); return -1; } - + { + // It is now OK to start posting tasks to the encoder task queue. + rtc::CritScope cs(&encoder_queue_lock_); + encoder_queue_is_active_ = true; + } return 0; } @@ -870,9 +865,6 @@ bool Channel::SetEncoder(int payload_type, rtp_codec.channels = encoder->NumChannels(); rtp_codec.rate = 0; - cached_encoder_props_.emplace( - EncoderProps{encoder->SampleRateHz(), encoder->NumChannels()}); - if (_rtpRtcpModule->RegisterSendPayload(rtp_codec) != 0) { _rtpRtcpModule->DeRegisterSendPayload(payload_type); if (_rtpRtcpModule->RegisterSendPayload(rtp_codec) != 0) { @@ -891,10 +883,6 @@ void Channel::ModifyEncoder( audio_coding_->ModifyEncoder(modifier); } -rtc::Optional Channel::GetEncoderProps() const { - return cached_encoder_props_; -} - int32_t Channel::GetRecCodec(CodecInst& codec) { return (audio_coding_->ReceiveCodec(&codec)); } @@ -1349,16 +1337,12 @@ int Channel::ResendPackets(const uint16_t* sequence_numbers, int length) { return _rtpRtcpModule->SendNACK(sequence_numbers, length); } -void Channel::ProcessAndEncodeAudio(const AudioFrame& audio_input) { +void Channel::ProcessAndEncodeAudio(std::unique_ptr audio_frame) { // Avoid posting any new tasks if sending was already stopped in StopSend(). rtc::CritScope cs(&encoder_queue_lock_); if (!encoder_queue_is_active_) { return; } - std::unique_ptr audio_frame(new AudioFrame()); - // TODO(henrika): try to avoid copying by moving ownership of audio frame - // either into pool of frames or into the task itself. - audio_frame->CopyFrom(audio_input); // Profile time between when the audio frame is added to the task queue and // when the task is actually executed. audio_frame->UpdateProfileTimeStamp(); @@ -1366,27 +1350,6 @@ void Channel::ProcessAndEncodeAudio(const AudioFrame& audio_input) { new ProcessAndEncodeAudioTask(std::move(audio_frame), this))); } -void Channel::ProcessAndEncodeAudio(const int16_t* audio_data, - int sample_rate, - size_t number_of_frames, - size_t number_of_channels) { - // Avoid posting as new task if sending was already stopped in StopSend(). - rtc::CritScope cs(&encoder_queue_lock_); - if (!encoder_queue_is_active_) { - return; - } - std::unique_ptr audio_frame(new AudioFrame()); - const auto props = GetEncoderProps(); - RTC_CHECK(props); - audio_frame->sample_rate_hz_ = std::min(props->sample_rate_hz, sample_rate); - audio_frame->num_channels_ = - std::min(props->num_channels, number_of_channels); - RemixAndResample(audio_data, number_of_frames, number_of_channels, - sample_rate, &input_resampler_, audio_frame.get()); - encoder_queue_->PostTask(std::unique_ptr( - new ProcessAndEncodeAudioTask(std::move(audio_frame), this))); -} - void Channel::ProcessAndEncodeAudioOnTaskQueue(AudioFrame* audio_input) { RTC_DCHECK_RUN_ON(encoder_queue_); RTC_DCHECK_GT(audio_input->samples_per_channel_, 0); diff --git a/voice_engine/channel.h b/voice_engine/channel.h index 8431b011e7..9018f9f7a2 100644 --- a/voice_engine/channel.h +++ b/voice_engine/channel.h @@ -18,7 +18,6 @@ #include "api/call/audio_sink.h" #include "api/call/transport.h" #include "api/optional.h" -#include "common_audio/resampler/include/push_resampler.h" #include "common_types.h" // NOLINT(build/include) #include "modules/audio_coding/include/audio_coding_module.h" #include "modules/audio_processing/rms_level.h" @@ -181,11 +180,6 @@ class Channel void StopSend(); // Codecs - struct EncoderProps { - int sample_rate_hz; - size_t num_channels; - }; - rtc::Optional GetEncoderProps() const; int32_t GetRecCodec(CodecInst& codec); void SetBitRate(int bitrate_bps, int64_t probing_interval_ms); bool EnableAudioNetworkAdaptor(const std::string& config_string); @@ -283,26 +277,16 @@ class Channel RtpRtcp* RtpRtcpModulePtr() const { return _rtpRtcpModule.get(); } int8_t OutputEnergyLevel() const { return _outputAudioLevel.Level(); } - // ProcessAndEncodeAudio() creates an audio frame copy and posts a task - // on the shared encoder task queue, wich in turn calls (on the queue) - // ProcessAndEncodeAudioOnTaskQueue() where the actual processing of the - // audio takes place. The processing mainly consists of encoding and preparing - // the result for sending by adding it to a send queue. + // ProcessAndEncodeAudio() posts a task on the shared encoder task queue, + // which in turn calls (on the queue) ProcessAndEncodeAudioOnTaskQueue() where + // the actual processing of the audio takes place. The processing mainly + // consists of encoding and preparing the result for sending by adding it to a + // send queue. // The main reason for using a task queue here is to release the native, // OS-specific, audio capture thread as soon as possible to ensure that it // can go back to sleep and be prepared to deliver an new captured audio // packet. - void ProcessAndEncodeAudio(const AudioFrame& audio_input); - - // This version of ProcessAndEncodeAudio() is used by PushCaptureData() in - // VoEBase and the audio in |audio_data| has not been subject to any APM - // processing. Some extra steps are therfore needed when building up the - // audio frame copy before using the same task as in the default call to - // ProcessAndEncodeAudio(const AudioFrame& audio_input). - void ProcessAndEncodeAudio(const int16_t* audio_data, - int sample_rate, - size_t number_of_frames, - size_t number_of_channels); + void ProcessAndEncodeAudio(std::unique_ptr audio_frame); // Associate to a send channel. // Used for obtaining RTT for a receive-only channel. @@ -382,8 +366,6 @@ class Channel std::unique_ptr audio_coding_; std::unique_ptr audio_sink_; AudioLevel _outputAudioLevel; - // Downsamples to the codec rate if necessary. - PushResampler input_resampler_; uint32_t _timeStamp RTC_ACCESS_ON(encoder_queue_); RemoteNtpTimeEstimator ntp_estimator_ RTC_GUARDED_BY(ts_stats_lock_); @@ -421,8 +403,6 @@ class Channel RTC_GUARDED_BY(overhead_per_packet_lock_); size_t rtp_overhead_per_packet_ RTC_GUARDED_BY(overhead_per_packet_lock_); rtc::CriticalSection overhead_per_packet_lock_; - // VoENetwork - AudioFrame::SpeechType _outputSpeechType; // RtcpBandwidthObserver std::unique_ptr rtcp_observer_; // An associated send channel. @@ -439,8 +419,6 @@ class Channel // TODO(ossu): Remove once GetAudioDecoderFactory() is no longer needed. rtc::scoped_refptr decoder_factory_; - rtc::Optional cached_encoder_props_; - rtc::ThreadChecker construction_thread_; const bool use_twcc_plr_for_ana_; diff --git a/voice_engine/channel_proxy.cc b/voice_engine/channel_proxy.cc index 8d56db0d4f..505113dbbb 100644 --- a/voice_engine/channel_proxy.cc +++ b/voice_engine/channel_proxy.cc @@ -256,6 +256,12 @@ int ChannelProxy::PreferredSampleRate() const { return channel()->PreferredSampleRate(); } +void ChannelProxy::ProcessAndEncodeAudio( + std::unique_ptr audio_frame) { + RTC_DCHECK_RUNS_SERIALIZED(&audio_thread_race_checker_); + return channel()->ProcessAndEncodeAudio(std::move(audio_frame)); +} + void ChannelProxy::SetTransportOverhead(int transport_overhead_per_packet) { RTC_DCHECK(worker_thread_checker_.CalledOnValidThread()); channel()->SetTransportOverhead(transport_overhead_per_packet); diff --git a/voice_engine/channel_proxy.h b/voice_engine/channel_proxy.h index 8c913fa373..1dec23a3eb 100644 --- a/voice_engine/channel_proxy.h +++ b/voice_engine/channel_proxy.h @@ -107,6 +107,7 @@ class ChannelProxy : public RtpPacketSinkInterface { int sample_rate_hz, AudioFrame* audio_frame); virtual int PreferredSampleRate() const; + virtual void ProcessAndEncodeAudio(std::unique_ptr audio_frame); virtual void SetTransportOverhead(int transport_overhead_per_packet); virtual void AssociateSendChannel(const ChannelProxy& send_channel_proxy); virtual void DisassociateSendChannel(); diff --git a/voice_engine/include/voe_base.h b/voice_engine/include/voe_base.h index a995325f42..238de1df95 100644 --- a/voice_engine/include/voe_base.h +++ b/voice_engine/include/voe_base.h @@ -43,10 +43,6 @@ namespace webrtc { class AudioDeviceModule; class AudioProcessing; -class AudioTransport; -namespace voe { -class TransmitMixer; -} // namespace voe // VoiceEngine class WEBRTC_DLLEXPORT VoiceEngine { @@ -90,17 +86,13 @@ class WEBRTC_DLLEXPORT VoEBase { // modules: // - The Audio Device Module (ADM) which implements all the audio layer // functionality in a separate (reference counted) module. - // - The AudioProcessing module handles capture-side processing. + // - The AudioProcessing module is unused - only kept for API compatibility. // - An AudioDecoderFactory - used to create audio decoders. virtual int Init( AudioDeviceModule* audio_device, AudioProcessing* audio_processing, const rtc::scoped_refptr& decoder_factory) = 0; - // This method is WIP - DO NOT USE! - // Returns NULL before Init() is called. - virtual voe::TransmitMixer* transmit_mixer() = 0; - // Terminates all VoiceEngine functions and releases allocated resources. virtual void Terminate() = 0; @@ -147,10 +139,6 @@ class WEBRTC_DLLEXPORT VoEBase { // By default, recording is enabled. virtual int SetRecording(bool enabled) = 0; - // TODO(xians): Make the interface pure virtual after libjingle - // implements the interface in its FakeWebRtcVoiceEngine. - virtual AudioTransport* audio_transport() { return NULL; } - protected: VoEBase() {} virtual ~VoEBase() {} diff --git a/voice_engine/shared_data.cc b/voice_engine/shared_data.cc index 01e163b394..b60633d8cc 100644 --- a/voice_engine/shared_data.cc +++ b/voice_engine/shared_data.cc @@ -10,9 +10,7 @@ #include "voice_engine/shared_data.h" -#include "modules/audio_processing/include/audio_processing.h" #include "voice_engine/channel.h" -#include "voice_engine/transmit_mixer.h" namespace webrtc { @@ -26,14 +24,10 @@ SharedData::SharedData() _audioDevicePtr(NULL), _moduleProcessThreadPtr(ProcessThread::Create("VoiceProcessThread")), encoder_queue_("AudioEncoderQueue") { - if (TransmitMixer::Create(_transmitMixerPtr) == 0) { - _transmitMixerPtr->SetEngineInformation(&_channelManager); - } } SharedData::~SharedData() { - TransmitMixer::Destroy(_transmitMixerPtr); if (_audioDevicePtr) { _audioDevicePtr->Release(); } @@ -50,10 +44,6 @@ void SharedData::set_audio_device( _audioDevicePtr = audio_device; } -void SharedData::set_audio_processing(AudioProcessing* audioproc) { - _transmitMixerPtr->SetAudioProcessingModule(audioproc); -} - int SharedData::NumOfSendingChannels() { ChannelManager::Iterator it(&_channelManager); int sending_channels = 0; diff --git a/voice_engine/shared_data.h b/voice_engine/shared_data.h index 7978ff7497..1a7985a4a4 100644 --- a/voice_engine/shared_data.h +++ b/voice_engine/shared_data.h @@ -14,7 +14,6 @@ #include #include "modules/audio_device/include/audio_device.h" -#include "modules/audio_processing/include/audio_processing.h" #include "modules/utility/include/process_thread.h" #include "rtc_base/criticalsection.h" #include "rtc_base/scoped_ref_ptr.h" @@ -28,8 +27,6 @@ class ProcessThread; namespace webrtc { namespace voe { -class TransmitMixer; - class SharedData { public: @@ -39,8 +36,6 @@ public: AudioDeviceModule* audio_device() { return _audioDevicePtr.get(); } void set_audio_device( const rtc::scoped_refptr& audio_device); - void set_audio_processing(AudioProcessing* audio_processing); - TransmitMixer* transmit_mixer() { return _transmitMixerPtr; } rtc::CriticalSection* crit_sec() { return &_apiCritPtr; } ProcessThread* process_thread() { return _moduleProcessThreadPtr.get(); } rtc::TaskQueue* encoder_queue(); @@ -54,7 +49,6 @@ protected: rtc::CriticalSection _apiCritPtr; ChannelManager _channelManager; rtc::scoped_refptr _audioDevicePtr; - TransmitMixer* _transmitMixerPtr; std::unique_ptr _moduleProcessThreadPtr; // |encoder_queue| is defined last to ensure all pending tasks are cancelled // and deleted before any other members. diff --git a/voice_engine/transmit_mixer.cc b/voice_engine/transmit_mixer.cc deleted file mode 100644 index 049a64c2e8..0000000000 --- a/voice_engine/transmit_mixer.cc +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "voice_engine/transmit_mixer.h" - -#include - -#include "audio/utility/audio_frame_operations.h" -#include "rtc_base/format_macros.h" -#include "rtc_base/location.h" -#include "rtc_base/logging.h" -#include "system_wrappers/include/event_wrapper.h" -#include "voice_engine/channel.h" -#include "voice_engine/channel_manager.h" -#include "voice_engine/utility.h" - -namespace webrtc { -namespace voe { - -// TODO(solenberg): The thread safety in this class is dubious. - -int32_t -TransmitMixer::Create(TransmitMixer*& mixer) -{ - mixer = new TransmitMixer(); - if (mixer == NULL) - { - RTC_DLOG(LS_ERROR) << - "TransmitMixer::Create() unable to allocate memory for mixer"; - return -1; - } - return 0; -} - -void -TransmitMixer::Destroy(TransmitMixer*& mixer) -{ - if (mixer) - { - delete mixer; - mixer = NULL; - } -} - -TransmitMixer::~TransmitMixer() = default; - -void TransmitMixer::SetEngineInformation(ChannelManager* channelManager) { - _channelManagerPtr = channelManager; -} - -int32_t -TransmitMixer::SetAudioProcessingModule(AudioProcessing* audioProcessingModule) -{ - audioproc_ = audioProcessingModule; - return 0; -} - -void TransmitMixer::GetSendCodecInfo(int* max_sample_rate, - size_t* max_channels) { - *max_sample_rate = 8000; - *max_channels = 1; - for (ChannelManager::Iterator it(_channelManagerPtr); it.IsValid(); - it.Increment()) { - Channel* channel = it.GetChannel(); - if (channel->Sending()) { - const auto props = channel->GetEncoderProps(); - RTC_CHECK(props); - *max_sample_rate = std::max(*max_sample_rate, props->sample_rate_hz); - *max_channels = std::max(*max_channels, props->num_channels); - } - } -} - -int32_t -TransmitMixer::PrepareDemux(const void* audioSamples, - size_t nSamples, - size_t nChannels, - uint32_t samplesPerSec, - uint16_t totalDelayMS, - int32_t clockDrift, - uint16_t currentMicLevel, - bool keyPressed) -{ - // --- Resample input audio and create/store the initial audio frame - GenerateAudioFrame(static_cast(audioSamples), - nSamples, - nChannels, - samplesPerSec); - - // --- Near-end audio processing. - ProcessAudio(totalDelayMS, clockDrift, currentMicLevel, keyPressed); - - if (swap_stereo_channels_ && stereo_codec_) - // Only bother swapping if we're using a stereo codec. - AudioFrameOperations::SwapStereoChannels(&_audioFrame); - - // --- Annoying typing detection (utilizes the APM/VAD decision) -#if WEBRTC_VOICE_ENGINE_TYPING_DETECTION - TypingDetection(keyPressed); -#endif - - // --- Measure audio level of speech after all processing. - double sample_duration = static_cast(nSamples) / samplesPerSec; - _audioLevel.ComputeLevel(_audioFrame, sample_duration); - - return 0; -} - -void TransmitMixer::ProcessAndEncodeAudio() { - RTC_DCHECK_GT(_audioFrame.samples_per_channel_, 0); - for (ChannelManager::Iterator it(_channelManagerPtr); it.IsValid(); - it.Increment()) { - Channel* const channel = it.GetChannel(); - if (channel->Sending()) { - channel->ProcessAndEncodeAudio(_audioFrame); - } - } -} - -uint32_t TransmitMixer::CaptureLevel() const -{ - return _captureLevel; -} - -int32_t -TransmitMixer::StopSend() -{ - _audioLevel.Clear(); - return 0; -} - -int8_t TransmitMixer::AudioLevel() const -{ - // Speech + file level [0,9] - return _audioLevel.Level(); -} - -int16_t TransmitMixer::AudioLevelFullRange() const -{ - // Speech + file level [0,32767] - return _audioLevel.LevelFullRange(); -} - -double TransmitMixer::GetTotalInputEnergy() const { - return _audioLevel.TotalEnergy(); -} - -double TransmitMixer::GetTotalInputDuration() const { - return _audioLevel.TotalDuration(); -} - -void TransmitMixer::GenerateAudioFrame(const int16_t* audio, - size_t samples_per_channel, - size_t num_channels, - int sample_rate_hz) { - int codec_rate; - size_t num_codec_channels; - GetSendCodecInfo(&codec_rate, &num_codec_channels); - stereo_codec_ = num_codec_channels == 2; - - // We want to process at the lowest rate possible without losing information. - // Choose the lowest native rate at least equal to the input and codec rates. - const int min_processing_rate = std::min(sample_rate_hz, codec_rate); - for (size_t i = 0; i < AudioProcessing::kNumNativeSampleRates; ++i) { - _audioFrame.sample_rate_hz_ = AudioProcessing::kNativeSampleRatesHz[i]; - if (_audioFrame.sample_rate_hz_ >= min_processing_rate) { - break; - } - } - _audioFrame.num_channels_ = std::min(num_channels, num_codec_channels); - RemixAndResample(audio, samples_per_channel, num_channels, sample_rate_hz, - &resampler_, &_audioFrame); -} - -void TransmitMixer::ProcessAudio(int delay_ms, int clock_drift, - int current_mic_level, bool key_pressed) { - if (audioproc_->set_stream_delay_ms(delay_ms) != 0) { - // Silently ignore this failure to avoid flooding the logs. - } - - GainControl* agc = audioproc_->gain_control(); - if (agc->set_stream_analog_level(current_mic_level) != 0) { - RTC_DLOG(LS_ERROR) << "set_stream_analog_level failed: current_mic_level = " - << current_mic_level; - assert(false); - } - - EchoCancellation* aec = audioproc_->echo_cancellation(); - if (aec->is_drift_compensation_enabled()) { - aec->set_stream_drift_samples(clock_drift); - } - - audioproc_->set_stream_key_pressed(key_pressed); - - int err = audioproc_->ProcessStream(&_audioFrame); - if (err != 0) { - RTC_DLOG(LS_ERROR) << "ProcessStream() error: " << err; - assert(false); - } - - // Store new capture level. Only updated when analog AGC is enabled. - _captureLevel = agc->stream_analog_level(); -} - -#if WEBRTC_VOICE_ENGINE_TYPING_DETECTION -void TransmitMixer::TypingDetection(bool key_pressed) -{ - // We let the VAD determine if we're using this feature or not. - if (_audioFrame.vad_activity_ == AudioFrame::kVadUnknown) { - return; - } - - bool vad_active = _audioFrame.vad_activity_ == AudioFrame::kVadActive; - bool typing_detected = typing_detection_.Process(key_pressed, vad_active); - - rtc::CritScope cs(&lock_); - typing_noise_detected_ = typing_detected; -} -#endif - -void TransmitMixer::EnableStereoChannelSwapping(bool enable) { - swap_stereo_channels_ = enable; -} - -bool TransmitMixer::IsStereoChannelSwappingEnabled() { - return swap_stereo_channels_; -} - -bool TransmitMixer::typing_noise_detected() const { - rtc::CritScope cs(&lock_); - return typing_noise_detected_; -} - -} // namespace voe -} // namespace webrtc diff --git a/voice_engine/transmit_mixer.h b/voice_engine/transmit_mixer.h deleted file mode 100644 index 42b6212bfe..0000000000 --- a/voice_engine/transmit_mixer.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VOICE_ENGINE_TRANSMIT_MIXER_H_ -#define VOICE_ENGINE_TRANSMIT_MIXER_H_ - -#include - -#include "common_audio/resampler/include/push_resampler.h" -#include "common_types.h" // NOLINT(build/include) -#include "modules/audio_processing/typing_detection.h" -#include "modules/include/module_common_types.h" -#include "rtc_base/criticalsection.h" -#include "voice_engine/audio_level.h" -#include "voice_engine/include/voe_base.h" - -#if !defined(WEBRTC_ANDROID) && !defined(WEBRTC_IOS) -#define WEBRTC_VOICE_ENGINE_TYPING_DETECTION 1 -#else -#define WEBRTC_VOICE_ENGINE_TYPING_DETECTION 0 -#endif - -namespace webrtc { -class AudioProcessing; -class ProcessThread; - -namespace voe { - -class ChannelManager; -class MixedAudio; - -class TransmitMixer { -public: - static int32_t Create(TransmitMixer*& mixer); - - static void Destroy(TransmitMixer*& mixer); - - void SetEngineInformation(ChannelManager* channelManager); - - int32_t SetAudioProcessingModule(AudioProcessing* audioProcessingModule); - - int32_t PrepareDemux(const void* audioSamples, - size_t nSamples, - size_t nChannels, - uint32_t samplesPerSec, - uint16_t totalDelayMS, - int32_t clockDrift, - uint16_t currentMicLevel, - bool keyPressed); - - void ProcessAndEncodeAudio(); - - // Must be called on the same thread as PrepareDemux(). - uint32_t CaptureLevel() const; - - int32_t StopSend(); - - // TODO(solenberg): Remove, once AudioMonitor is gone. - int8_t AudioLevel() const; - - // 'virtual' to allow mocking. - virtual int16_t AudioLevelFullRange() const; - - // See description of "totalAudioEnergy" in the WebRTC stats spec: - // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy - // 'virtual' to allow mocking. - virtual double GetTotalInputEnergy() const; - - // 'virtual' to allow mocking. - virtual double GetTotalInputDuration() const; - - virtual ~TransmitMixer(); - - // Virtual to allow mocking. - virtual void EnableStereoChannelSwapping(bool enable); - bool IsStereoChannelSwappingEnabled(); - - // Virtual to allow mocking. - virtual bool typing_noise_detected() const; - -protected: - TransmitMixer() = default; - -private: - // Gets the maximum sample rate and number of channels over all currently - // sending codecs. - void GetSendCodecInfo(int* max_sample_rate, size_t* max_channels); - - void GenerateAudioFrame(const int16_t audioSamples[], - size_t nSamples, - size_t nChannels, - int samplesPerSec); - - void ProcessAudio(int delay_ms, int clock_drift, int current_mic_level, - bool key_pressed); - -#if WEBRTC_VOICE_ENGINE_TYPING_DETECTION - void TypingDetection(bool key_pressed); -#endif - - // uses - ChannelManager* _channelManagerPtr = nullptr; - AudioProcessing* audioproc_ = nullptr; - - // owns - AudioFrame _audioFrame; - PushResampler resampler_; // ADM sample rate -> mixing rate - voe::AudioLevel _audioLevel; - -#if WEBRTC_VOICE_ENGINE_TYPING_DETECTION - webrtc::TypingDetection typing_detection_; -#endif - - rtc::CriticalSection lock_; - bool typing_noise_detected_ RTC_GUARDED_BY(lock_) = false; - - uint32_t _captureLevel = 0; - bool stereo_codec_ = false; - bool swap_stereo_channels_ = false; -}; -} // namespace voe -} // namespace webrtc - -#endif // VOICE_ENGINE_TRANSMIT_MIXER_H_ diff --git a/voice_engine/voe_base_impl.cc b/voice_engine/voe_base_impl.cc index 5d49872620..094288df74 100644 --- a/voice_engine/voe_base_impl.cc +++ b/voice_engine/voe_base_impl.cc @@ -13,13 +13,11 @@ #include "common_audio/signal_processing/include/signal_processing_library.h" #include "modules/audio_coding/include/audio_coding_module.h" #include "modules/audio_device/audio_device_impl.h" -#include "modules/audio_processing/include/audio_processing.h" #include "rtc_base/format_macros.h" #include "rtc_base/location.h" #include "rtc_base/logging.h" #include "voice_engine/channel.h" #include "voice_engine/include/voe_errors.h" -#include "voice_engine/transmit_mixer.h" #include "voice_engine/voice_engine_impl.h" namespace webrtc { @@ -40,122 +38,17 @@ VoEBaseImpl::~VoEBaseImpl() { TerminateInternal(); } -int32_t VoEBaseImpl::RecordedDataIsAvailable( - const void* audio_data, - const size_t number_of_frames, - const size_t bytes_per_sample, - const size_t number_of_channels, - const uint32_t sample_rate, - const uint32_t audio_delay_milliseconds, - const int32_t clock_drift, - const uint32_t volume, - const bool key_pressed, - uint32_t& new_mic_volume) { - RTC_DCHECK_EQ(2 * number_of_channels, bytes_per_sample); - RTC_DCHECK(shared_->transmit_mixer() != nullptr); - RTC_DCHECK(shared_->audio_device() != nullptr); - - constexpr uint32_t kMaxVolumeLevel = 255; - - uint32_t max_volume = 0; - uint16_t voe_mic_level = 0; - // Check for zero to skip this calculation; the consumer may use this to - // indicate no volume is available. - if (volume != 0) { - // Scale from ADM to VoE level range - if (shared_->audio_device()->MaxMicrophoneVolume(&max_volume) == 0) { - if (max_volume) { - voe_mic_level = static_cast( - (volume * kMaxVolumeLevel + static_cast(max_volume / 2)) / - max_volume); - } - } - // We learned that on certain systems (e.g Linux) the voe_mic_level - // can be greater than the maxVolumeLevel therefore - // we are going to cap the voe_mic_level to the maxVolumeLevel - // and change the maxVolume to volume if it turns out that - // the voe_mic_level is indeed greater than the maxVolumeLevel. - if (voe_mic_level > kMaxVolumeLevel) { - voe_mic_level = kMaxVolumeLevel; - max_volume = volume; - } - } - - // Perform channel-independent operations - // (APM, mix with file, record to file, mute, etc.) - shared_->transmit_mixer()->PrepareDemux( - audio_data, number_of_frames, number_of_channels, sample_rate, - static_cast(audio_delay_milliseconds), clock_drift, - voe_mic_level, key_pressed); - - // Copy the audio frame to each sending channel and perform - // channel-dependent operations (file mixing, mute, etc.), encode and - // packetize+transmit the RTP packet. - shared_->transmit_mixer()->ProcessAndEncodeAudio(); - - // Scale from VoE to ADM level range. - uint32_t new_voe_mic_level = shared_->transmit_mixer()->CaptureLevel(); - if (new_voe_mic_level != voe_mic_level) { - // Return the new volume if AGC has changed the volume. - return static_cast((new_voe_mic_level * max_volume + - static_cast(kMaxVolumeLevel / 2)) / - kMaxVolumeLevel); - } - - return 0; -} - -int32_t VoEBaseImpl::NeedMorePlayData(const size_t nSamples, - const size_t nBytesPerSample, - const size_t nChannels, - const uint32_t samplesPerSec, - void* audioSamples, - size_t& nSamplesOut, - int64_t* elapsed_time_ms, - int64_t* ntp_time_ms) { - RTC_NOTREACHED(); - return 0; -} - -void VoEBaseImpl::PushCaptureData(int voe_channel, const void* audio_data, - int bits_per_sample, int sample_rate, - size_t number_of_channels, - size_t number_of_frames) { - voe::ChannelOwner ch = shared_->channel_manager().GetChannel(voe_channel); - voe::Channel* channel = ch.channel(); - if (!channel) - return; - if (channel->Sending()) { - // Send the audio to each channel directly without using the APM in the - // transmit mixer. - channel->ProcessAndEncodeAudio(static_cast(audio_data), - sample_rate, number_of_frames, - number_of_channels); - } -} - -void VoEBaseImpl::PullRenderData(int bits_per_sample, - int sample_rate, - size_t number_of_channels, - size_t number_of_frames, - void* audio_data, int64_t* elapsed_time_ms, - int64_t* ntp_time_ms) { - RTC_NOTREACHED(); -} - int VoEBaseImpl::Init( AudioDeviceModule* audio_device, AudioProcessing* audio_processing, const rtc::scoped_refptr& decoder_factory) { RTC_DCHECK(audio_device); - RTC_DCHECK(audio_processing); rtc::CritScope cs(shared_->crit_sec()); if (shared_->process_thread()) { shared_->process_thread()->Start(); } shared_->set_audio_device(audio_device); - shared_->set_audio_processing(audio_processing); RTC_DCHECK(decoder_factory); decoder_factory_ = decoder_factory; @@ -338,7 +231,6 @@ int32_t VoEBaseImpl::StopSend() { RTC_LOG(LS_ERROR) << "StopSend() failed to stop recording"; return -1; } - shared_->transmit_mixer()->StopSend(); } return 0; @@ -405,6 +297,5 @@ void VoEBaseImpl::TerminateInternal() { } shared_->set_audio_device(nullptr); - shared_->set_audio_processing(nullptr); } } // namespace webrtc diff --git a/voice_engine/voe_base_impl.h b/voice_engine/voe_base_impl.h index b96be9e343..2649b8c09b 100644 --- a/voice_engine/voe_base_impl.h +++ b/voice_engine/voe_base_impl.h @@ -21,16 +21,12 @@ namespace webrtc { class ProcessThread; -class VoEBaseImpl : public VoEBase, - public AudioTransport { +class VoEBaseImpl : public VoEBase { public: int Init( AudioDeviceModule* audio_device, AudioProcessing* audio_processing, const rtc::scoped_refptr& decoder_factory) override; - voe::TransmitMixer* transmit_mixer() override { - return shared_->transmit_mixer(); - } void Terminate() override; int CreateChannel() override; @@ -45,41 +41,6 @@ class VoEBaseImpl : public VoEBase, int SetPlayout(bool enabled) override; int SetRecording(bool enabled) override; - AudioTransport* audio_transport() override { return this; } - - // AudioTransport - int32_t RecordedDataIsAvailable(const void* audio_data, - const size_t number_of_frames, - const size_t bytes_per_sample, - const size_t number_of_channels, - const uint32_t sample_rate, - const uint32_t audio_delay_milliseconds, - const int32_t clock_drift, - const uint32_t volume, - const bool key_pressed, - uint32_t& new_mic_volume) override; - RTC_DEPRECATED int32_t NeedMorePlayData(const size_t nSamples, - const size_t nBytesPerSample, - const size_t nChannels, - const uint32_t samplesPerSec, - void* audioSamples, - size_t& nSamplesOut, - int64_t* elapsed_time_ms, - int64_t* ntp_time_ms) override; - void PushCaptureData(int voe_channel, - const void* audio_data, - int bits_per_sample, - int sample_rate, - size_t number_of_channels, - size_t number_of_frames) override; - RTC_DEPRECATED void PullRenderData(int bits_per_sample, - int sample_rate, - size_t number_of_channels, - size_t number_of_frames, - void* audio_data, - int64_t* elapsed_time_ms, - int64_t* ntp_time_ms) override; - protected: VoEBaseImpl(voe::SharedData* shared); ~VoEBaseImpl() override; diff --git a/voice_engine/voe_base_unittest.cc b/voice_engine/voe_base_unittest.cc index 56c3d13a00..2642a100d6 100644 --- a/voice_engine/voe_base_unittest.cc +++ b/voice_engine/voe_base_unittest.cc @@ -12,8 +12,6 @@ #include "api/audio_codecs/builtin_audio_decoder_factory.h" #include "modules/audio_device/include/fake_audio_device.h" -#include "modules/audio_processing/include/mock_audio_processing.h" -#include "rtc_base/refcountedobject.h" #include "test/gtest.h" namespace webrtc { @@ -24,7 +22,6 @@ class VoEBaseTest : public ::testing::Test { : voe_(VoiceEngine::Create()), base_(VoEBase::GetInterface(voe_)) { EXPECT_NE(nullptr, base_); - apm_ = new rtc::RefCountedObject(); } ~VoEBaseTest() { @@ -36,17 +33,16 @@ class VoEBaseTest : public ::testing::Test { VoiceEngine* voe_; VoEBase* base_; FakeAudioDeviceModule adm_; - rtc::scoped_refptr apm_; }; TEST_F(VoEBaseTest, InitWithExternalAudioDevice) { EXPECT_EQ(0, - base_->Init(&adm_, apm_.get(), CreateBuiltinAudioDecoderFactory())); + base_->Init(&adm_, nullptr, CreateBuiltinAudioDecoderFactory())); } TEST_F(VoEBaseTest, CreateChannelAfterInit) { EXPECT_EQ(0, - base_->Init(&adm_, apm_.get(), CreateBuiltinAudioDecoderFactory())); + base_->Init(&adm_, nullptr, CreateBuiltinAudioDecoderFactory())); int channelID = base_->CreateChannel(); EXPECT_NE(channelID, -1); EXPECT_EQ(0, base_->DeleteChannel(channelID));