From 2f84afad30b088ddebb4063bc47ac9a79d735a2b Mon Sep 17 00:00:00 2001 From: "xians@webrtc.org" Date: Wed, 31 Jul 2013 16:23:37 +0000 Subject: [PATCH] Merge r4326 from stable to trunk. r4326 was mistakenly committed to stable, so this is to re-merge back to trunk. Add new interface to support multiple sources in webrtc. CaptureData() will be called by chrome with a flag |need_audio_processing| to indicate if the data needs to be processed by APM or not. Different from the old interface that will send the data to all voe channels, the new interface will specify a list of voe channels that the data is demultiplexing to. R=tommi@webrtc.org Review URL: https://webrtc-codereview.appspot.com/1919004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@4449 4adac7df-926f-26a2-2b94-8c16560cd09d --- .../include/audio_device_defines.h | 26 ++++++++ .../test/audio_device_test_api.cc | 13 ++++ .../audio_device/test/func_test_manager.cc | 13 ++++ .../audio_device/test/func_test_manager.h | 11 ++++ webrtc/voice_engine/channel.cc | 57 +++++++++++++++++ webrtc/voice_engine/channel.h | 12 +++- webrtc/voice_engine/transmit_mixer.cc | 27 ++++++++ webrtc/voice_engine/transmit_mixer.h | 6 ++ webrtc/voice_engine/voe_base_impl.cc | 63 +++++++++++++++++++ webrtc/voice_engine/voe_base_impl.h | 11 ++++ 10 files changed, 238 insertions(+), 1 deletion(-) diff --git a/webrtc/modules/audio_device/include/audio_device_defines.h b/webrtc/modules/audio_device/include/audio_device_defines.h index ce5fd7b3de..0dfee19745 100644 --- a/webrtc/modules/audio_device/include/audio_device_defines.h +++ b/webrtc/modules/audio_device/include/audio_device_defines.h @@ -72,6 +72,32 @@ public: void* audioSamples, uint32_t& nSamplesOut) = 0; + // Method to pass captured data directly and unmixed to network channels. + // |channel_ids| contains a list of VoE channels which are the + // sinks to the capture data. |audio_delay_milliseconds| is the sum of + // recording delay and playout delay of the hardware. 
|current_volume| is + // in the range of [0, 255], representing the current microphone analog + // volume. |key_pressed| is used by the typing detection. + // |need_audio_processing| specifies if the data needs to be processed by APM. + // Currently WebRtc supports only one APM, and Chrome will make sure only + // one stream goes through APM. When |need_audio_processing| is false, the + // values of |audio_delay_milliseconds|, |current_volume| and |key_pressed| + // will be ignored. + // The return value is the new microphone volume, in the range of [0, 255]. + // When the volume does not need to be updated, it returns 0. + // TODO(xians): Make the interface pure virtual after libjingle has its + // implementation. + virtual int OnDataAvailable(int voe_channels[], + int number_of_voe_channels, + const int16_t* audio_data, + int sample_rate, + int number_of_channels, + int number_of_frames, + int audio_delay_milliseconds, + int current_volume, + bool key_pressed, + bool need_audio_processing) { return 0; } + protected: virtual ~AudioTransport() {} }; diff --git a/webrtc/modules/audio_device/test/audio_device_test_api.cc b/webrtc/modules/audio_device/test/audio_device_test_api.cc index e6c7261149..62a2db6983 100644 --- a/webrtc/modules/audio_device/test/audio_device_test_api.cc +++ b/webrtc/modules/audio_device/test/audio_device_test_api.cc @@ -129,6 +129,19 @@ class AudioTransportAPI: public AudioTransport { return 0; } + virtual int OnDataAvailable(int voe_channels[], + int number_of_voe_channels, + const int16_t* audio_data, + int sample_rate, + int number_of_channels, + int number_of_frames, + int audio_delay_milliseconds, + int current_volume, + bool key_pressed, + bool need_audio_processing) { + return 0; + } + private: uint32_t rec_count_; uint32_t play_count_; diff --git a/webrtc/modules/audio_device/test/func_test_manager.cc b/webrtc/modules/audio_device/test/func_test_manager.cc index eee85cbde3..1607742e85 100644 --- 
a/webrtc/modules/audio_device/test/func_test_manager.cc +++ b/webrtc/modules/audio_device/test/func_test_manager.cc @@ -557,6 +557,19 @@ int32_t AudioTransportImpl::NeedMorePlayData( return 0; } +int AudioTransportImpl::OnDataAvailable(int voe_channels[], + int number_of_voe_channels, + const int16_t* audio_data, + int sample_rate, + int number_of_channels, + int number_of_frames, + int audio_delay_milliseconds, + int current_volume, + bool key_pressed, + bool need_audio_processing) { + return 0; +} + FuncTestManager::FuncTestManager() : _processThread(NULL), _audioDevice(NULL), diff --git a/webrtc/modules/audio_device/test/func_test_manager.h b/webrtc/modules/audio_device/test/func_test_manager.h index 23d26c6723..314a3a7c5a 100644 --- a/webrtc/modules/audio_device/test/func_test_manager.h +++ b/webrtc/modules/audio_device/test/func_test_manager.h @@ -111,6 +111,17 @@ public: void* audioSamples, uint32_t& nSamplesOut); + virtual int OnDataAvailable(int voe_channels[], + int number_of_voe_channels, + const int16_t* audio_data, + int sample_rate, + int number_of_channels, + int number_of_frames, + int audio_delay_milliseconds, + int current_volume, + bool key_pressed, + bool need_audio_processing); + AudioTransportImpl(AudioDeviceModule* audioDevice); ~AudioTransportImpl(); diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc index ba84ba9d73..d8ee96a1f2 100644 --- a/webrtc/voice_engine/channel.cc +++ b/webrtc/voice_engine/channel.cc @@ -4418,6 +4418,63 @@ Channel::Demultiplex(const AudioFrame& audioFrame) return 0; } +// TODO(xians): This method borrows quite some code from +// TransmitMixer::GenerateAudioFrame(), refactor these two methods and reduce +// code duplication. +void Channel::Demultiplex(const int16_t* audio_data, + int number_of_frames, + int number_of_channels, + int sample_rate) { + // The highest sample rate that WebRTC supports for mono audio is 96kHz. 
+ static const int kMaxNumberOfFrames = 960; + assert(number_of_frames <= kMaxNumberOfFrames); + + // Get the send codec information for doing resampling or downmixing later on. + CodecInst codec; + GetSendCodec(codec); + assert(codec.channels == 1 || codec.channels == 2); + int support_sample_rate = std::min(32000, + std::min(sample_rate, codec.plfreq)); + + // Downmix the data to mono if needed. + const int16_t* audio_ptr = audio_data; + if (number_of_channels == 2 && codec.channels == 1) { + if (!mono_recording_audio_.get()) + mono_recording_audio_.reset(new int16_t[kMaxNumberOfFrames]); + + AudioFrameOperations::StereoToMono(audio_data, number_of_frames, + mono_recording_audio_.get()); + audio_ptr = mono_recording_audio_.get(); + } + + // Resample the data to the sample rate that the codec is using. + if (input_resampler_.InitializeIfNeeded(sample_rate, + support_sample_rate, + codec.channels)) { + WEBRTC_TRACE(kTraceError, kTraceVoice, VoEId(_instanceId, -1), + "Channel::Demultiplex() unable to resample"); + return; + } + + int out_length = input_resampler_.Resample(audio_ptr, + number_of_frames * codec.channels, + _audioFrame.data_, + AudioFrame::kMaxDataSizeSamples); + if (out_length == -1) { + WEBRTC_TRACE(kTraceError, kTraceVoice, VoEId(_instanceId, -1), + "Channel::Demultiplex() resampling failed"); + return; + } + + _audioFrame.samples_per_channel_ = out_length / codec.channels; + _audioFrame.timestamp_ = -1; + _audioFrame.sample_rate_hz_ = support_sample_rate; + _audioFrame.speech_type_ = AudioFrame::kNormalSpeech; + _audioFrame.vad_activity_ = AudioFrame::kVadUnknown; + _audioFrame.num_channels_ = codec.channels; + _audioFrame.id_ = _channelId; +} + uint32_t Channel::PrepareEncodeAndSend(int mixingFrequency) { diff --git a/webrtc/voice_engine/channel.h b/webrtc/voice_engine/channel.h index a1e16feba7..4896f73190 100644 --- a/webrtc/voice_engine/channel.h +++ b/webrtc/voice_engine/channel.h @@ -11,7 +11,7 @@ #ifndef WEBRTC_VOICE_ENGINE_CHANNEL_H #define 
WEBRTC_VOICE_ENGINE_CHANNEL_H -#include "webrtc/common_audio/resampler/include/resampler.h" +#include "webrtc/common_audio/resampler/include/push_resampler.h" #include "webrtc/common_types.h" #include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h" #include "webrtc/modules/audio_conference_mixer/interface/audio_conference_mixer_defines.h" @@ -422,6 +422,13 @@ public: return _outputAudioLevel.Level(); } uint32_t Demultiplex(const AudioFrame& audioFrame); + // Demultiplex the data to the channel's |_audioFrame|. The difference + // between this method and the overloaded method above is that |audio_data| + // does not go through transmit_mixer and APM. + void Demultiplex(const int16_t* audio_data, + int number_of_frames, + int number_of_channels, + int sample_rate); uint32_t PrepareEncodeAndSend(int mixingFrequency); uint32_t EncodeAndSend(); @@ -454,6 +461,9 @@ private: AudioLevel _outputAudioLevel; bool _externalTransport; AudioFrame _audioFrame; + scoped_array mono_recording_audio_; + // Resampler is used when input data is stereo while codec is mono. + PushResampler input_resampler_; uint8_t _audioLevel_dBov; FilePlayer* _inputFilePlayerPtr; FilePlayer* _outputFilePlayerPtr; diff --git a/webrtc/voice_engine/transmit_mixer.cc b/webrtc/voice_engine/transmit_mixer.cc index 9c7e03ea56..79d145419f 100644 --- a/webrtc/voice_engine/transmit_mixer.cc +++ b/webrtc/voice_engine/transmit_mixer.cc @@ -443,6 +443,23 @@ TransmitMixer::DemuxAndMix() return 0; } +void TransmitMixer::DemuxAndMix(int voe_channels[], + int number_of_voe_channels) { + for (int i = 0; i < number_of_voe_channels; ++i) { + voe::ScopedChannel sc(*_channelManagerPtr, voe_channels[i]); + voe::Channel* channel_ptr = sc.ChannelPtr(); + if (channel_ptr) { + if (channel_ptr->InputIsOnHold()) { + channel_ptr->UpdateLocalTimeStamp(); + } else if (channel_ptr->Sending()) { + // Demultiplex makes a copy of its input. 
+ channel_ptr->Demultiplex(_audioFrame); + channel_ptr->PrepareEncodeAndSend(_audioFrame.sample_rate_hz_); + } + } + } +} + int32_t TransmitMixer::EncodeAndSend() { @@ -463,6 +480,16 @@ TransmitMixer::EncodeAndSend() return 0; } +void TransmitMixer::EncodeAndSend(int voe_channels[], + int number_of_voe_channels) { + for (int i = 0; i < number_of_voe_channels; ++i) { + voe::ScopedChannel sc(*_channelManagerPtr, voe_channels[i]); + voe::Channel* channel_ptr = sc.ChannelPtr(); + if (channel_ptr && channel_ptr->Sending() && !channel_ptr->InputIsOnHold()) + channel_ptr->EncodeAndSend(); + } +} + uint32_t TransmitMixer::CaptureLevel() const { CriticalSectionScoped cs(&_critSect); diff --git a/webrtc/voice_engine/transmit_mixer.h b/webrtc/voice_engine/transmit_mixer.h index 1b575af415..c68293fe12 100644 --- a/webrtc/voice_engine/transmit_mixer.h +++ b/webrtc/voice_engine/transmit_mixer.h @@ -61,8 +61,14 @@ public: int32_t DemuxAndMix(); + // Used by the Chrome to pass the recording data to the specific VoE + // channels for demux. + void DemuxAndMix(int voe_channels[], int number_of_voe_channels); int32_t EncodeAndSend(); + // Used by the Chrome to pass the recording data to the specific VoE + // channels for encoding and sending to the network. 
+ void EncodeAndSend(int voe_channels[], int number_of_voe_channels); uint32_t CaptureLevel() const; diff --git a/webrtc/voice_engine/voe_base_impl.cc b/webrtc/voice_engine/voe_base_impl.cc index f9934573d0..c703efdaaa 100644 --- a/webrtc/voice_engine/voe_base_impl.cc +++ b/webrtc/voice_engine/voe_base_impl.cc @@ -274,6 +274,69 @@ int32_t VoEBaseImpl::NeedMorePlayData( return 0; } +int VoEBaseImpl::OnDataAvailable(int voe_channels[], + int number_of_voe_channels, + const int16_t* audio_data, + int sample_rate, + int number_of_channels, + int number_of_frames, + int audio_delay_milliseconds, + int current_volume, + bool key_pressed, + bool need_audio_processing) { + WEBRTC_TRACE(kTraceStream, kTraceVoice, VoEId(_shared->instance_id(), -1), + "VoEBaseImpl::OnDataAvailable(number_of_voe_channels=%d, " + "sample_rate=%d, number_of_channels=%d, number_of_frames=%d, " + "audio_delay_milliseconds=%d, current_volume=%d, " + "key_pressed=%d, need_audio_processing=%d)", + number_of_voe_channels, sample_rate, number_of_channels, + number_of_frames, audio_delay_milliseconds, current_volume, + key_pressed, need_audio_processing); + + if (need_audio_processing) { + // Perform channel-independent operations + // (APM, mix with file, record to file, mute, etc.) + _shared->transmit_mixer()->PrepareDemux( + audio_data, number_of_frames, number_of_channels, + sample_rate, static_cast(audio_delay_milliseconds), 0, + current_volume, key_pressed); + _shared->transmit_mixer()->DemuxAndMix(voe_channels, + number_of_voe_channels); + _shared->transmit_mixer()->EncodeAndSend(voe_channels, + number_of_voe_channels); + // Update the volume if the analog AGC is working. + if (_shared->audio_processing() && + _shared->audio_processing()->gain_control()->mode() == + GainControl::kAdaptiveAnalog) { + return _shared->transmit_mixer()->CaptureLevel(); + } + + // Return 0 to indicate no need to change the volume. 
+ return 0; + } + + // No need to go through the APM, demultiplex the data to each VoE channel, + // encode and send to the network. + for (int i = 0; i < number_of_voe_channels; ++i) { + voe::ScopedChannel sc(_shared->channel_manager(), voe_channels[i]); + voe::Channel* channel_ptr = sc.ChannelPtr(); + if (!channel_ptr) + continue; + + if (channel_ptr->InputIsOnHold()) { + channel_ptr->UpdateLocalTimeStamp(); + } else if (channel_ptr->Sending()) { + channel_ptr->Demultiplex(audio_data, number_of_frames, + number_of_channels, sample_rate); + channel_ptr->PrepareEncodeAndSend(sample_rate); + channel_ptr->EncodeAndSend(); + } + } + + // Return 0 to indicate no need to change the volume. + return 0; +} + int VoEBaseImpl::RegisterVoiceEngineObserver(VoiceEngineObserver& observer) { WEBRTC_TRACE(kTraceApiCall, kTraceVoice, VoEId(_shared->instance_id(), -1), diff --git a/webrtc/voice_engine/voe_base_impl.h b/webrtc/voice_engine/voe_base_impl.h index 4665e80807..c63798f35d 100644 --- a/webrtc/voice_engine/voe_base_impl.h +++ b/webrtc/voice_engine/voe_base_impl.h @@ -90,6 +90,17 @@ public: void* audioSamples, uint32_t& nSamplesOut); + virtual int OnDataAvailable(int voe_channels[], + int number_of_voe_channels, + const int16_t* audio_data, + int sample_rate, + int number_of_channels, + int number_of_frames, + int audio_delay_milliseconds, + int current_volume, + bool key_pressed, + bool need_audio_processing); + // AudioDeviceObserver virtual void OnErrorIsReported(ErrorCode error); virtual void OnWarningIsReported(WarningCode warning);