diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
index 04d3654521..0ec73dafd7 100644
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
@@ -29,8 +29,8 @@ const int kWindowSizeMs = 16;
 const int kChunkSizeMs = 10;  // Size provided by APM.
 const float kClipFreqKhz = 0.2f;
 const float kKbdAlpha = 1.5f;
-const float kLambdaBot = -1.0f;     // Extreme values in bisection
-const float kLambdaTop = -1e-5f;    // search for lamda.
+const double kLambdaBot = -1.0 / (1 << 30);   // Extreme values in bisection
+const double kLambdaTop = -1e-5 / (1 << 30);  // search for lamda.
 const float kVoiceProbabilityThreshold = 0.02f;
 // Number of chunks after voice activity which is still considered speech.
 const size_t kSpeechOffsetDelay = 80;
@@ -162,12 +162,12 @@ void IntelligibilityEnhancer::SolveForLambda(float power_target) {
   const float reciprocal_power_target =
      1.f / (power_target + std::numeric_limits<float>::epsilon());
-  float lambda_bot = kLambdaBot;
-  float lambda_top = kLambdaTop;
+  double lambda_bot = kLambdaBot;
+  double lambda_top = kLambdaTop;
   float power_ratio = 2.f;  // Ratio of achieved power to target power.
   int iters = 0;
   while (std::fabs(power_ratio - 1.f) > kConvergeThresh &&
          iters <= kMaxIters) {
-    const float lambda = (lambda_bot + lambda_top) / 2.f;
+    const double lambda = (lambda_bot + lambda_top) / 2.0;
     SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.data());
     const float power =
         DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
@@ -267,7 +267,7 @@ std::vector<std::vector<float>> IntelligibilityEnhancer::CreateErbBank(
   return filter_bank;
 }
 
-void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
+void IntelligibilityEnhancer::SolveForGainsGivenLambda(double lambda,
                                                        size_t start_freq,
                                                        float* sols) {
   const float kMinPower = 1e-5f;
@@ -284,19 +284,19 @@ void IntelligibilityEnhancer::SolveForGainsGivenLambda(double lambda,
     if (pow_x0[n] < kMinPower || pow_n0[n] < kMinPower) {
       sols[n] = 1.f;
     } else {
-      const float gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] +
+      const double gamma0 = 0.5 * kRho * pow_x0[n] * pow_n0[n] +
                            lambda * pow_x0[n] * pow_n0[n] * pow_n0[n];
-      const float beta0 =
-          lambda * pow_x0[n] * (2.f - kRho) * pow_x0[n] * pow_n0[n];
-      const float alpha0 =
-          lambda * pow_x0[n] * (1.f - kRho) * pow_x0[n] * pow_x0[n];
-      RTC_DCHECK_LT(alpha0, 0.f);
+      const double beta0 =
+          lambda * pow_x0[n] * (2.0 - kRho) * pow_x0[n] * pow_n0[n];
+      const double alpha0 =
+          lambda * pow_x0[n] * (1.0 - kRho) * pow_x0[n] * pow_x0[n];
+      RTC_DCHECK_LT(alpha0, 0.0);
       // The quadratic equation should always have real roots, but to guard
       // against numerical errors we limit it to a minimum of zero.
       sols[n] = std::max(
-          0.f, (-beta0 - std::sqrt(std::max(
-                   0.f, beta0 * beta0 - 4.f * alpha0 * gamma0))) /
-                   (2.f * alpha0));
+          0.0, (-beta0 - std::sqrt(std::max(
+                   0.0, beta0 * beta0 - 4.0 * alpha0 * gamma0))) /
+                   (2.0 * alpha0));
     }
   }
 }
diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
index 1413212934..2f2506c450 100644
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
@@ -71,7 +71,7 @@ class IntelligibilityEnhancer : public LappedTransform::Callback {
 
   // Analytically solves quadratic for optimal gains given |lambda|.
   // Negative gains are set to 0. Stores the results in |sols|.
-  void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);
+  void SolveForGainsGivenLambda(double lambda, size_t start_freq, float* sols);
 
   // Returns true if the audio is speech.
   bool IsSpeech(const float* audio);
diff --git a/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc b/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc
index b459c39b69..64ccfd96ef 100644
--- a/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc
+++ b/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc
@@ -56,7 +56,6 @@ void void_main(int argc, char* argv[]) {
                          noise_file.num_channels());
   while (in_file.ReadSamples(in.size(), in.data()) == in.size() &&
          noise_file.ReadSamples(noise.size(), noise.data()) == noise.size()) {
-    FloatS16ToFloat(in.data(), in.size(), in.data());
     FloatS16ToFloat(noise.data(), noise.size(), noise.data());
     Deinterleave(in.data(), in_buf.num_frames(), in_buf.num_channels(),
                  in_buf.channels());
@@ -70,7 +69,6 @@ void void_main(int argc, char* argv[]) {
                 in_file.num_channels());
     Interleave(in_buf.channels(), in_buf.num_frames(), in_buf.num_channels(),
                in.data());
-    FloatToFloatS16(in.data(), in.size(), in.data());
     out_file.WriteSamples(in.data(), in.size());
   }
 }
diff --git a/webrtc/modules/audio_processing/noise_suppression_impl.cc b/webrtc/modules/audio_processing/noise_suppression_impl.cc
index a9d9f4a93b..1341aa8612 100644
--- a/webrtc/modules/audio_processing/noise_suppression_impl.cc
+++ b/webrtc/modules/audio_processing/noise_suppression_impl.cc
@@ -177,17 +177,15 @@ std::vector<float> NoiseSuppressionImpl::NoiseEstimate() {
   rtc::CritScope cs(crit_);
   std::vector<float> noise_estimate;
 #if defined(WEBRTC_NS_FLOAT)
-  const float kNormalizationFactor = 1.f / (1 << 15);
   noise_estimate.assign(WebRtcNs_num_freq(), 0.f);
   for (auto& suppressor : suppressors_) {
     const float* noise = WebRtcNs_noise_estimate(suppressor->state());
     for (size_t i = 0; i < noise_estimate.size(); ++i) {
-      noise_estimate[i] +=
-          kNormalizationFactor * noise[i] / suppressors_.size();
+      noise_estimate[i] += noise[i] / suppressors_.size();
     }
   }
 #elif defined(WEBRTC_NS_FIXED)
-  const float kNormalizationFactor = 1.f / (1 << 23);
+  const float kNormalizationFactor = 1.f / (1 << 9);
   noise_estimate.assign(WebRtcNsx_num_freq(), 0.f);
   for (auto& suppressor : suppressors_) {
     const uint32_t* noise = WebRtcNsx_noise_estimate(suppressor->state());
diff --git a/webrtc/modules/audio_processing/test/audio_file_processor.cc b/webrtc/modules/audio_processing/test/audio_file_processor.cc
index 5febcd0dc8..5f57917337 100644
--- a/webrtc/modules/audio_processing/test/audio_file_processor.cc
+++ b/webrtc/modules/audio_processing/test/audio_file_processor.cc
@@ -42,14 +42,39 @@ ChannelBuffer<float> GetChannelBuffer(const WavFile& file) {
 
 WavFileProcessor::WavFileProcessor(std::unique_ptr<AudioProcessing> ap,
                                    std::unique_ptr<WavReader> in_file,
-                                   std::unique_ptr<WavWriter> out_file)
+                                   std::unique_ptr<WavWriter> out_file,
+                                   std::unique_ptr<WavReader> reverse_in_file,
+                                   std::unique_ptr<WavWriter> reverse_out_file)
     : ap_(std::move(ap)),
       in_buf_(GetChannelBuffer(*in_file)),
       out_buf_(GetChannelBuffer(*out_file)),
       input_config_(GetStreamConfig(*in_file)),
       output_config_(GetStreamConfig(*out_file)),
       buffer_reader_(std::move(in_file)),
-      buffer_writer_(std::move(out_file)) {}
+      buffer_writer_(std::move(out_file)) {
+  if (reverse_in_file) {
+    const WavFile* reverse_out_config;
+    if (reverse_out_file) {
+      reverse_out_config = reverse_out_file.get();
+    } else {
+      reverse_out_config = reverse_in_file.get();
+    }
+    reverse_in_buf_.reset(
+        new ChannelBuffer<float>(GetChannelBuffer(*reverse_in_file)));
+    reverse_out_buf_.reset(
+        new ChannelBuffer<float>(GetChannelBuffer(*reverse_out_config)));
+    reverse_input_config_.reset(
+        new StreamConfig(GetStreamConfig(*reverse_in_file)));
+    reverse_output_config_.reset(
+        new StreamConfig(GetStreamConfig(*reverse_out_config)));
+    reverse_buffer_reader_.reset(
+        new ChannelBufferWavReader(std::move(reverse_in_file)));
+    if (reverse_out_file) {
+      reverse_buffer_writer_.reset(
+          new ChannelBufferWavWriter(std::move(reverse_out_file)));
+    }
+  }
+}
 
 bool WavFileProcessor::ProcessChunk() {
   if (!buffer_reader_.Read(&in_buf_)) {
@@ -62,6 +87,22 @@ bool WavFileProcessor::ProcessChunk() {
                                  output_config_, out_buf_.channels()));
   }
   buffer_writer_.Write(out_buf_);
+  if (reverse_buffer_reader_) {
+    if (!reverse_buffer_reader_->Read(reverse_in_buf_.get())) {
+      return false;
+    }
+    {
+      const auto st = ScopedTimer(mutable_proc_time());
+      RTC_CHECK_EQ(kNoErr,
+                   ap_->ProcessReverseStream(reverse_in_buf_->channels(),
+                                             *reverse_input_config_.get(),
+                                             *reverse_output_config_.get(),
+                                             reverse_out_buf_->channels()));
+    }
+    if (reverse_buffer_writer_) {
+      reverse_buffer_writer_->Write(*reverse_out_buf_.get());
+    }
+  }
   return true;
 }
 
diff --git a/webrtc/modules/audio_processing/test/audio_file_processor.h b/webrtc/modules/audio_processing/test/audio_file_processor.h
index f3db86dc84..f7cde59821 100644
--- a/webrtc/modules/audio_processing/test/audio_file_processor.h
+++ b/webrtc/modules/audio_processing/test/audio_file_processor.h
@@ -86,7 +86,9 @@ class WavFileProcessor final : public AudioFileProcessor {
   // Takes ownership of all parameters.
   WavFileProcessor(std::unique_ptr<AudioProcessing> ap,
                    std::unique_ptr<WavReader> in_file,
-                   std::unique_ptr<WavWriter> out_file);
+                   std::unique_ptr<WavWriter> out_file,
+                   std::unique_ptr<WavReader> reverse_in_file,
+                   std::unique_ptr<WavWriter> reverse_out_file);
   virtual ~WavFileProcessor() {}
 
   // Processes one chunk from the WAV input and writes to the WAV output.
@@ -101,6 +103,12 @@ class WavFileProcessor final : public AudioFileProcessor {
   const StreamConfig output_config_;
   ChannelBufferWavReader buffer_reader_;
   ChannelBufferWavWriter buffer_writer_;
+  std::unique_ptr<ChannelBuffer<float>> reverse_in_buf_;
+  std::unique_ptr<ChannelBuffer<float>> reverse_out_buf_;
+  std::unique_ptr<StreamConfig> reverse_input_config_;
+  std::unique_ptr<StreamConfig> reverse_output_config_;
+  std::unique_ptr<ChannelBufferWavReader> reverse_buffer_reader_;
+  std::unique_ptr<ChannelBufferWavWriter> reverse_buffer_writer_;
 };
 
 // Used to read from an aecdump file and write to a WavWriter.
diff --git a/webrtc/modules/audio_processing/test/audioproc_float.cc b/webrtc/modules/audio_processing/test/audioproc_float.cc
index b403c1fe05..41e45bfdc6 100644
--- a/webrtc/modules/audio_processing/test/audioproc_float.cc
+++ b/webrtc/modules/audio_processing/test/audioproc_float.cc
@@ -42,10 +42,20 @@ DEFINE_string(
     o,
     "out.wav",
     "Name of the output file to write the processed capture stream to.");
+DEFINE_string(ri, "", "Name of the render input stream file to read from.");
+DEFINE_string(
+    ro,
+    "out_reverse.wav",
+    "Name of the output file to write the processed render stream to.");
 DEFINE_int32(out_channels, 1, "Number of output channels.");
 const bool out_channels_dummy =
     google::RegisterFlagValidator(&FLAGS_out_channels, &ValidateOutChannels);
+DEFINE_int32(rev_out_channels, 1, "Number of reverse output channels.");
+const bool rev_out_channels_dummy =
+    google::RegisterFlagValidator(&FLAGS_rev_out_channels,
+                                  &ValidateOutChannels);
 DEFINE_int32(out_sample_rate, 48000, "Output sample rate in Hz.");
+DEFINE_int32(rev_out_sample_rate, 48000, "Reverse output sample rate in Hz.");
 DEFINE_string(mic_positions, "",
     "Space delimited cartesian coordinates of microphones in meters. "
     "The coordinates of each point are contiguous. "
@@ -77,8 +87,7 @@ const char kUsage[] =
     "an input capture WAV file or protobuf debug dump and writes to an output\n"
    "WAV file.\n"
     "\n"
-    "All components are disabled by default. If any bi-directional components\n"
-    "are enabled, only debug dump files are permitted.";
+    "All components are disabled by default.";
 
 }  // namespace
 
@@ -91,15 +100,6 @@ int main(int argc, char* argv[]) {
             "An input file must be specified with either -i or -dump.\n");
     return 1;
   }
-  if (FLAGS_dump.empty() && (FLAGS_aec || FLAGS_ie)) {
-    fprintf(stderr, "-aec and -ie require a -dump file.\n");
-    return 1;
-  }
-  if (FLAGS_ie) {
-    fprintf(stderr,
-            "FIXME(ajm): The intelligibility enhancer output is not dumped.\n");
-    return 1;
-  }
 
   test::TraceToStderr trace_to_stderr(true);
   Config config;
@@ -135,8 +135,24 @@ int main(int argc, char* argv[]) {
   if (FLAGS_dump.empty()) {
     auto in_file = std::unique_ptr<WavReader>(new WavReader(FLAGS_i));
     std::cout << FLAGS_i << ": " << in_file->FormatAsString() << std::endl;
-    processor.reset(new WavFileProcessor(std::move(ap), std::move(in_file),
-                                         std::move(out_file)));
+    std::unique_ptr<WavReader> reverse_in_file;
+    std::unique_ptr<WavWriter> reverse_out_file;
+    if (!FLAGS_ri.empty()) {
+      reverse_in_file.reset(new WavReader(FLAGS_ri));
+      reverse_out_file.reset(new WavWriter(
+          FLAGS_ro,
+          FLAGS_rev_out_sample_rate,
+          static_cast<size_t>(FLAGS_rev_out_channels)));
+      std::cout << FLAGS_ri << ": "
+                << reverse_in_file->FormatAsString() << std::endl;
+      std::cout << FLAGS_ro << ": "
+                << reverse_out_file->FormatAsString() << std::endl;
+    }
+    processor.reset(new WavFileProcessor(std::move(ap),
+                                         std::move(in_file),
+                                         std::move(out_file),
+                                         std::move(reverse_in_file),
+                                         std::move(reverse_out_file)));
   } else {
     processor.reset(new AecDumpFileProcessor(