diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc index 0ec73dafd7..04d3654521 100644 --- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc +++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc @@ -29,8 +29,8 @@ const int kWindowSizeMs = 16; const int kChunkSizeMs = 10; // Size provided by APM. const float kClipFreqKhz = 0.2f; const float kKbdAlpha = 1.5f; -const double kLambdaBot = -1.0 / (1 << 30); // Extreme values in bisection -const double kLambdaTop = -1e-5 / (1 << 30); // search for lamda. +const float kLambdaBot = -1.0f; // Extreme values in bisection +const float kLambdaTop = -1e-5f; // search for lambda. const float kVoiceProbabilityThreshold = 0.02f; // Number of chunks after voice activity which is still considered speech. const size_t kSpeechOffsetDelay = 80; @@ -162,12 +162,12 @@ void IntelligibilityEnhancer::SolveForLambda(float power_target) { const float reciprocal_power_target = 1.f / (power_target + std::numeric_limits::epsilon()); - double lambda_bot = kLambdaBot; - double lambda_top = kLambdaTop; + float lambda_bot = kLambdaBot; + float lambda_top = kLambdaTop; float power_ratio = 2.f; // Ratio of achieved power to target power. 
int iters = 0; while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) { - const double lambda = (lambda_bot + lambda_top) / 2.0; + const float lambda = (lambda_bot + lambda_top) / 2.f; SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.data()); const float power = DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); @@ -267,7 +267,7 @@ std::vector> IntelligibilityEnhancer::CreateErbBank( return filter_bank; } -void IntelligibilityEnhancer::SolveForGainsGivenLambda(double lambda, +void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols) { const float kMinPower = 1e-5f; @@ -284,19 +284,19 @@ void IntelligibilityEnhancer::SolveForGainsGivenLambda(double lambda, if (pow_x0[n] < kMinPower || pow_n0[n] < kMinPower) { sols[n] = 1.f; } else { - const double gamma0 = 0.5 * kRho * pow_x0[n] * pow_n0[n] + + const float gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] + lambda * pow_x0[n] * pow_n0[n] * pow_n0[n]; - const double beta0 = - lambda * pow_x0[n] * (2.0 - kRho) * pow_x0[n] * pow_n0[n]; - const double alpha0 = - lambda * pow_x0[n] * (1.0 - kRho) * pow_x0[n] * pow_x0[n]; - RTC_DCHECK_LT(alpha0, 0.0); + const float beta0 = + lambda * pow_x0[n] * (2.f - kRho) * pow_x0[n] * pow_n0[n]; + const float alpha0 = + lambda * pow_x0[n] * (1.f - kRho) * pow_x0[n] * pow_x0[n]; + RTC_DCHECK_LT(alpha0, 0.f); // The quadratic equation should always have real roots, but to guard // against numerical errors we limit it to a minimum of zero. 
sols[n] = std::max( - 0.0, (-beta0 - std::sqrt(std::max( - 0.0, beta0 * beta0 - 4.0 * alpha0 * gamma0))) / - (2.0 * alpha0)); + 0.f, (-beta0 - std::sqrt(std::max( + 0.f, beta0 * beta0 - 4.f * alpha0 * gamma0))) / + (2.f * alpha0)); } } } diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h index 2f2506c450..1413212934 100644 --- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h +++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h @@ -71,7 +71,7 @@ class IntelligibilityEnhancer : public LappedTransform::Callback { // Analytically solves quadratic for optimal gains given |lambda|. // Negative gains are set to 0. Stores the results in |sols|. - void SolveForGainsGivenLambda(double lambda, size_t start_freq, float* sols); + void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols); // Returns true if the audio is speech. 
bool IsSpeech(const float* audio); diff --git a/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc b/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc index 64ccfd96ef..b459c39b69 100644 --- a/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc +++ b/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc @@ -56,6 +56,7 @@ void void_main(int argc, char* argv[]) { noise_file.num_channels()); while (in_file.ReadSamples(in.size(), in.data()) == in.size() && noise_file.ReadSamples(noise.size(), noise.data()) == noise.size()) { + FloatS16ToFloat(in.data(), in.size(), in.data()); FloatS16ToFloat(noise.data(), noise.size(), noise.data()); Deinterleave(in.data(), in_buf.num_frames(), in_buf.num_channels(), in_buf.channels()); @@ -69,6 +70,7 @@ void void_main(int argc, char* argv[]) { in_file.num_channels()); Interleave(in_buf.channels(), in_buf.num_frames(), in_buf.num_channels(), in.data()); + FloatToFloatS16(in.data(), in.size(), in.data()); out_file.WriteSamples(in.data(), in.size()); } } diff --git a/webrtc/modules/audio_processing/noise_suppression_impl.cc b/webrtc/modules/audio_processing/noise_suppression_impl.cc index 1341aa8612..a9d9f4a93b 100644 --- a/webrtc/modules/audio_processing/noise_suppression_impl.cc +++ b/webrtc/modules/audio_processing/noise_suppression_impl.cc @@ -177,15 +177,17 @@ std::vector NoiseSuppressionImpl::NoiseEstimate() { rtc::CritScope cs(crit_); std::vector noise_estimate; #if defined(WEBRTC_NS_FLOAT) + const float kNormalizationFactor = 1.f / (1 << 15); noise_estimate.assign(WebRtcNs_num_freq(), 0.f); for (auto& suppressor : suppressors_) { const float* noise = WebRtcNs_noise_estimate(suppressor->state()); for (size_t i = 0; i < noise_estimate.size(); ++i) { - noise_estimate[i] += noise[i] / suppressors_.size(); + noise_estimate[i] += + kNormalizationFactor * noise[i] / suppressors_.size(); } } #elif defined(WEBRTC_NS_FIXED) - const float 
kNormalizationFactor = 1.f / (1 << 9); + const float kNormalizationFactor = 1.f / (1 << 23); noise_estimate.assign(WebRtcNsx_num_freq(), 0.f); for (auto& suppressor : suppressors_) { const uint32_t* noise = WebRtcNsx_noise_estimate(suppressor->state()); diff --git a/webrtc/modules/audio_processing/test/audio_file_processor.cc b/webrtc/modules/audio_processing/test/audio_file_processor.cc index 5f57917337..5febcd0dc8 100644 --- a/webrtc/modules/audio_processing/test/audio_file_processor.cc +++ b/webrtc/modules/audio_processing/test/audio_file_processor.cc @@ -42,39 +42,14 @@ ChannelBuffer GetChannelBuffer(const WavFile& file) { WavFileProcessor::WavFileProcessor(std::unique_ptr ap, std::unique_ptr in_file, - std::unique_ptr out_file, - std::unique_ptr reverse_in_file, - std::unique_ptr reverse_out_file) + std::unique_ptr out_file) : ap_(std::move(ap)), in_buf_(GetChannelBuffer(*in_file)), out_buf_(GetChannelBuffer(*out_file)), input_config_(GetStreamConfig(*in_file)), output_config_(GetStreamConfig(*out_file)), buffer_reader_(std::move(in_file)), - buffer_writer_(std::move(out_file)) { - if (reverse_in_file) { - const WavFile* reverse_out_config; - if (reverse_out_file) { - reverse_out_config = reverse_out_file.get(); - } else { - reverse_out_config = reverse_in_file.get(); - } - reverse_in_buf_.reset( - new ChannelBuffer(GetChannelBuffer(*reverse_in_file))); - reverse_out_buf_.reset( - new ChannelBuffer(GetChannelBuffer(*reverse_out_config))); - reverse_input_config_.reset( - new StreamConfig(GetStreamConfig(*reverse_in_file))); - reverse_output_config_.reset( - new StreamConfig(GetStreamConfig(*reverse_out_config))); - reverse_buffer_reader_.reset( - new ChannelBufferWavReader(std::move(reverse_in_file))); - if (reverse_out_file) { - reverse_buffer_writer_.reset( - new ChannelBufferWavWriter(std::move(reverse_out_file))); - } - } -} + buffer_writer_(std::move(out_file)) {} bool WavFileProcessor::ProcessChunk() { if (!buffer_reader_.Read(&in_buf_)) { @@ -87,22 
+62,6 @@ bool WavFileProcessor::ProcessChunk() { output_config_, out_buf_.channels())); } buffer_writer_.Write(out_buf_); - if (reverse_buffer_reader_) { - if (!reverse_buffer_reader_->Read(reverse_in_buf_.get())) { - return false; - } - { - const auto st = ScopedTimer(mutable_proc_time()); - RTC_CHECK_EQ(kNoErr, - ap_->ProcessReverseStream(reverse_in_buf_->channels(), - *reverse_input_config_.get(), - *reverse_output_config_.get(), - reverse_out_buf_->channels())); - } - if (reverse_buffer_writer_) { - reverse_buffer_writer_->Write(*reverse_out_buf_.get()); - } - } return true; } diff --git a/webrtc/modules/audio_processing/test/audio_file_processor.h b/webrtc/modules/audio_processing/test/audio_file_processor.h index f7cde59821..f3db86dc84 100644 --- a/webrtc/modules/audio_processing/test/audio_file_processor.h +++ b/webrtc/modules/audio_processing/test/audio_file_processor.h @@ -86,9 +86,7 @@ class WavFileProcessor final : public AudioFileProcessor { // Takes ownership of all parameters. WavFileProcessor(std::unique_ptr ap, std::unique_ptr in_file, - std::unique_ptr out_file, - std::unique_ptr reverse_in_file, - std::unique_ptr reverse_out_file); + std::unique_ptr out_file); virtual ~WavFileProcessor() {} // Processes one chunk from the WAV input and writes to the WAV output. @@ -103,12 +101,6 @@ class WavFileProcessor final : public AudioFileProcessor { const StreamConfig output_config_; ChannelBufferWavReader buffer_reader_; ChannelBufferWavWriter buffer_writer_; - std::unique_ptr> reverse_in_buf_; - std::unique_ptr> reverse_out_buf_; - std::unique_ptr reverse_input_config_; - std::unique_ptr reverse_output_config_; - std::unique_ptr reverse_buffer_reader_; - std::unique_ptr reverse_buffer_writer_; }; // Used to read from an aecdump file and write to a WavWriter. 
diff --git a/webrtc/modules/audio_processing/test/audioproc_float.cc b/webrtc/modules/audio_processing/test/audioproc_float.cc index 41e45bfdc6..b403c1fe05 100644 --- a/webrtc/modules/audio_processing/test/audioproc_float.cc +++ b/webrtc/modules/audio_processing/test/audioproc_float.cc @@ -42,20 +42,10 @@ DEFINE_string( o, "out.wav", "Name of the output file to write the processed capture stream to."); -DEFINE_string(ri, "", "Name of the render input stream file to read from."); -DEFINE_string( - ro, - "out_reverse.wav", - "Name of the output file to write the processed render stream to."); DEFINE_int32(out_channels, 1, "Number of output channels."); const bool out_channels_dummy = google::RegisterFlagValidator(&FLAGS_out_channels, &ValidateOutChannels); -DEFINE_int32(rev_out_channels, 1, "Number of reverse output channels."); -const bool rev_out_channels_dummy = - google::RegisterFlagValidator(&FLAGS_rev_out_channels, - &ValidateOutChannels); DEFINE_int32(out_sample_rate, 48000, "Output sample rate in Hz."); -DEFINE_int32(rev_out_sample_rate, 48000, "Reverse output sample rate in Hz."); DEFINE_string(mic_positions, "", "Space delimited cartesian coordinates of microphones in meters. " "The coordinates of each point are contiguous. " @@ -87,7 +77,8 @@ const char kUsage[] = "an input capture WAV file or protobuf debug dump and writes to an output\n" "WAV file.\n" "\n" - "All components are disabled by default."; + "All components are disabled by default. 
If any bi-directional components\n" + "are enabled, only debug dump files are permitted."; } // namespace @@ -100,6 +91,15 @@ int main(int argc, char* argv[]) { "An input file must be specified with either -i or -dump.\n"); return 1; } + if (FLAGS_dump.empty() && (FLAGS_aec || FLAGS_ie)) { + fprintf(stderr, "-aec and -ie require a -dump file.\n"); + return 1; + } + if (FLAGS_ie) { + fprintf(stderr, + "FIXME(ajm): The intelligibility enhancer output is not dumped.\n"); + return 1; + } test::TraceToStderr trace_to_stderr(true); Config config; @@ -135,24 +135,8 @@ int main(int argc, char* argv[]) { if (FLAGS_dump.empty()) { auto in_file = std::unique_ptr(new WavReader(FLAGS_i)); std::cout << FLAGS_i << ": " << in_file->FormatAsString() << std::endl; - std::unique_ptr reverse_in_file; - std::unique_ptr reverse_out_file; - if (!FLAGS_ri.empty()) { - reverse_in_file.reset(new WavReader(FLAGS_ri)); - reverse_out_file.reset(new WavWriter( - FLAGS_ro, - FLAGS_rev_out_sample_rate, - static_cast(FLAGS_rev_out_channels))); - std::cout << FLAGS_ri << ": " - << reverse_in_file->FormatAsString() << std::endl; - std::cout << FLAGS_ro << ": " - << reverse_out_file->FormatAsString() << std::endl; - } - processor.reset(new WavFileProcessor(std::move(ap), - std::move(in_file), - std::move(out_file), - std::move(reverse_in_file), - std::move(reverse_out_file))); + processor.reset(new WavFileProcessor(std::move(ap), std::move(in_file), + std::move(out_file))); } else { processor.reset(new AecDumpFileProcessor(