From cb3f9bd9c024f11e1ee060de23bf65c7a1f9f594 Mon Sep 17 00:00:00 2001 From: Alejandro Luebs Date: Thu, 29 Oct 2015 18:21:34 -0700 Subject: [PATCH] Make the nonlinear beamformer steerable Depends on this CL: https://codereview.webrtc.org/1395453004/ R=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1394103003 . Cr-Commit-Position: refs/heads/master@{#10458} --- .../audio_processing/audio_processing_impl.cc | 4 +- .../audio_processing/audio_processing_impl.h | 1 + .../audio_processing/beamformer/array_util.cc | 86 ++++++++ .../audio_processing/beamformer/array_util.h | 55 +++++- .../beamformer/array_util_unittest.cc | 173 +++++++++++++++- .../audio_processing/beamformer/beamformer.h | 3 + .../beamformer/nonlinear_beamformer.cc | 186 ++++++++++-------- .../beamformer/nonlinear_beamformer.h | 30 ++- .../nonlinear_beamformer_unittest.cc | 147 ++++++++++++++ .../include/audio_processing.h | 19 +- .../audio_processing/test/audioproc_float.cc | 6 +- webrtc/modules/modules.gyp | 1 + 12 files changed, 613 insertions(+), 98 deletions(-) create mode 100644 webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_unittest.cc diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc index 2a54a1a109..c6574151d0 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.cc +++ b/webrtc/modules/audio_processing/audio_processing_impl.cc @@ -225,6 +225,7 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config, beamformer_enabled_(config.Get().enabled), beamformer_(beamformer), array_geometry_(config.Get().array_geometry), + target_direction_(config.Get().target_direction), intelligibility_enabled_(config.Get().enabled) { echo_cancellation_ = new EchoCancellationImpl(this, crit_); component_list_.push_back(echo_cancellation_); @@ -1099,7 +1100,8 @@ void AudioProcessingImpl::InitializeTransient() { void AudioProcessingImpl::InitializeBeamformer() { if (beamformer_enabled_) { if 
(!beamformer_) { - beamformer_.reset(new NonlinearBeamformer(array_geometry_)); + beamformer_.reset( + new NonlinearBeamformer(array_geometry_, target_direction_)); } beamformer_->Initialize(kChunkSizeMs, split_rate_); } diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h index bf29bf3633..542886ee10 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.h +++ b/webrtc/modules/audio_processing/audio_processing_impl.h @@ -208,6 +208,7 @@ class AudioProcessingImpl : public AudioProcessing { const bool beamformer_enabled_; rtc::scoped_ptr> beamformer_; const std::vector array_geometry_; + const SphericalPointf target_direction_; bool intelligibility_enabled_; rtc::scoped_ptr intelligibility_enhancer_; diff --git a/webrtc/modules/audio_processing/beamformer/array_util.cc b/webrtc/modules/audio_processing/beamformer/array_util.cc index c1c4066b0c..8aaeee9f59 100644 --- a/webrtc/modules/audio_processing/beamformer/array_util.cc +++ b/webrtc/modules/audio_processing/beamformer/array_util.cc @@ -16,6 +16,11 @@ #include "webrtc/base/checks.h" namespace webrtc { +namespace { + +const float kMaxDotProduct = 1e-6f; + +} // namespace float GetMinimumSpacing(const std::vector& array_geometry) { RTC_CHECK_GT(array_geometry.size(), 1u); @@ -29,4 +34,85 @@ float GetMinimumSpacing(const std::vector& array_geometry) { return mic_spacing; } +Point PairDirection(const Point& a, const Point& b) { + return {b.x() - a.x(), b.y() - a.y(), b.z() - a.z()}; +} + +float DotProduct(const Point& a, const Point& b) { + return a.x() * b.x() + a.y() * b.y() + a.z() * b.z(); +} + +Point CrossProduct(const Point& a, const Point& b) { + return {a.y() * b.z() - a.z() * b.y(), a.z() * b.x() - a.x() * b.z(), + a.x() * b.y() - a.y() * b.x()}; +} + +bool AreParallel(const Point& a, const Point& b) { + Point cross_product = CrossProduct(a, b); + return DotProduct(cross_product, cross_product) < kMaxDotProduct; +} + +bool 
ArePerpendicular(const Point& a, const Point& b) { + return std::abs(DotProduct(a, b)) < kMaxDotProduct; +} + +rtc::Maybe GetDirectionIfLinear( + const std::vector& array_geometry) { + RTC_DCHECK_GT(array_geometry.size(), 1u); + const Point first_pair_direction = + PairDirection(array_geometry[0], array_geometry[1]); + for (size_t i = 2u; i < array_geometry.size(); ++i) { + const Point pair_direction = + PairDirection(array_geometry[i - 1], array_geometry[i]); + if (!AreParallel(first_pair_direction, pair_direction)) { + return rtc::Maybe(); + } + } + return first_pair_direction; +} + +rtc::Maybe GetNormalIfPlanar(const std::vector& array_geometry) { + RTC_DCHECK_GT(array_geometry.size(), 1u); + const Point first_pair_direction = + PairDirection(array_geometry[0], array_geometry[1]); + Point pair_direction(0.f, 0.f, 0.f); + size_t i = 2u; + bool is_linear = true; + for (; i < array_geometry.size() && is_linear; ++i) { + pair_direction = PairDirection(array_geometry[i - 1], array_geometry[i]); + if (!AreParallel(first_pair_direction, pair_direction)) { + is_linear = false; + } + } + if (is_linear) { + return rtc::Maybe(); + } + const Point normal_direction = + CrossProduct(first_pair_direction, pair_direction); + for (; i < array_geometry.size(); ++i) { + pair_direction = PairDirection(array_geometry[i - 1], array_geometry[i]); + if (!ArePerpendicular(normal_direction, pair_direction)) { + return rtc::Maybe(); + } + } + return normal_direction; +} + +rtc::Maybe GetArrayNormalIfExists( + const std::vector& array_geometry) { + const rtc::Maybe direction = GetDirectionIfLinear(array_geometry); + if (direction) { + return Point(direction->y(), -direction->x(), 0.f); + } + const rtc::Maybe normal = GetNormalIfPlanar(array_geometry); + if (normal && normal->z() < kMaxDotProduct) { + return normal; + } + return rtc::Maybe(); +} + +Point AzimuthToPoint(float azimuth) { + return Point(std::cos(azimuth), std::sin(azimuth), 0.f); +} + } // namespace webrtc diff --git 
a/webrtc/modules/audio_processing/beamformer/array_util.h b/webrtc/modules/audio_processing/beamformer/array_util.h index 2ac174ea8a..7fff9735a1 100644 --- a/webrtc/modules/audio_processing/beamformer/array_util.h +++ b/webrtc/modules/audio_processing/beamformer/array_util.h @@ -14,11 +14,23 @@ #include #include +#include "webrtc/base/maybe.h" + namespace webrtc { -// Coordinates in meters. +// Coordinates in meters. The convention used is: +// x: the horizontal dimension, with positive to the right from the camera's +// perspective. +// y: the depth dimension, with positive forward from the camera's +// perspective. +// z: the vertical dimension, with positive upwards. template struct CartesianPoint { + CartesianPoint() { + c[0] = 0; + c[1] = 0; + c[2] = 0; + } CartesianPoint(T x, T y, T z) { c[0] = x; c[1] = y; @@ -32,10 +44,35 @@ struct CartesianPoint { using Point = CartesianPoint; +// Calculates the direction from a to b. +Point PairDirection(const Point& a, const Point& b); + +float DotProduct(const Point& a, const Point& b); +Point CrossProduct(const Point& a, const Point& b); + +bool AreParallel(const Point& a, const Point& b); +bool ArePerpendicular(const Point& a, const Point& b); + // Returns the minimum distance between any two Points in the given // |array_geometry|. float GetMinimumSpacing(const std::vector& array_geometry); +// If the given array geometry is linear it returns the direction without +// normalizing. +rtc::Maybe GetDirectionIfLinear( + const std::vector& array_geometry); + +// If the given array geometry is planar it returns the normal without +// normalizing. +rtc::Maybe GetNormalIfPlanar(const std::vector& array_geometry); + +// Returns the normal of an array if it has one and it is in the xy-plane. +rtc::Maybe GetArrayNormalIfExists( + const std::vector& array_geometry); + +// The resulting Point will be in the xy-plane. 
+Point AzimuthToPoint(float azimuth); + template float Distance(CartesianPoint a, CartesianPoint b) { return std::sqrt((a.x() - b.x()) * (a.x() - b.x()) + @@ -43,6 +80,11 @@ float Distance(CartesianPoint a, CartesianPoint b) { (a.z() - b.z()) * (a.z() - b.z())); } +// The convention used: +// azimuth: zero is to the right from the camera's perspective, with positive +// angles in radians counter-clockwise. +// elevation: zero is horizontal, with positive angles in radians upwards. +// radius: distance from the camera in meters. template struct SphericalPoint { SphericalPoint(T azimuth, T elevation, T radius) { @@ -58,6 +100,17 @@ struct SphericalPoint { using SphericalPointf = SphericalPoint; +// Helper functions to transform degrees to radians and the inverse. +template +T DegreesToRadians(T angle_degrees) { + return M_PI * angle_degrees / 180; +} + +template +T RadiansToDegrees(T angle_radians) { + return 180 * angle_radians / M_PI; +} + } // namespace webrtc #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_ diff --git a/webrtc/modules/audio_processing/beamformer/array_util_unittest.cc b/webrtc/modules/audio_processing/beamformer/array_util_unittest.cc index 57f1708225..e3a7bbd7aa 100644 --- a/webrtc/modules/audio_processing/beamformer/array_util_unittest.cc +++ b/webrtc/modules/audio_processing/beamformer/array_util_unittest.cc @@ -8,25 +8,178 @@ * be found in the AUTHORS file in the root of the source tree. */ +// MSVC++ requires this to be set before any other includes to get M_PI. 
+#define _USE_MATH_DEFINES + #include "webrtc/modules/audio_processing/beamformer/array_util.h" +#include #include #include "testing/gtest/include/gtest/gtest.h" namespace webrtc { +bool operator==(const Point& lhs, const Point& rhs) { + return lhs.x() == rhs.x() && lhs.y() == rhs.y() && lhs.z() == rhs.z(); +} + +TEST(ArrayUtilTest, PairDirection) { + EXPECT_EQ(Point(1.f, 2.f, 3.f), + PairDirection(Point(0.f, 0.f, 0.f), Point(1.f, 2.f, 3.f))); + EXPECT_EQ(Point(-1.f, -2.f, -3.f), + PairDirection(Point(1.f, 2.f, 3.f), Point(0.f, 0.f, 0.f))); + EXPECT_EQ(Point(0.f, 0.f, 0.f), + PairDirection(Point(1.f, 0.f, 0.f), Point(1.f, 0.f, 0.f))); + EXPECT_EQ(Point(-1.f, 2.f, 0.f), + PairDirection(Point(1.f, 0.f, 0.f), Point(0.f, 2.f, 0.f))); + EXPECT_EQ(Point(-4.f, 4.f, -4.f), + PairDirection(Point(1.f, -2.f, 3.f), Point(-3.f, 2.f, -1.f))); +} + +TEST(ArrayUtilTest, DotProduct) { + EXPECT_FLOAT_EQ(0.f, DotProduct(Point(0.f, 0.f, 0.f), Point(1.f, 2.f, 3.f))); + EXPECT_FLOAT_EQ(0.f, DotProduct(Point(1.f, 0.f, 2.f), Point(0.f, 3.f, 0.f))); + EXPECT_FLOAT_EQ(0.f, DotProduct(Point(1.f, 1.f, 0.f), Point(1.f, -1.f, 0.f))); + EXPECT_FLOAT_EQ(2.f, DotProduct(Point(1.f, 0.f, 0.f), Point(2.f, 0.f, 0.f))); + EXPECT_FLOAT_EQ(-6.f, + DotProduct(Point(-2.f, 0.f, 0.f), Point(3.f, 0.f, 0.f))); + EXPECT_FLOAT_EQ(-10.f, + DotProduct(Point(1.f, -2.f, 3.f), Point(-3.f, 2.f, -1.f))); +} + +TEST(ArrayUtilTest, CrossProduct) { + EXPECT_EQ(Point(0.f, 0.f, 0.f), + CrossProduct(Point(0.f, 0.f, 0.f), Point(1.f, 2.f, 3.f))); + EXPECT_EQ(Point(0.f, 0.f, 1.f), + CrossProduct(Point(1.f, 0.f, 0.f), Point(0.f, 1.f, 0.f))); + EXPECT_EQ(Point(1.f, 0.f, 0.f), + CrossProduct(Point(0.f, 1.f, 0.f), Point(0.f, 0.f, 1.f))); + EXPECT_EQ(Point(0.f, -1.f, 0.f), + CrossProduct(Point(1.f, 0.f, 0.f), Point(0.f, 0.f, 1.f))); + EXPECT_EQ(Point(-4.f, -8.f, -4.f), + CrossProduct(Point(1.f, -2.f, 3.f), Point(-3.f, 2.f, -1.f))); +} + +TEST(ArrayUtilTest, AreParallel) { + EXPECT_TRUE(AreParallel(Point(0.f, 0.f, 0.f), Point(1.f, 
2.f, 3.f))); + EXPECT_FALSE(AreParallel(Point(1.f, 0.f, 2.f), Point(0.f, 3.f, 0.f))); + EXPECT_FALSE(AreParallel(Point(1.f, 2.f, 0.f), Point(1.f, -0.5f, 0.f))); + EXPECT_FALSE(AreParallel(Point(1.f, -2.f, 3.f), Point(-3.f, 2.f, -1.f))); + EXPECT_TRUE(AreParallel(Point(1.f, 0.f, 0.f), Point(2.f, 0.f, 0.f))); + EXPECT_TRUE(AreParallel(Point(1.f, 2.f, 3.f), Point(-2.f, -4.f, -6.f))); +} + +TEST(ArrayUtilTest, ArePerpendicular) { + EXPECT_TRUE(ArePerpendicular(Point(0.f, 0.f, 0.f), Point(1.f, 2.f, 3.f))); + EXPECT_TRUE(ArePerpendicular(Point(1.f, 0.f, 2.f), Point(0.f, 3.f, 0.f))); + EXPECT_TRUE(ArePerpendicular(Point(1.f, 2.f, 0.f), Point(1.f, -0.5f, 0.f))); + EXPECT_FALSE(ArePerpendicular(Point(1.f, -2.f, 3.f), Point(-3.f, 2.f, -1.f))); + EXPECT_FALSE(ArePerpendicular(Point(1.f, 0.f, 0.f), Point(2.f, 0.f, 0.f))); + EXPECT_FALSE(ArePerpendicular(Point(1.f, 2.f, 3.f), Point(-2.f, -4.f, -6.f))); +} + TEST(ArrayUtilTest, GetMinimumSpacing) { - std::vector array_geometry; - array_geometry.push_back(Point(0.f, 0.f, 0.f)); - array_geometry.push_back(Point(0.1f, 0.f, 0.f)); - EXPECT_FLOAT_EQ(0.1f, GetMinimumSpacing(array_geometry)); - array_geometry.push_back(Point(0.f, 0.05f, 0.f)); - EXPECT_FLOAT_EQ(0.05f, GetMinimumSpacing(array_geometry)); - array_geometry.push_back(Point(0.f, 0.f, 0.02f)); - EXPECT_FLOAT_EQ(0.02f, GetMinimumSpacing(array_geometry)); - array_geometry.push_back(Point(-0.003f, -0.004f, 0.02f)); - EXPECT_FLOAT_EQ(0.005f, GetMinimumSpacing(array_geometry)); + std::vector geometry; + geometry.push_back(Point(0.f, 0.f, 0.f)); + geometry.push_back(Point(0.1f, 0.f, 0.f)); + EXPECT_FLOAT_EQ(0.1f, GetMinimumSpacing(geometry)); + geometry.push_back(Point(0.f, 0.05f, 0.f)); + EXPECT_FLOAT_EQ(0.05f, GetMinimumSpacing(geometry)); + geometry.push_back(Point(0.f, 0.f, 0.02f)); + EXPECT_FLOAT_EQ(0.02f, GetMinimumSpacing(geometry)); + geometry.push_back(Point(-0.003f, -0.004f, 0.02f)); + EXPECT_FLOAT_EQ(0.005f, GetMinimumSpacing(geometry)); +} + +TEST(ArrayUtilTest, 
GetDirectionIfLinear) { + std::vector geometry; + geometry.push_back(Point(0.f, 0.f, 0.f)); + geometry.push_back(Point(0.1f, 0.f, 0.f)); + EXPECT_TRUE( + AreParallel(Point(1.f, 0.f, 0.f), *GetDirectionIfLinear(geometry))); + geometry.push_back(Point(0.15f, 0.f, 0.f)); + EXPECT_TRUE( + AreParallel(Point(1.f, 0.f, 0.f), *GetDirectionIfLinear(geometry))); + geometry.push_back(Point(-0.2f, 0.f, 0.f)); + EXPECT_TRUE( + AreParallel(Point(1.f, 0.f, 0.f), *GetDirectionIfLinear(geometry))); + geometry.push_back(Point(0.05f, 0.f, 0.f)); + EXPECT_TRUE( + AreParallel(Point(1.f, 0.f, 0.f), *GetDirectionIfLinear(geometry))); + geometry.push_back(Point(0.1f, 0.1f, 0.f)); + EXPECT_FALSE(GetDirectionIfLinear(geometry)); + geometry.push_back(Point(0.f, 0.f, -0.2f)); + EXPECT_FALSE(GetDirectionIfLinear(geometry)); +} + +TEST(ArrayUtilTest, GetNormalIfPlanar) { + std::vector geometry; + geometry.push_back(Point(0.f, 0.f, 0.f)); + geometry.push_back(Point(0.1f, 0.f, 0.f)); + EXPECT_FALSE(GetNormalIfPlanar(geometry)); + geometry.push_back(Point(0.15f, 0.f, 0.f)); + EXPECT_FALSE(GetNormalIfPlanar(geometry)); + geometry.push_back(Point(0.1f, 0.2f, 0.f)); + EXPECT_TRUE(AreParallel(Point(0.f, 0.f, 1.f), *GetNormalIfPlanar(geometry))); + geometry.push_back(Point(0.f, -0.15f, 0.f)); + EXPECT_TRUE(AreParallel(Point(0.f, 0.f, 1.f), *GetNormalIfPlanar(geometry))); + geometry.push_back(Point(0.f, 0.1f, 0.2f)); + EXPECT_FALSE(GetNormalIfPlanar(geometry)); + geometry.push_back(Point(0.f, 0.f, -0.15f)); + EXPECT_FALSE(GetNormalIfPlanar(geometry)); + geometry.push_back(Point(0.1f, 0.2f, 0.f)); + EXPECT_FALSE(GetNormalIfPlanar(geometry)); +} + +TEST(ArrayUtilTest, GetArrayNormalIfExists) { + std::vector geometry; + geometry.push_back(Point(0.f, 0.f, 0.f)); + geometry.push_back(Point(0.1f, 0.f, 0.f)); + EXPECT_TRUE( + AreParallel(Point(0.f, 1.f, 0.f), *GetArrayNormalIfExists(geometry))); + geometry.push_back(Point(0.15f, 0.f, 0.f)); + EXPECT_TRUE( + AreParallel(Point(0.f, 1.f, 0.f), 
*GetArrayNormalIfExists(geometry))); + geometry.push_back(Point(0.1f, 0.f, 0.2f)); + EXPECT_TRUE( + AreParallel(Point(0.f, 1.f, 0.f), *GetArrayNormalIfExists(geometry))); + geometry.push_back(Point(0.f, 0.f, -0.1f)); + EXPECT_TRUE( + AreParallel(Point(0.f, 1.f, 0.f), *GetArrayNormalIfExists(geometry))); + geometry.push_back(Point(0.1f, 0.2f, 0.3f)); + EXPECT_FALSE(GetArrayNormalIfExists(geometry)); + geometry.push_back(Point(0.f, -0.1f, 0.f)); + EXPECT_FALSE(GetArrayNormalIfExists(geometry)); + geometry.push_back(Point(1.f, 0.f, -0.2f)); + EXPECT_FALSE(GetArrayNormalIfExists(geometry)); +} + +TEST(ArrayUtilTest, DegreesToRadians) { + EXPECT_FLOAT_EQ(0.f, DegreesToRadians(0.f)); + EXPECT_FLOAT_EQ(static_cast(M_PI) / 6.f, DegreesToRadians(30.f)); + EXPECT_FLOAT_EQ(-static_cast(M_PI) / 4.f, DegreesToRadians(-45.f)); + EXPECT_FLOAT_EQ(static_cast(M_PI) / 3.f, DegreesToRadians(60.f)); + EXPECT_FLOAT_EQ(-static_cast(M_PI) / 2.f, DegreesToRadians(-90.f)); + EXPECT_FLOAT_EQ(2.f * static_cast(M_PI) / 3.f, + DegreesToRadians(120.f)); + EXPECT_FLOAT_EQ(-3.f * static_cast(M_PI) / 4.f, + DegreesToRadians(-135.f)); + EXPECT_FLOAT_EQ(5.f * static_cast(M_PI) / 6.f, + DegreesToRadians(150.f)); + EXPECT_FLOAT_EQ(-static_cast(M_PI), DegreesToRadians(-180.f)); +} + +TEST(ArrayUtilTest, RadiansToDegrees) { + EXPECT_FLOAT_EQ(0.f, RadiansToDegrees(0.f)); + EXPECT_FLOAT_EQ(30.f, RadiansToDegrees(M_PI / 6.f)); + EXPECT_FLOAT_EQ(-45.f, RadiansToDegrees(-M_PI / 4.f)); + EXPECT_FLOAT_EQ(60.f, RadiansToDegrees(M_PI / 3.f)); + EXPECT_FLOAT_EQ(-90.f, RadiansToDegrees(-M_PI / 2.f)); + EXPECT_FLOAT_EQ(120.f, RadiansToDegrees(2.f * M_PI / 3.f)); + EXPECT_FLOAT_EQ(-135.f, RadiansToDegrees(-3.f * M_PI / 4.f)); + EXPECT_FLOAT_EQ(150.f, RadiansToDegrees(5.f * M_PI / 6.f)); + EXPECT_FLOAT_EQ(-180.f, RadiansToDegrees(-M_PI)); } } // namespace webrtc diff --git a/webrtc/modules/audio_processing/beamformer/beamformer.h b/webrtc/modules/audio_processing/beamformer/beamformer.h index 54734dddb8..6a9ff45d12 
100644 --- a/webrtc/modules/audio_processing/beamformer/beamformer.h +++ b/webrtc/modules/audio_processing/beamformer/beamformer.h @@ -32,6 +32,9 @@ class Beamformer { // Needs to be called before the the Beamformer can be used. virtual void Initialize(int chunk_size_ms, int sample_rate_hz) = 0; + // Aim the beamformer at a point in space. + virtual void AimAt(const SphericalPointf& spherical_point) = 0; + // Indicates whether a given point is inside of the beam. virtual bool IsInBeam(const SphericalPointf& spherical_point) { return true; } diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc index d3f9b33bc2..029fa089fc 100644 --- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc +++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc @@ -29,13 +29,6 @@ const float kKbdAlpha = 1.5f; const float kSpeedOfSoundMeterSeconds = 343; -// For both target and interference angles, PI / 2 is perpendicular to the -// microphone array, facing forwards. The positive direction goes -// counterclockwise. -// The angle at which we amplify sound. -// TODO(aluebs): Make the target angle dynamically settable. -const float kTargetAngleRadians = static_cast(M_PI) / 2.f; - // The minimum separation in radians between the target direction and an // interferer scenario. const float kMinAwayRadians = 0.2f; @@ -50,8 +43,6 @@ const float kAwaySlope = 0.008f; // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance) const float kBalance = 0.95f; -const float kHalfBeamWidthRadians = static_cast(M_PI) * 20.f / 180.f; - // Alpha coefficients for mask smoothing. 
const float kMaskTimeSmoothAlpha = 0.2f; const float kMaskFrequencySmoothAlpha = 0.6f; @@ -187,14 +178,23 @@ std::vector GetCenteredArray(std::vector array_geometry) { } // namespace +const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f); + // static const size_t NonlinearBeamformer::kNumFreqBins; NonlinearBeamformer::NonlinearBeamformer( - const std::vector& array_geometry) + const std::vector& array_geometry, + SphericalPointf target_direction) : num_input_channels_(array_geometry.size()), array_geometry_(GetCenteredArray(array_geometry)), - min_mic_spacing_(GetMinimumSpacing(array_geometry)) { + array_normal_(GetArrayNormalIfExists(array_geometry)), + min_mic_spacing_(GetMinimumSpacing(array_geometry)), + target_angle_radians_(target_direction.azimuth()), + away_radians_(std::min( + static_cast(M_PI), + std::max(kMinAwayRadians, + kAwaySlope * static_cast(M_PI) / min_mic_spacing_))) { WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); } @@ -202,7 +202,6 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { chunk_length_ = static_cast(sample_rate_hz / (1000.f / chunk_size_ms)); sample_rate_hz_ = sample_rate_hz; - InitFrequencyCorrectionRanges(); high_pass_postfilter_mask_ = 1.f; is_target_present_ = false; @@ -223,75 +222,86 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; } - // Initialize all nonadaptive values before looping through the frames. 
- InitInterfAngles(); - InitDelaySumMasks(); - InitTargetCovMats(); - InitInterfCovMats(); - - for (size_t i = 0; i < kNumFreqBins; ++i) { - rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); - rpsiws_[i].clear(); - for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { - rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); - } - } + InitLowFrequencyCorrectionRanges(); + InitDiffuseCovMats(); + AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f)); } -void NonlinearBeamformer::InitFrequencyCorrectionRanges() { +// These bin indexes determine the regions over which a mean is taken. This is +// applied as a constant value over the adjacent end "frequency correction" +// regions. +// +// low_mean_start_bin_ high_mean_start_bin_ +// v v constant +// |----------------|--------|----------------|-------|----------------| +// constant ^ ^ +// low_mean_end_bin_ high_mean_end_bin_ +// +void NonlinearBeamformer::InitLowFrequencyCorrectionRanges() { + low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_); + low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_); + + RTC_DCHECK_GT(low_mean_start_bin_, 0U); + RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); +} + +void NonlinearBeamformer::InitHighFrequencyCorrectionRanges() { const float kAliasingFreqHz = kSpeedOfSoundMeterSeconds / - (min_mic_spacing_ * (1.f + std::abs(std::cos(kTargetAngleRadians)))); + (min_mic_spacing_ * (1.f + std::abs(std::cos(target_angle_radians_)))); const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz, sample_rate_hz_ / 2.f); const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz, sample_rate_hz_ / 2.f); - - low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_); - low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_); high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_); high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_); - // 
These bin indexes determine the regions over which a mean is taken. This - // is applied as a constant value over the adjacent end "frequency correction" - // regions. - // - // low_mean_start_bin_ high_mean_start_bin_ - // v v constant - // |----------------|--------|----------------|-------|----------------| - // constant ^ ^ - // low_mean_end_bin_ high_mean_end_bin_ - // - RTC_DCHECK_GT(low_mean_start_bin_, 0U); - RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); + RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); } - void NonlinearBeamformer::InitInterfAngles() { - const float kAwayRadians = - std::min(static_cast(M_PI), - std::max(kMinAwayRadians, kAwaySlope * static_cast(M_PI) / - min_mic_spacing_)); - interf_angles_radians_.clear(); - // TODO(aluebs): When the target angle is settable, make sure the interferer - // scenarios aren't reflected over the target one for linear geometries. - interf_angles_radians_.push_back(kTargetAngleRadians - kAwayRadians); - interf_angles_radians_.push_back(kTargetAngleRadians + kAwayRadians); + const Point target_direction = AzimuthToPoint(target_angle_radians_); + const Point clockwise_interf_direction = + AzimuthToPoint(target_angle_radians_ - away_radians_); + if (!array_normal_ || + DotProduct(*array_normal_, target_direction) * + DotProduct(*array_normal_, clockwise_interf_direction) >= + 0.f) { + // The target and clockwise interferer are in the same half-plane defined + // by the array. + interf_angles_radians_.push_back(target_angle_radians_ - away_radians_); + } else { + // Otherwise, the interferer will begin reflecting back at the target. + // Instead rotate it away 180 degrees. 
+ interf_angles_radians_.push_back(target_angle_radians_ - away_radians_ + + M_PI); + } + const Point counterclock_interf_direction = + AzimuthToPoint(target_angle_radians_ + away_radians_); + if (!array_normal_ || + DotProduct(*array_normal_, target_direction) * + DotProduct(*array_normal_, counterclock_interf_direction) >= + 0.f) { + // The target and counter-clockwise interferer are in the same half-plane + // defined by the array. + interf_angles_radians_.push_back(target_angle_radians_ + away_radians_); + } else { + // Otherwise, the interferer will begin reflecting back at the target. + // Instead rotate it away 180 degrees. + interf_angles_radians_.push_back(target_angle_radians_ + away_radians_ - + M_PI); + } } void NonlinearBeamformer::InitDelaySumMasks() { for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { delay_sum_masks_[f_ix].Resize(1, num_input_channels_); - CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix, - kFftSize, - sample_rate_hz_, - kSpeedOfSoundMeterSeconds, - array_geometry_, - kTargetAngleRadians, - &delay_sum_masks_[f_ix]); + CovarianceMatrixGenerator::PhaseAlignmentMasks( + f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds, + array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]); complex_f norm_factor = sqrt( ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); @@ -309,15 +319,19 @@ void NonlinearBeamformer::InitTargetCovMats() { } } +void NonlinearBeamformer::InitDiffuseCovMats() { + for (size_t i = 0; i < kNumFreqBins; ++i) { + uniform_cov_mat_[i].Resize(num_input_channels_, num_input_channels_); + CovarianceMatrixGenerator::UniformCovarianceMatrix( + wave_numbers_[i], array_geometry_, &uniform_cov_mat_[i]); + complex_f normalization_factor = uniform_cov_mat_[i].elements()[0][0]; + uniform_cov_mat_[i].Scale(1.f / normalization_factor); + uniform_cov_mat_[i].Scale(1 - kBalance); + } +} + void NonlinearBeamformer::InitInterfCovMats() { for (size_t i = 0; i < kNumFreqBins; ++i) { - ComplexMatrixF 
uniform_cov_mat(num_input_channels_, num_input_channels_); - CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i], - array_geometry_, - &uniform_cov_mat); - complex_f normalization_factor = uniform_cov_mat.elements()[0][0]; - uniform_cov_mat.Scale(1.f / normalization_factor); - uniform_cov_mat.Scale(1 - kBalance); interf_cov_mats_[i].clear(); for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_, @@ -333,11 +347,21 @@ void NonlinearBeamformer::InitInterfCovMats() { array_geometry_, &angled_cov_mat); // Normalize matrices before averaging them. - normalization_factor = angled_cov_mat.elements()[0][0]; + complex_f normalization_factor = angled_cov_mat.elements()[0][0]; angled_cov_mat.Scale(1.f / normalization_factor); // Weighted average of matrices. angled_cov_mat.Scale(kBalance); - interf_cov_mats_[i][j]->Add(uniform_cov_mat, angled_cov_mat); + interf_cov_mats_[i][j]->Add(uniform_cov_mat_[i], angled_cov_mat); + } + } +} + +void NonlinearBeamformer::NormalizeCovMats() { + for (size_t i = 0; i < kNumFreqBins; ++i) { + rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); + rpsiws_[i].clear(); + for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { + rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); } } } @@ -354,28 +378,32 @@ void NonlinearBeamformer::ProcessChunk(const ChannelBuffer& input, const float ramp_increment = (high_pass_postfilter_mask_ - old_high_pass_mask) / input.num_frames_per_band(); - // Apply delay and sum and post-filter in the time domain. WARNING: only works - // because delay-and-sum is not frequency dependent. + // Apply the smoothed high-pass mask to the first channel of each band. + // This can be done because the effct of the linear beamformer is negligible + // compared to the post-filter. 
for (size_t i = 1; i < input.num_bands(); ++i) { float smoothed_mask = old_high_pass_mask; for (size_t j = 0; j < input.num_frames_per_band(); ++j) { smoothed_mask += ramp_increment; - - // Applying the delay and sum (at zero degrees, this is equivalent to - // averaging). - float sum = 0.f; - for (int k = 0; k < input.num_channels(); ++k) { - sum += input.channels(i)[k][j]; - } - output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask; + output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask; } } } +void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) { + target_angle_radians_ = target_direction.azimuth(); + InitHighFrequencyCorrectionRanges(); + InitInterfAngles(); + InitDelaySumMasks(); + InitTargetCovMats(); + InitInterfCovMats(); + NormalizeCovMats(); +} + bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) { // If more than half-beamwidth degrees away from the beam's center, // you are out of the beam. - return fabs(spherical_point.azimuth() - kTargetAngleRadians) < + return fabs(spherical_point.azimuth() - target_angle_radians_) < kHalfBeamWidthRadians; } diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h index 62d5d69168..565c1f349f 100644 --- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h +++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h @@ -11,6 +11,10 @@ #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ +// MSVC++ requires this to be set before any other includes to get M_PI. 
+#define _USE_MATH_DEFINES + +#include #include #include "webrtc/common_audio/lapped_transform.h" @@ -31,7 +35,12 @@ class NonlinearBeamformer : public Beamformer, public LappedTransform::Callback { public: - explicit NonlinearBeamformer(const std::vector& array_geometry); + static const float kHalfBeamWidthRadians; + + explicit NonlinearBeamformer( + const std::vector& array_geometry, + SphericalPointf target_direction = + SphericalPointf(static_cast(M_PI) / 2.f, 0.f, 1.f)); // Sample rate corresponds to the lower band. // Needs to be called before the NonlinearBeamformer can be used. @@ -44,6 +53,8 @@ class NonlinearBeamformer void ProcessChunk(const ChannelBuffer& input, ChannelBuffer* output) override; + void AimAt(const SphericalPointf& target_direction) override; + bool IsInBeam(const SphericalPointf& spherical_point) override; // After processing each block |is_target_present_| is set to true if the @@ -62,15 +73,21 @@ class NonlinearBeamformer complex* const* output) override; private: + FRIEND_TEST_ALL_PREFIXES(NonlinearBeamformerTest, + InterfAnglesTakeAmbiguityIntoAccount); + typedef Matrix MatrixF; typedef ComplexMatrix ComplexMatrixF; typedef complex complex_f; - void InitFrequencyCorrectionRanges(); + void InitLowFrequencyCorrectionRanges(); + void InitHighFrequencyCorrectionRanges(); void InitInterfAngles(); void InitDelaySumMasks(); void InitTargetCovMats(); + void InitDiffuseCovMats(); void InitInterfCovMats(); + void NormalizeCovMats(); // Calculates postfilter masks that minimize the mean squared error of our // estimation of the desired signal. @@ -116,6 +133,8 @@ class NonlinearBeamformer int sample_rate_hz_; const std::vector array_geometry_; + // The normal direction of the array if it has one and it is in the xy-plane. + const rtc::Maybe array_normal_; // Minimum spacing between microphone pairs. const float min_mic_spacing_; @@ -133,17 +152,20 @@ class NonlinearBeamformer // Time and frequency smoothed mask. 
float final_mask_[kNumFreqBins]; + float target_angle_radians_; // Angles of the interferer scenarios. std::vector<float> interf_angles_radians_; + // The angle between the target and the interferer scenarios. + const float away_radians_; // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|. ComplexMatrixF delay_sum_masks_[kNumFreqBins]; ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins]; - // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x + // Arrays of length |kNumFreqBins|, Matrix of size |num_input_channels_| x // |num_input_channels_|. ComplexMatrixF target_cov_mats_[kNumFreqBins]; - + ComplexMatrixF uniform_cov_mat_[kNumFreqBins]; // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x // |num_input_channels_|. ScopedVector has a size equal to the number of // interferer scenarios. diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_unittest.cc b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_unittest.cc new file mode 100644 index 0000000000..a38a49b1e1 --- /dev/null +++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_unittest.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// MSVC++ requires this to be set before any other includes to get M_PI.
+#define _USE_MATH_DEFINES + +#include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h" + +#include <math.h> + +#include "testing/gtest/include/gtest/gtest.h" + +namespace webrtc { +namespace { + +const int kChunkSizeMs = 10; +const int kSampleRateHz = 16000; + +SphericalPointf AzimuthToSphericalPoint(float azimuth_radians) { + return SphericalPointf(azimuth_radians, 0.f, 1.f); +} + +void Verify(NonlinearBeamformer* bf, float target_azimuth_radians) { + EXPECT_TRUE(bf->IsInBeam(AzimuthToSphericalPoint(target_azimuth_radians))); + EXPECT_TRUE(bf->IsInBeam(AzimuthToSphericalPoint( + target_azimuth_radians - NonlinearBeamformer::kHalfBeamWidthRadians + + 0.001f))); + EXPECT_TRUE(bf->IsInBeam(AzimuthToSphericalPoint( + target_azimuth_radians + NonlinearBeamformer::kHalfBeamWidthRadians - + 0.001f))); + EXPECT_FALSE(bf->IsInBeam(AzimuthToSphericalPoint( + target_azimuth_radians - NonlinearBeamformer::kHalfBeamWidthRadians - + 0.001f))); + EXPECT_FALSE(bf->IsInBeam(AzimuthToSphericalPoint( + target_azimuth_radians + NonlinearBeamformer::kHalfBeamWidthRadians + + 0.001f))); +} + +void AimAndVerify(NonlinearBeamformer* bf, float target_azimuth_radians) { + bf->AimAt(AzimuthToSphericalPoint(target_azimuth_radians)); + Verify(bf, target_azimuth_radians); +} + +} // namespace + +TEST(NonlinearBeamformerTest, AimingModifiesBeam) { + std::vector<Point> array_geometry; + array_geometry.push_back(Point(-0.025f, 0.f, 0.f)); + array_geometry.push_back(Point(0.025f, 0.f, 0.f)); + NonlinearBeamformer bf(array_geometry); + bf.Initialize(kChunkSizeMs, kSampleRateHz); + // The default constructor parameter sets the target angle to PI / 2. + Verify(&bf, static_cast<float>(M_PI) / 2.f); + AimAndVerify(&bf, static_cast<float>(M_PI) / 3.f); + AimAndVerify(&bf, 3.f * static_cast<float>(M_PI) / 4.f); + AimAndVerify(&bf, static_cast<float>(M_PI) / 6.f); + AimAndVerify(&bf, static_cast<float>(M_PI)); +} + +TEST(NonlinearBeamformerTest, InterfAnglesTakeAmbiguityIntoAccount) { + { + // For linear arrays there is ambiguity.
+ std::vector<Point> array_geometry; + array_geometry.push_back(Point(-0.1f, 0.f, 0.f)); + array_geometry.push_back(Point(0.f, 0.f, 0.f)); + array_geometry.push_back(Point(0.2f, 0.f, 0.f)); + NonlinearBeamformer bf(array_geometry); + bf.Initialize(kChunkSizeMs, kSampleRateHz); + EXPECT_EQ(2u, bf.interf_angles_radians_.size()); + EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_, + bf.interf_angles_radians_[0]); + EXPECT_FLOAT_EQ(M_PI / 2.f + bf.away_radians_, + bf.interf_angles_radians_[1]); + bf.AimAt(AzimuthToSphericalPoint(bf.away_radians_ / 2.f)); + EXPECT_EQ(2u, bf.interf_angles_radians_.size()); + EXPECT_FLOAT_EQ(M_PI - bf.away_radians_ / 2.f, + bf.interf_angles_radians_[0]); + EXPECT_FLOAT_EQ(3.f * bf.away_radians_ / 2.f, bf.interf_angles_radians_[1]); + } + { + // For planar arrays with normal in the xy-plane there is ambiguity. + std::vector<Point> array_geometry; + array_geometry.push_back(Point(-0.1f, 0.f, 0.f)); + array_geometry.push_back(Point(0.f, 0.f, 0.f)); + array_geometry.push_back(Point(0.2f, 0.f, 0.f)); + array_geometry.push_back(Point(0.1f, 0.f, 0.2f)); + array_geometry.push_back(Point(0.f, 0.f, -0.1f)); + NonlinearBeamformer bf(array_geometry); + bf.Initialize(kChunkSizeMs, kSampleRateHz); + EXPECT_EQ(2u, bf.interf_angles_radians_.size()); + EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_, + bf.interf_angles_radians_[0]); + EXPECT_FLOAT_EQ(M_PI / 2.f + bf.away_radians_, + bf.interf_angles_radians_[1]); + bf.AimAt(AzimuthToSphericalPoint(bf.away_radians_ / 2.f)); + EXPECT_EQ(2u, bf.interf_angles_radians_.size()); + EXPECT_FLOAT_EQ(M_PI - bf.away_radians_ / 2.f, + bf.interf_angles_radians_[0]); + EXPECT_FLOAT_EQ(3.f * bf.away_radians_ / 2.f, bf.interf_angles_radians_[1]); + } + { + // For planar arrays with normal not in the xy-plane there is no ambiguity.
+ std::vector<Point> array_geometry; + array_geometry.push_back(Point(0.f, 0.f, 0.f)); + array_geometry.push_back(Point(0.2f, 0.f, 0.f)); + array_geometry.push_back(Point(0.f, 0.1f, -0.2f)); + NonlinearBeamformer bf(array_geometry); + bf.Initialize(kChunkSizeMs, kSampleRateHz); + EXPECT_EQ(2u, bf.interf_angles_radians_.size()); + EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_, + bf.interf_angles_radians_[0]); + EXPECT_FLOAT_EQ(M_PI / 2.f + bf.away_radians_, + bf.interf_angles_radians_[1]); + bf.AimAt(AzimuthToSphericalPoint(bf.away_radians_ / 2.f)); + EXPECT_EQ(2u, bf.interf_angles_radians_.size()); + EXPECT_FLOAT_EQ(-bf.away_radians_ / 2.f, bf.interf_angles_radians_[0]); + EXPECT_FLOAT_EQ(3.f * bf.away_radians_ / 2.f, bf.interf_angles_radians_[1]); + } + { + // For arrays which are not linear or planar there is no ambiguity. + std::vector<Point> array_geometry; + array_geometry.push_back(Point(0.f, 0.f, 0.f)); + array_geometry.push_back(Point(0.1f, 0.f, 0.f)); + array_geometry.push_back(Point(0.f, 0.2f, 0.f)); + array_geometry.push_back(Point(0.f, 0.f, 0.3f)); + NonlinearBeamformer bf(array_geometry); + bf.Initialize(kChunkSizeMs, kSampleRateHz); + EXPECT_EQ(2u, bf.interf_angles_radians_.size()); + EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_, + bf.interf_angles_radians_[0]); + EXPECT_FLOAT_EQ(M_PI / 2.f + bf.away_radians_, + bf.interf_angles_radians_[1]); + bf.AimAt(AzimuthToSphericalPoint(bf.away_radians_ / 2.f)); + EXPECT_EQ(2u, bf.interf_angles_radians_.size()); + EXPECT_FLOAT_EQ(-bf.away_radians_ / 2.f, bf.interf_angles_radians_[0]); + EXPECT_FLOAT_EQ(3.f * bf.away_radians_ / 2.f, bf.interf_angles_radians_[1]); + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h index 318b2f8953..c8ddc6a483 100644 --- a/webrtc/modules/audio_processing/include/audio_processing.h +++ b/webrtc/modules/audio_processing/include/audio_processing.h @@ -11,6 +11,10 @@ #ifndef
WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ +// MSVC++ requires this to be set before any other includes to get M_PI. +#define _USE_MATH_DEFINES + +#include <math.h> #include <stddef.h> // size_t #include <stdio.h> // FILE #include <vector> @@ -109,12 +113,23 @@ struct ExperimentalNs { struct Beamforming { Beamforming() : enabled(false), - array_geometry() {} + array_geometry(), + target_direction( + SphericalPointf(static_cast<float>(M_PI) / 2.f, 0.f, 1.f)) {} Beamforming(bool enabled, const std::vector<Point>& array_geometry) + : Beamforming(enabled, + array_geometry, + SphericalPointf(static_cast<float>(M_PI) / 2.f, 0.f, 1.f)) { + } + Beamforming(bool enabled, + const std::vector<Point>& array_geometry, + SphericalPointf target_direction) : enabled(enabled), - array_geometry(array_geometry) {} + array_geometry(array_geometry), + target_direction(target_direction) {} const bool enabled; const std::vector<Point> array_geometry; + const SphericalPointf target_direction; }; // Use to enable intelligibility enhancer in audio processing. Must be provided
diff --git a/webrtc/modules/audio_processing/test/audioproc_float.cc b/webrtc/modules/audio_processing/test/audioproc_float.cc index 88d636e825..811e9070fa 100644 --- a/webrtc/modules/audio_processing/test/audioproc_float.cc +++ b/webrtc/modules/audio_processing/test/audioproc_float.cc @@ -37,6 +37,7 @@ DEFINE_string(mic_positions, "", "Space delimited cartesian coordinates of microphones in meters. " "The coordinates of each point are contiguous. " "For a two element array: \"x1 y1 z1 x2 y2 z2\""); +DEFINE_double(target_angle_degrees, 90, "The azimuth of the target in degrees"); DEFINE_bool(aec, false, "Enable echo cancellation."); DEFINE_bool(agc, false, "Enable automatic gain control."); @@ -107,7 +108,10 @@ int main(int argc, char* argv[]) { ParseArrayGeometry(FLAGS_mic_positions, num_mics); RTC_CHECK_EQ(array_geometry.size(), num_mics); - config.Set<Beamforming>(new Beamforming(true, array_geometry)); + config.Set<Beamforming>(new Beamforming( + true, array_geometry, + SphericalPointf(DegreesToRadians(FLAGS_target_angle_degrees), 0.f, + 1.f))); } rtc::scoped_ptr<AudioProcessing> ap(AudioProcessing::Create(config));
diff --git a/webrtc/modules/modules.gyp b/webrtc/modules/modules.gyp index 2bf0ac8079..f3ac454c19 100644 --- a/webrtc/modules/modules.gyp +++ b/webrtc/modules/modules.gyp @@ -171,6 +171,7 @@ 'audio_processing/beamformer/covariance_matrix_generator_unittest.cc', 'audio_processing/beamformer/matrix_unittest.cc', 'audio_processing/beamformer/mock_nonlinear_beamformer.h', + 'audio_processing/beamformer/nonlinear_beamformer_unittest.cc', 'audio_processing/echo_cancellation_impl_unittest.cc', 'audio_processing/intelligibility/intelligibility_enhancer_unittest.cc', 'audio_processing/intelligibility/intelligibility_utils_unittest.cc',