From de770dd7e9c7a22c4ebb002f0fb6336e2ed83175 Mon Sep 17 00:00:00 2001 From: flim Date: Fri, 6 Jan 2017 09:49:47 -0800 Subject: [PATCH] Remove AudioClassifier BUG=webrtc:5676 Review-Url: https://codereview.webrtc.org/2615983002 Cr-Commit-Position: refs/heads/master@{#15933} --- webrtc/modules/BUILD.gn | 1 - webrtc/modules/audio_coding/BUILD.gn | 25 ----- .../audio_coding/neteq/audio_classifier.cc | 75 ------------- .../audio_coding/neteq/audio_classifier.h | 58 ---------- .../neteq/audio_classifier_unittest.cc | 82 -------------- .../audio_coding/neteq/include/neteq.h | 2 - webrtc/modules/audio_coding/neteq/neteq.cc | 3 +- .../neteq/test/audio_classifier_test.cc | 104 ------------------ 8 files changed, 1 insertion(+), 349 deletions(-) delete mode 100644 webrtc/modules/audio_coding/neteq/audio_classifier.cc delete mode 100644 webrtc/modules/audio_coding/neteq/audio_classifier.h delete mode 100644 webrtc/modules/audio_coding/neteq/audio_classifier_unittest.cc delete mode 100644 webrtc/modules/audio_coding/neteq/test/audio_classifier_test.cc diff --git a/webrtc/modules/BUILD.gn b/webrtc/modules/BUILD.gn index cfe12845bf..bbb5744422 100644 --- a/webrtc/modules/BUILD.gn +++ b/webrtc/modules/BUILD.gn @@ -307,7 +307,6 @@ if (rtc_include_tests) { "audio_coding/codecs/opus/audio_encoder_opus_unittest.cc", "audio_coding/codecs/opus/opus_unittest.cc", "audio_coding/codecs/red/audio_encoder_copy_red_unittest.cc", - "audio_coding/neteq/audio_classifier_unittest.cc", "audio_coding/neteq/audio_multi_vector_unittest.cc", "audio_coding/neteq/audio_vector_unittest.cc", "audio_coding/neteq/background_noise_unittest.cc", diff --git a/webrtc/modules/audio_coding/BUILD.gn b/webrtc/modules/audio_coding/BUILD.gn index c44d022994..b78ba7936a 100644 --- a/webrtc/modules/audio_coding/BUILD.gn +++ b/webrtc/modules/audio_coding/BUILD.gn @@ -936,14 +936,6 @@ rtc_static_library("audio_network_adaptor") { } } -config("neteq_config") { - include_dirs = [ - # Need Opus header files for the audio classifier. - "//third_party/opus/src/celt", - "//third_party/opus/src/src", - ] -} - rtc_static_library("neteq") { # TODO(kjellander): Remove (bugs.webrtc.org/6828) # Cyclic dependency with :audio_coding if enabled. @@ -952,8 +944,6 @@ rtc_static_library("neteq") { sources = [ "neteq/accelerate.cc", "neteq/accelerate.h", - "neteq/audio_classifier.cc", - "neteq/audio_classifier.h", "neteq/audio_decoder_impl.cc", "neteq/audio_decoder_impl.h", "neteq/audio_multi_vector.cc", @@ -1025,8 +1015,6 @@ rtc_static_library("neteq") { "neteq/timestamp_scaler.h", ] - public_configs = [ ":neteq_config" ] - deps = [ ":audio_decoder_factory_interface", ":audio_decoder_interface", @@ -1109,7 +1097,6 @@ if (rtc_include_tests) { ":RTPtimeshift", ":acm_receive_test", ":acm_send_test", - ":audio_classifier_test", ":audio_codec_speed_tests", ":audio_decoder_unittests", ":audio_decoder_unittests", @@ -1717,18 +1704,6 @@ if (rtc_include_tests) { ] } - rtc_executable("audio_classifier_test") { - testonly = true - sources = [ - "neteq/test/audio_classifier_test.cc", - ] - deps = [ - ":neteq", - ":webrtc_opus", - "../../system_wrappers:system_wrappers_default", - ] - } - rtc_executable("neteq_ilbc_quality_test") { testonly = true diff --git a/webrtc/modules/audio_coding/neteq/audio_classifier.cc b/webrtc/modules/audio_coding/neteq/audio_classifier.cc deleted file mode 100644 index 4a8c6fb974..0000000000 --- a/webrtc/modules/audio_coding/neteq/audio_classifier.cc +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "webrtc/modules/audio_coding/neteq/audio_classifier.h" - -#include -#include - -namespace webrtc { - -static const int kDefaultSampleRateHz = 48000; -static const int kDefaultFrameRateHz = 50; -static const int kDefaultFrameSizeSamples = - kDefaultSampleRateHz / kDefaultFrameRateHz; -static const float kDefaultThreshold = 0.5f; - -AudioClassifier::AudioClassifier() - : analysis_info_(), - is_music_(false), - music_probability_(0), - // This actually assigns the pointer to a static constant struct - // rather than creates a struct and |celt_mode_| does not need - // to be deleted. - celt_mode_(opus_custom_mode_create(kDefaultSampleRateHz, - kDefaultFrameSizeSamples, - NULL)), - analysis_state_() { - assert(celt_mode_); -} - -AudioClassifier::~AudioClassifier() {} - -bool AudioClassifier::Analysis(const int16_t* input, - int input_length, - int channels) { - // Must be 20 ms frames at 48 kHz sampling. - assert((input_length / channels) == kDefaultFrameSizeSamples); - - // Only mono or stereo are allowed. - assert(channels == 1 || channels == 2); - - // Call Opus' classifier, defined in - // "third_party/opus/src/src/analysis.h", with lsb_depth = 16. - // Also uses a down-mixing function downmix_int, defined in - // "third_party/opus/src/src/opus_private.h", with - // constants c1 = 0, and c2 = -2. - run_analysis(&analysis_state_, - celt_mode_, - input, - kDefaultFrameSizeSamples, - kDefaultFrameSizeSamples, - 0, - -2, - channels, - kDefaultSampleRateHz, - 16, - downmix_int, - &analysis_info_); - music_probability_ = analysis_info_.music_prob; - is_music_ = music_probability_ > kDefaultThreshold; - return is_music_; -} - -bool AudioClassifier::is_music() const { - return is_music_; -} - -} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq/audio_classifier.h b/webrtc/modules/audio_coding/neteq/audio_classifier.h deleted file mode 100644 index c94bd02a00..0000000000 --- a/webrtc/modules/audio_coding/neteq/audio_classifier.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_CLASSIFIER_H_ -#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_CLASSIFIER_H_ - -#include "webrtc/base/ignore_wundef.h" - -extern "C" { -RTC_PUSH_IGNORING_WUNDEF() -#include "celt.h" -RTC_POP_IGNORING_WUNDEF() -#include "analysis.h" -#include "opus_private.h" -} - -#include "webrtc/typedefs.h" - -namespace webrtc { - -// This class provides a speech/music classification and is a wrapper over the -// Opus classifier. It currently only supports 48 kHz mono or stereo with a -// frame size of 20 ms. - -class AudioClassifier { - public: - AudioClassifier(); - virtual ~AudioClassifier(); - - // Classifies one frame of audio data in input, - // input_length : must be channels * 960; - // channels : must be 1 (mono) or 2 (stereo). - bool Analysis(const int16_t* input, int input_length, int channels); - - // Gets the current classification : true = music, false = speech. - virtual bool is_music() const; - - // Gets the current music probability. - float music_probability() const { return music_probability_; } - - private: - AnalysisInfo analysis_info_; - bool is_music_; - float music_probability_; - const CELTMode* celt_mode_; - TonalityAnalysisState analysis_state_; -}; - -} // namespace webrtc - -#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_CLASSIFIER_H_ diff --git a/webrtc/modules/audio_coding/neteq/audio_classifier_unittest.cc b/webrtc/modules/audio_coding/neteq/audio_classifier_unittest.cc deleted file mode 100644 index 09c569966b..0000000000 --- a/webrtc/modules/audio_coding/neteq/audio_classifier_unittest.cc +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "webrtc/modules/audio_coding/neteq/audio_classifier.h" - -#include -#include -#include -#include -#include -#include - -#include "webrtc/test/gtest.h" -#include "webrtc/test/testsupport/fileutils.h" - -namespace webrtc { - -static const size_t kFrameSize = 960; - -TEST(AudioClassifierTest, AllZeroInput) { - int16_t in_mono[kFrameSize] = {0}; - - // Test all-zero vectors and let the classifier converge from its default - // to the expected value. - AudioClassifier zero_classifier; - for (int i = 0; i < 100; ++i) { - zero_classifier.Analysis(in_mono, kFrameSize, 1); - } - EXPECT_TRUE(zero_classifier.is_music()); -} - -void RunAnalysisTest(const std::string& audio_filename, - const std::string& data_filename, - size_t channels) { - AudioClassifier classifier; - std::unique_ptr in(new int16_t[channels * kFrameSize]); - bool is_music_ref; - - FILE* audio_file = fopen(audio_filename.c_str(), "rb"); - ASSERT_TRUE(audio_file != NULL) << "Failed to open file " << audio_filename - << std::endl; - FILE* data_file = fopen(data_filename.c_str(), "rb"); - ASSERT_TRUE(audio_file != NULL) << "Failed to open file " << audio_filename - << std::endl; - while (fread(in.get(), sizeof(int16_t), channels * kFrameSize, audio_file) == - channels * kFrameSize) { - bool is_music = - classifier.Analysis(in.get(), channels * kFrameSize, channels); - EXPECT_EQ(is_music, classifier.is_music()); - ASSERT_EQ(1u, fread(&is_music_ref, sizeof(is_music_ref), 1, data_file)); - EXPECT_EQ(is_music_ref, is_music); - } - fclose(audio_file); - fclose(data_file); -} - -TEST(AudioClassifierTest, DoAnalysisMono) { -#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) - RunAnalysisTest(test::ResourcePath("short_mixed_mono_48", "pcm"), - test::ResourcePath("short_mixed_mono_48_arm", "dat"), - 1); -#else - RunAnalysisTest(test::ResourcePath("short_mixed_mono_48", "pcm"), - test::ResourcePath("short_mixed_mono_48", "dat"), - 1); -#endif // WEBRTC_ARCH_ARM -} - -TEST(AudioClassifierTest, DoAnalysisStereo) { - RunAnalysisTest(test::ResourcePath("short_mixed_stereo_48", "pcm"), - test::ResourcePath("short_mixed_stereo_48", "dat"), - 2); -} - -} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq/include/neteq.h b/webrtc/modules/audio_coding/neteq/include/neteq.h index 5b17e128d8..ea05940598 100644 --- a/webrtc/modules/audio_coding/neteq/include/neteq.h +++ b/webrtc/modules/audio_coding/neteq/include/neteq.h @@ -76,7 +76,6 @@ class NetEq { struct Config { Config() : sample_rate_hz(16000), - enable_audio_classifier(false), enable_post_decode_vad(false), max_packets_in_buffer(50), // |max_delay_ms| has the same effect as calling SetMaximumDelay(). @@ -88,7 +87,6 @@ class NetEq { std::string ToString() const; int sample_rate_hz; // Initial value. Will change with input data. - bool enable_audio_classifier; bool enable_post_decode_vad; size_t max_packets_in_buffer; int max_delay_ms; diff --git a/webrtc/modules/audio_coding/neteq/neteq.cc b/webrtc/modules/audio_coding/neteq/neteq.cc index e3b9f8be92..73233c42e4 100644 --- a/webrtc/modules/audio_coding/neteq/neteq.cc +++ b/webrtc/modules/audio_coding/neteq/neteq.cc @@ -19,8 +19,7 @@ namespace webrtc { std::string NetEq::Config::ToString() const { std::stringstream ss; - ss << "sample_rate_hz=" << sample_rate_hz << ", enable_audio_classifier=" - << (enable_audio_classifier ? "true" : "false") + ss << "sample_rate_hz=" << sample_rate_hz << ", enable_post_decode_vad=" << (enable_post_decode_vad ? "true" : "false") << ", max_packets_in_buffer=" << max_packets_in_buffer diff --git a/webrtc/modules/audio_coding/neteq/test/audio_classifier_test.cc b/webrtc/modules/audio_coding/neteq/test/audio_classifier_test.cc deleted file mode 100644 index 22de05ad8c..0000000000 --- a/webrtc/modules/audio_coding/neteq/test/audio_classifier_test.cc +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "webrtc/modules/audio_coding/neteq/audio_classifier.h" - -#include -#include -#include -#include - -#include -#include -#include - -int main(int argc, char* argv[]) { - if (argc != 5) { - std::cout << "Usage: " << argv[0] << - " channels output_type " - << std::endl << std::endl; - std::cout << "Where channels can be 1 (mono) or 2 (interleaved stereo),"; - std::cout << " outputs can be 1 (classification (boolean)) or 2"; - std::cout << " (classification and music probability (float))," - << std::endl; - std::cout << "and the sampling frequency is assumed to be 48 kHz." - << std::endl; - return -1; - } - - const int kFrameSizeSamples = 960; - int channels = atoi(argv[1]); - if (channels < 1 || channels > 2) { - std::cout << "Disallowed number of channels " << channels << std::endl; - return -1; - } - - int outputs = atoi(argv[2]); - if (outputs < 1 || outputs > 2) { - std::cout << "Disallowed number of outputs " << outputs << std::endl; - return -1; - } - - const int data_size = channels * kFrameSizeSamples; - std::unique_ptr in(new int16_t[data_size]); - - std::string input_filename = argv[3]; - std::string output_filename = argv[4]; - - std::cout << "Input file: " << input_filename << std::endl; - std::cout << "Output file: " << output_filename << std::endl; - - FILE* in_file = fopen(input_filename.c_str(), "rb"); - if (!in_file) { - std::cout << "Cannot open input file " << input_filename << std::endl; - return -1; - } - - FILE* out_file = fopen(output_filename.c_str(), "wb"); - if (!out_file) { - std::cout << "Cannot open output file " << output_filename << std::endl; - return -1; - } - - webrtc::AudioClassifier classifier; - int frame_counter = 0; - int music_counter = 0; - while (fread(in.get(), sizeof(*in.get()), - data_size, in_file) == (size_t) data_size) { - bool is_music = classifier.Analysis(in.get(), data_size, channels); - if (!fwrite(&is_music, sizeof(is_music), 1, out_file)) { - std::cout << "Error writing." << std::endl; - return -1; - } - if (is_music) { - music_counter++; - } - std::cout << "frame " << frame_counter << " decision " << is_music; - if (outputs == 2) { - float music_prob = classifier.music_probability(); - if (!fwrite(&music_prob, sizeof(music_prob), 1, out_file)) { - std::cout << "Error writing." << std::endl; - return -1; - } - std::cout << " music prob " << music_prob; - } - std::cout << std::endl; - frame_counter++; - } - std::cout << frame_counter << " frames processed." << std::endl; - if (frame_counter > 0) { - float music_percentage = music_counter / static_cast(frame_counter); - std::cout << music_percentage << " percent music." << std::endl; - } - - fclose(in_file); - fclose(out_file); - return 0; -}