This CL finalizes the Conversational Speech tool.

The following changes have been made:
- command-line arguments wired up,
- user-facing output added,
- final polishing.

BUG=webrtc:7218

Review-Url: https://codereview.webrtc.org/2808053002
Cr-Commit-Position: refs/heads/master@{#18609}
This commit is contained in:
alessiob 2017-06-15 03:49:57 -07:00 committed by Commit Bot
parent b2152b7b61
commit 19e087fc91
6 changed files with 57 additions and 44 deletions

View File

@ -69,5 +69,8 @@ rtc_source_set("unittest") {
"../../../../../webrtc/test:test_support",
"//testing/gmock",
"//testing/gtest",
"//webrtc:webrtc_common",
"//webrtc/base:rtc_base_approved",
"//webrtc/test:test_support",
]
}

View File

@ -17,9 +17,7 @@ For instance, echo cancellation in the APM module can be evaluated using two-end
audio tracks as input and reverse input.
By indicating negative and positive time offsets, one can reproduce cross-talk
and silence in the conversation.
IMPORTANT: **the whole code has not been landed yet.**
(aka double-talk) and silence in the conversation.
### Example

View File

@ -11,8 +11,12 @@
#include <iostream>
#include "gflags/gflags.h"
#include "webrtc/base/logging.h"
#include "webrtc/base/ptr_util.h"
#include "webrtc/modules/audio_processing/test/conversational_speech/config.h"
#include "webrtc/modules/audio_processing/test/conversational_speech/timing.h"
#include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h"
#include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"
#include "webrtc/modules/audio_processing/test/conversational_speech/simulator.h"
#include "webrtc/test/testsupport/fileutils.h"
namespace webrtc {
@ -48,14 +52,31 @@ DEFINE_validator(o, dir_exists);
// Entry point of the conversational speech generator tool.
//
// Parses the command-line flags, builds a Config from the values of the -i, -t
// and -o flags, loads the timing description, creates a MultiEndCall from the
// timing and the audio tracks path, and runs the simulation. The near-end and
// far-end paths of the generated tracks are then printed to stdout, grouped by
// speaker. Returns 0 once the output paths have been printed.
int main(int argc, char* argv[]) {
google::SetUsageMessage(kUsageDescription);
// The last argument (`true`) asks gflags to remove the parsed flags from
// argc/argv.
google::ParseCommandLineFlags(&argc, &argv, true);
// Bundle the three flag values (FLAGS_i, FLAGS_t, FLAGS_o) into one object.
conversational_speech::Config config(FLAGS_i, FLAGS_t, FLAGS_o);
// TODO(alessiob): remove line below once debugged.
rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);
// Echo the configured paths at verbose log level.
LOG(LS_VERBOSE) << "i = " << config.audiotracks_path();
LOG(LS_VERBOSE) << "t = " << config.timing_filepath();
LOG(LS_VERBOSE) << "o = " << config.output_path();
// Load timing.
std::vector<conversational_speech::Turn> timing =
conversational_speech::LoadTiming(config.timing_filepath());
// Parse timing and audio tracks.
// The factory is moved into MultiEndCall, so `wavreader_factory` must not be
// used after this statement.
auto wavreader_factory = rtc::MakeUnique<
conversational_speech::WavReaderFactory>();
conversational_speech::MultiEndCall multiend_call(
timing, config.audiotracks_path(), std::move(wavreader_factory));
// Generate output audio tracks.
// NOTE(review): multiend_call.valid() is not checked before simulating;
// confirm that Simulate() copes with an invalid call.
auto generated_audiotrack_pairs = conversational_speech::Simulate(
multiend_call, config.output_path());
// Show paths to created audio tracks.
// NOTE(review): the result is dereferenced below without a null check;
// presumably Simulate() never returns an empty handle - confirm.
std::cout << "Output files:" << std::endl;
// Each entry's `first` is printed as the speaker and its `second` carries the
// near-end and far-end paths for that speaker.
for (const auto& output_paths_entry : *generated_audiotrack_pairs) {
std::cout << " speaker: " << output_paths_entry.first << std::endl;
std::cout << " near end: " << output_paths_entry.second.near_end
<< std::endl;
std::cout << " far end: " << output_paths_entry.second.far_end
<< std::endl;
}
return 0;
}

View File

@ -12,7 +12,7 @@
// members. Some of them focus on accepting or rejecting different
// conversational speech setups. A setup is defined by a set of audio tracks and
// timing information.
// The docstring at the beginning of each TEST_F(ConversationalSpeechTest,
// The docstring at the beginning of each TEST(ConversationalSpeechTest,
// MultiEndCallSetup*) function looks like the drawing below and indicates which
// setup is tested.
//
@ -174,16 +174,7 @@ void DeleteFolderAndContents(const std::string& dir) {
using testing::_;
// TODO(alessiob): Remove fixture once conversational_speech fully implemented
// and replace TEST_F with TEST.
class ConversationalSpeechTest : public testing::Test {
public:
ConversationalSpeechTest() {
rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);
}
};
TEST_F(ConversationalSpeechTest, Settings) {
TEST(ConversationalSpeechTest, Settings) {
const conversational_speech::Config config(
audiotracks_path, timing_filepath, output_path);
@ -193,7 +184,7 @@ TEST_F(ConversationalSpeechTest, Settings) {
EXPECT_EQ(output_path, config.output_path());
}
TEST_F(ConversationalSpeechTest, TimingSaveLoad) {
TEST(ConversationalSpeechTest, TimingSaveLoad) {
// Save test timing.
const std::string temporary_filepath = TempFilename(
OutputPath(), "TempTimingTestFile");
@ -213,7 +204,7 @@ TEST_F(ConversationalSpeechTest, TimingSaveLoad) {
}
}
TEST_F(ConversationalSpeechTest, MultiEndCallCreate) {
TEST(ConversationalSpeechTest, MultiEndCallCreate) {
auto mock_wavreader_factory = CreateMockWavReaderFactory();
// There are 5 unique audio tracks to read.
@ -230,7 +221,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallCreate) {
EXPECT_EQ(6u, multiend_call.speaking_turns().size());
}
TEST_F(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) {
TEST(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) {
const std::vector<Turn> timing = {
{"A", "sr8000", 0},
{"B", "sr16000", 0},
@ -245,7 +236,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) {
EXPECT_FALSE(multiend_call.valid());
}
TEST_F(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) {
TEST(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) {
const std::vector<Turn> timing = {
{"A", "sr16000_stereo", 0},
{"B", "sr16000_stereo", 0},
@ -260,7 +251,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) {
EXPECT_FALSE(multiend_call.valid());
}
TEST_F(ConversationalSpeechTest,
TEST(ConversationalSpeechTest,
MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) {
const std::vector<Turn> timing = {
{"A", "sr8000", 0},
@ -276,7 +267,7 @@ TEST_F(ConversationalSpeechTest,
EXPECT_FALSE(multiend_call.valid());
}
TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {
TEST(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {
const std::vector<Turn> timing = {
{"A", "t500", -100},
{"B", "t500", 0},
@ -291,7 +282,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {
EXPECT_FALSE(multiend_call.valid());
}
TEST_F(ConversationalSpeechTest, MultiEndCallSetupSimple) {
TEST(ConversationalSpeechTest, MultiEndCallSetupSimple) {
// Accept:
// A 0****.....
// B .....1****
@ -316,7 +307,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupSimple) {
EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
}
TEST_F(ConversationalSpeechTest, MultiEndCallSetupPause) {
TEST(ConversationalSpeechTest, MultiEndCallSetupPause) {
// Accept:
// A 0****.......
// B .......1****
@ -341,7 +332,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupPause) {
EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
}
TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) {
TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) {
// Accept:
// A 0****....
// B ....1****
@ -366,7 +357,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) {
EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
}
TEST_F(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) {
TEST(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) {
// Reject:
// A ..0****
// B .1****. The n-th turn cannot start before the (n-1)-th one.
@ -384,7 +375,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) {
EXPECT_FALSE(multiend_call.valid());
}
TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) {
TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) {
// Accept:
// A 0****2****...
// B ...1*********
@ -410,7 +401,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) {
EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
}
TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) {
TEST(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) {
// Reject:
// A 0****......
// A ...1****...
@ -431,7 +422,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) {
EXPECT_FALSE(multiend_call.valid());
}
TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) {
TEST(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) {
// Reject:
// A 0*********
// B 1**.......
@ -454,7 +445,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) {
EXPECT_FALSE(multiend_call.valid());
}
TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) {
TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) {
// Accept:
// A 0*********..
// B ..1****.....
@ -481,7 +472,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) {
EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
}
TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) {
TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) {
// Reject:
// A 0*********
// B ..1****...
@ -503,7 +494,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) {
EXPECT_FALSE(multiend_call.valid());
}
TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) {
TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) {
// Accept:
// A 0*********..
// B .2****......
@ -530,7 +521,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) {
EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
}
TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) {
TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) {
// Accept:
// A 0****
// B 1****
@ -553,7 +544,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) {
EXPECT_EQ(2u, multiend_call.speaking_turns().size());
}
TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequence) {
TEST(ConversationalSpeechTest, MultiEndCallSetupLongSequence) {
// Accept:
// A 0****....3****.5**.
// B .....1****...4**...
@ -586,7 +577,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequence) {
EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
}
TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) {
TEST(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) {
// Reject:
// A 0****....3****.6**
// B .....1****...4**..
@ -614,7 +605,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) {
EXPECT_FALSE(multiend_call.valid());
}
TEST_F(ConversationalSpeechTest, MultiEndCallWavReaderAdaptorSine) {
TEST(ConversationalSpeechTest, MultiEndCallWavReaderAdaptorSine) {
// Parameters with which wav files are created.
constexpr int duration_seconds = 5;
const int sample_rates[] = {8000, 11025, 16000, 22050, 32000, 44100, 48000};
@ -641,7 +632,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallWavReaderAdaptorSine) {
}
}
TEST_F(ConversationalSpeechTest, DISABLED_MultiEndCallSimulator) {
TEST(ConversationalSpeechTest, DISABLED_MultiEndCallSimulator) {
// Simulated call (one character corresponding to 500 ms):
// A 0*********...........2*********.....
// B ...........1*********.....3*********

View File

@ -28,7 +28,7 @@ class MockWavReader : public WavReaderInterface {
MockWavReader(int sample_rate, size_t num_channels, size_t num_samples);
~MockWavReader();
// TODO(alessiob): use ON_CALL to return random samples.
// TODO(alessiob): use ON_CALL to return random samples if needed.
MOCK_METHOD1(ReadFloatSamples, size_t(rtc::ArrayView<float>));
MOCK_METHOD1(ReadInt16Samples, size_t(rtc::ArrayView<int16_t>));

View File

@ -53,7 +53,7 @@ std::vector<Turn> LoadTiming(const std::string& timing_filepath) {
void SaveTiming(const std::string& timing_filepath,
rtc::ArrayView<const Turn> timing) {
std::ofstream outfile(timing_filepath);
// TODO(alessio): check if file open for writing.
RTC_CHECK(outfile.is_open());
for (const Turn& turn : timing) {
outfile << turn.speaker_name << " " << turn.audiotrack_file_name
<< " " << turn.offset << std::endl;