From 19e087fc91453b761b6b56ba84e7c9c5fafd9e8e Mon Sep 17 00:00:00 2001
From: alessiob <alessiob@webrtc.org>
Date: Thu, 15 Jun 2017 03:49:57 -0700
Subject: [PATCH] Finalize the Conversational Speech tool

The following changes have been made:
- command line args wired,
- user output added,
- final polishing.

BUG=webrtc:7218

Review-Url: https://codereview.webrtc.org/2808053002
Cr-Commit-Position: refs/heads/master@{#18609}
---
 .../test/conversational_speech/BUILD.gn       |  3 +
 .../test/conversational_speech/README.md      |  4 +-
 .../test/conversational_speech/generator.cc   | 35 +++++++++---
 .../generator_unittest.cc                     | 55 ++++++++-----------
 .../conversational_speech/mock_wavreader.h    |  2 +-
 .../test/conversational_speech/timing.cc      |  2 +-
 6 files changed, 57 insertions(+), 44 deletions(-)
diff --git a/webrtc/modules/audio_processing/test/conversational_speech/BUILD.gn b/webrtc/modules/audio_processing/test/conversational_speech/BUILD.gn
index df12fc1527..af24f8ab5e 100644
--- a/webrtc/modules/audio_processing/test/conversational_speech/BUILD.gn
+++ b/webrtc/modules/audio_processing/test/conversational_speech/BUILD.gn
@@ -69,5 +69,8 @@ rtc_source_set("unittest") {
     "../../../../../webrtc/test:test_support",
     "//testing/gmock",
     "//testing/gtest",
+    "//webrtc:webrtc_common",
+    "//webrtc/base:rtc_base_approved",
+    "//webrtc/test:test_support",
   ]
 }
diff --git a/webrtc/modules/audio_processing/test/conversational_speech/README.md b/webrtc/modules/audio_processing/test/conversational_speech/README.md
index 415c65b027..bbb4112fc0 100644
--- a/webrtc/modules/audio_processing/test/conversational_speech/README.md
+++ b/webrtc/modules/audio_processing/test/conversational_speech/README.md
@@ -17,9 +17,7 @@ For instance, echo cancellation in the APM module can be evaluated using two-end
 audio tracks as input and reverse input.
 
 By indicating negative and positive time offsets, one can reproduce cross-talk
-and silence in the conversation.
-
-IMPORTANT: **the whole code has not been landed yet.**
+(aka double-talk) and silence in the conversation.
 
 ### Example
 
diff --git a/webrtc/modules/audio_processing/test/conversational_speech/generator.cc b/webrtc/modules/audio_processing/test/conversational_speech/generator.cc
index 923736ffef..57996c14a4 100644
--- a/webrtc/modules/audio_processing/test/conversational_speech/generator.cc
+++ b/webrtc/modules/audio_processing/test/conversational_speech/generator.cc
@@ -11,8 +11,12 @@
 #include <iostream>
 
 #include "gflags/gflags.h"
-#include "webrtc/base/logging.h"
+#include "webrtc/base/ptr_util.h"
 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h"
+#include "webrtc/modules/audio_processing/test/conversational_speech/timing.h"
+#include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h"
+#include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"
+#include "webrtc/modules/audio_processing/test/conversational_speech/simulator.h"
 #include "webrtc/test/testsupport/fileutils.h"
 
 namespace webrtc {
@@ -48,14 +52,40 @@ DEFINE_validator(o, dir_exists);
 int main(int argc, char* argv[]) {
   google::SetUsageMessage(kUsageDescription);
   google::ParseCommandLineFlags(&argc, &argv, true);
-
+  // The -i, -t and -o flags carry the audio tracks path, the timing file path
+  // and the output path respectively.
   conversational_speech::Config config(FLAGS_i, FLAGS_t, FLAGS_o);
 
-  // TODO(alessiob): remove line below once debugged.
-  rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);
-  LOG(LS_VERBOSE) << "i = " << config.audiotracks_path();
-  LOG(LS_VERBOSE) << "t = " << config.timing_filepath();
-  LOG(LS_VERBOSE) << "o = " << config.output_path();
+  // Load timing.
+  std::vector<conversational_speech::Turn> timing =
+      conversational_speech::LoadTiming(config.timing_filepath());
+
+  // Parse timing and audio tracks.
+  auto wavreader_factory = rtc::MakeUnique<
+      conversational_speech::WavReaderFactory>();
+  conversational_speech::MultiEndCall multiend_call(
+      timing, config.audiotracks_path(), std::move(wavreader_factory));
+
+  // Refuse to simulate a setup that MultiEndCall reports as not valid (e.g.,
+  // turns with different sample rates); exit with a non-zero status instead.
+  if (!multiend_call.valid()) {
+    std::cerr << "Invalid timing and/or audio tracks setup." << std::endl;
+    return 1;
+  }
+
+  // Generate output audio tracks.
+  auto generated_audiotrack_pairs = conversational_speech::Simulate(
+      multiend_call, config.output_path());
+
+  // Show paths to created audio tracks.
+  std::cout << "Output files:" << std::endl;
+  for (const auto& output_paths_entry : *generated_audiotrack_pairs) {
+    std::cout << "  speaker: " << output_paths_entry.first << std::endl;
+    std::cout << "    near end: " << output_paths_entry.second.near_end
+        << std::endl;
+    std::cout << "    far end: " << output_paths_entry.second.far_end
+        << std::endl;
+  }
 
   return 0;
 }
diff --git a/webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc b/webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc
index b112e8ff25..c38fc435c5 100644
--- a/webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc
+++ b/webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc
@@ -12,7 +12,7 @@
 // members. Part of them focus on accepting or rejecting different
 // conversational speech setups. A setup is defined by a set of audio tracks and
 // timing information).
-// The docstring at the beginning of each TEST_F(ConversationalSpeechTest,
+// The docstring at the beginning of each TEST(ConversationalSpeechTest,
 // MultiEndCallSetup*) function looks like the drawing below and indicates which
 // setup is tested.
 //
@@ -174,16 +174,7 @@ void DeleteFolderAndContents(const std::string& dir) {
 
 using testing::_;
 
-// TODO(alessiob): Remove fixture once conversational_speech fully implemented
-// and replace TEST_F with TEST.
-class ConversationalSpeechTest : public testing::Test {
- public:
-  ConversationalSpeechTest() {
-    rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);
-  }
-};
-
-TEST_F(ConversationalSpeechTest, Settings) {
+TEST(ConversationalSpeechTest, Settings) {
   const conversational_speech::Config config(
       audiotracks_path, timing_filepath, output_path);
 
@@ -193,7 +184,7 @@ TEST_F(ConversationalSpeechTest, Settings) {
   EXPECT_EQ(output_path, config.output_path());
 }
 
-TEST_F(ConversationalSpeechTest, TimingSaveLoad) {
+TEST(ConversationalSpeechTest, TimingSaveLoad) {
   // Save test timing.
   const std::string temporary_filepath = TempFilename(
       OutputPath(), "TempTimingTestFile");
@@ -213,7 +204,7 @@ TEST_F(ConversationalSpeechTest, TimingSaveLoad) {
   }
 }
 
-TEST_F(ConversationalSpeechTest, MultiEndCallCreate) {
+TEST(ConversationalSpeechTest, MultiEndCallCreate) {
   auto mock_wavreader_factory = CreateMockWavReaderFactory();
 
   // There are 5 unique audio tracks to read.
@@ -230,7 +221,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallCreate) {
   EXPECT_EQ(6u, multiend_call.speaking_turns().size());
 }
 
-TEST_F(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) {
+TEST(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) {
   const std::vector<Turn> timing = {
       {"A", "sr8000", 0},
       {"B", "sr16000", 0},
@@ -245,7 +236,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) {
   EXPECT_FALSE(multiend_call.valid());
 }
 
-TEST_F(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) {
+TEST(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) {
   const std::vector<Turn> timing = {
       {"A", "sr16000_stereo", 0},
       {"B", "sr16000_stereo", 0},
@@ -260,7 +251,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) {
   EXPECT_FALSE(multiend_call.valid());
 }
 
-TEST_F(ConversationalSpeechTest,
+TEST(ConversationalSpeechTest,
        MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) {
   const std::vector<Turn> timing = {
       {"A", "sr8000", 0},
@@ -276,7 +267,7 @@ TEST_F(ConversationalSpeechTest,
   EXPECT_FALSE(multiend_call.valid());
 }
 
-TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {
+TEST(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {
   const std::vector<Turn> timing = {
       {"A", "t500", -100},
       {"B", "t500", 0},
@@ -291,7 +282,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {
   EXPECT_FALSE(multiend_call.valid());
 }
 
-TEST_F(ConversationalSpeechTest, MultiEndCallSetupSimple) {
+TEST(ConversationalSpeechTest, MultiEndCallSetupSimple) {
   // Accept:
   // A 0****.....
   // B .....1****
@@ -316,7 +307,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupSimple) {
   EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
 }
 
-TEST_F(ConversationalSpeechTest, MultiEndCallSetupPause) {
+TEST(ConversationalSpeechTest, MultiEndCallSetupPause) {
   // Accept:
   // A 0****.......
   // B .......1****
@@ -341,7 +332,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupPause) {
   EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
 }
 
-TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) {
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) {
   // Accept:
   // A 0****....
   // B ....1****
@@ -366,7 +357,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) {
   EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
 }
 
-TEST_F(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) {
+TEST(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) {
   // Reject:
   // A ..0****
   // B .1****.  The n-th turn cannot start before the (n-1)-th one.
@@ -384,7 +375,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) {
   EXPECT_FALSE(multiend_call.valid());
 }
 
-TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) {
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) {
   // Accept:
   // A 0****2****...
   // B ...1*********
@@ -410,7 +401,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) {
   EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
 }
 
-TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) {
+TEST(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) {
   // Reject:
   // A 0****......
   // A ...1****...
@@ -431,7 +422,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) {
   EXPECT_FALSE(multiend_call.valid());
 }
 
-TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) {
+TEST(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) {
   // Reject:
   // A 0*********
   // B 1**.......
@@ -454,7 +445,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) {
   EXPECT_FALSE(multiend_call.valid());
 }
 
-TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) {
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) {
   // Accept:
   // A 0*********..
   // B ..1****.....
@@ -481,7 +472,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) {
   EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
 }
 
-TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) {
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) {
   // Reject:
   // A 0*********
   // B ..1****...
@@ -503,7 +494,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) {
   EXPECT_FALSE(multiend_call.valid());
 }
 
-TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) {
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) {
   // Accept:
   // A 0*********..
   // B .2****......
@@ -530,7 +521,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) {
   EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
 }
 
-TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) {
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) {
   // Accept:
   // A 0****
   // B 1****
@@ -553,7 +544,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) {
   EXPECT_EQ(2u, multiend_call.speaking_turns().size());
 }
 
-TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequence) {
+TEST(ConversationalSpeechTest, MultiEndCallSetupLongSequence) {
   // Accept:
   // A 0****....3****.5**.
   // B .....1****...4**...
@@ -586,7 +577,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequence) {
   EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
 }
 
-TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) {
+TEST(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) {
   // Reject:
   // A 0****....3****.6**
   // B .....1****...4**..
@@ -614,7 +605,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) {
   EXPECT_FALSE(multiend_call.valid());
 }
 
-TEST_F(ConversationalSpeechTest, MultiEndCallWavReaderAdaptorSine) {
+TEST(ConversationalSpeechTest, MultiEndCallWavReaderAdaptorSine) {
   // Parameters with which wav files are created.
   constexpr int duration_seconds = 5;
   const int sample_rates[] = {8000, 11025, 16000, 22050, 32000, 44100, 48000};
@@ -641,7 +632,7 @@ TEST_F(ConversationalSpeechTest, MultiEndCallWavReaderAdaptorSine) {
   }
 }
 
-TEST_F(ConversationalSpeechTest, DISABLED_MultiEndCallSimulator) {
+TEST(ConversationalSpeechTest, DISABLED_MultiEndCallSimulator) {
   // Simulated call (one character corresponding to 500 ms):
   // A 0*********...........2*********.....
   // B ...........1*********.....3*********
diff --git a/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h b/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h
index 030e163e80..98dfaa27e8 100644
--- a/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h
+++ b/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h
@@ -28,7 +28,7 @@ class MockWavReader : public WavReaderInterface {
   MockWavReader(int sample_rate, size_t num_channels, size_t num_samples);
   ~MockWavReader();
 
-  // TODO(alessiob): use ON_CALL to return random samples.
+  // TODO(alessiob): use ON_CALL to return random samples if needed.
   MOCK_METHOD1(ReadFloatSamples, size_t(rtc::ArrayView<float>));
   MOCK_METHOD1(ReadInt16Samples, size_t(rtc::ArrayView<int16_t>));
 
diff --git a/webrtc/modules/audio_processing/test/conversational_speech/timing.cc b/webrtc/modules/audio_processing/test/conversational_speech/timing.cc
index 0aa44fa42c..53076f1cac 100644
--- a/webrtc/modules/audio_processing/test/conversational_speech/timing.cc
+++ b/webrtc/modules/audio_processing/test/conversational_speech/timing.cc
@@ -53,7 +53,7 @@ std::vector<Turn> LoadTiming(const std::string& timing_filepath) {
 void SaveTiming(const std::string& timing_filepath,
                 rtc::ArrayView<const Turn> timing) {
   std::ofstream outfile(timing_filepath);
-  // TODO(alessio): check if file open for writing.
+  RTC_CHECK(outfile.is_open());
   for (const Turn& turn : timing) {
     outfile << turn.speaker_name << " " << turn.audiotrack_file_name
         << " " << turn.offset << std::endl;