diff --git a/webrtc/modules/audio_processing/test/conversational_speech/BUILD.gn b/webrtc/modules/audio_processing/test/conversational_speech/BUILD.gn
index ca5977fae8..10601fed3c 100644
--- a/webrtc/modules/audio_processing/test/conversational_speech/BUILD.gn
+++ b/webrtc/modules/audio_processing/test/conversational_speech/BUILD.gn
@@ -6,7 +6,7 @@
 # in the file PATENTS. All contributing project authors may
 # be found in the AUTHORS file in the root of the source tree.
 
-import("//webrtc/webrtc.gni")
+import("../../../../../webrtc/webrtc.gni")
 
 group("conversational_speech") {
   testonly = true
@@ -22,9 +22,9 @@ rtc_executable("conversational_speech_generator") {
   ]
   deps = [
     ":lib",
+    "../../../../../webrtc/base:rtc_base_approved",
+    "../../../../../webrtc/test:test_support",
     "//third_party/gflags",
-    "//webrtc/base:rtc_base_approved",
-    "//webrtc/test:test_support",
   ]
 }
 
@@ -45,9 +45,9 @@ rtc_static_library("lib") {
     "wavreader_interface.h",
   ]
   deps = [
-    "//webrtc:webrtc_common",
-    "//webrtc/base:rtc_base_approved",
-    "//webrtc/common_audio",
+    "../../../../../webrtc:webrtc_common",
+    "../../../../../webrtc/base:rtc_base_approved",
+    "../../../../../webrtc/common_audio",
   ]
   visibility = [ ":*" ]  # Only targets in this file can depend on this.
 }
@@ -56,15 +56,17 @@ rtc_source_set("unittest") {
   testonly = true
   sources = [
     "generator_unittest.cc",
+    "mock_wavreader.cc",
     "mock_wavreader.h",
     "mock_wavreader_factory.cc",
     "mock_wavreader_factory.h",
   ]
   deps = [
     ":lib",
+    "../../../../../webrtc:webrtc_common",
+    "../../../../../webrtc/base:rtc_base_approved",
+    "../../../../../webrtc/test:test_support",
     "//testing/gmock",
     "//testing/gtest",
-    "//webrtc:webrtc_common",
-    "//webrtc/test:test_support",
   ]
 }
 
diff --git a/webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc b/webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc
index 59454d9d47..406d95cf21 100644
--- a/webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc
+++ b/webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc
@@ -8,9 +8,36 @@
  * be found in the AUTHORS file in the root of the source tree.
  */
 
+// This file consists of unit tests for webrtc::test::conversational_speech
+// members. Some of them focus on accepting or rejecting different
+// conversational speech setups. A setup is defined by a set of audio tracks
+// and timing information.
+// The docstring at the beginning of each TEST_F(ConversationalSpeechTest,
+// MultiEndCallSetup*) function looks like the drawing below and indicates
+// which setup is tested.
+//
+// Accept:
+// A 0****.....
+// B .....1****
+//
+// The drawing indicates the following:
+// - the illustrated setup should be accepted,
+// - there are two speakers (namely, A and B),
+// - A is the first speaker, B is the second one,
+// - each character after the speaker's letter indicates a time unit (e.g., 100
+//   ms),
+// - "*" indicates speaking, "." listening,
+// - numbers indicate the turn index in the std::vector<Turn> timing.
+//
+// Note that the same speaker can appear in multiple lines in order to depict
+// cases in which there are wrong offsets leading to self cross-talk (which is
+// rejected).
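+//
+// As a purely illustrative example (the file names below are made up and do
+// not refer to any real audio track), the "Accept" drawing above would
+// correspond to a timing vector such as
+//   {{"A", "a.wav", 0}, {"B", "b.wav", 0}}
+// where each Turn lists the speaker name, the audio track file name and the
+// offset in milliseconds of the turn start relative to the end of the
+// previous turn.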
+
 #include <cstddef>
+#include <map>
 #include <memory>
 
+#include "webrtc/base/logging.h"
 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h"
 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"
 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"
@@ -44,9 +71,38 @@ const std::vector<Turn> expected_timing = {
 };
 const std::size_t kNumberOfTurns = expected_timing.size();
 
+// Default arguments for MockWavReaderFactory ctor.
+// Fake audio track parameters.
+constexpr int kDefaultSampleRate = 48000;
+const std::map<std::string, const MockWavReaderFactory::Params>
+    kDefaultMockWavReaderFactoryParamsMap = {
+  {"t300", {kDefaultSampleRate, 1u, 14400u}},  // 0.3 seconds.
+  {"t500", {kDefaultSampleRate, 1u, 24000u}},  // 0.5 seconds.
+  {"t1000", {kDefaultSampleRate, 1u, 48000u}},  // 1.0 seconds.
+};
+const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams =
+    kDefaultMockWavReaderFactoryParamsMap.at("t500");
+
+std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() {
+  return std::unique_ptr<MockWavReaderFactory>(
+      new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
+                               kDefaultMockWavReaderFactoryParamsMap));
+}
+
 }  // namespace
 
-TEST(ConversationalSpeechTest, Settings) {
+using testing::_;
+
+// TODO(alessiob): Remove the fixture once conversational_speech is fully
+// implemented and replace TEST_F with TEST.
+class ConversationalSpeechTest : public testing::Test {
+ public:
+  ConversationalSpeechTest() {
+    rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);
+  }
+};
+
+TEST_F(ConversationalSpeechTest, Settings) {
   const conversational_speech::Config config(
       audiotracks_path, timing_filepath, output_path);
 
@@ -56,7 +112,7 @@ TEST(ConversationalSpeechTest, Settings) {
   EXPECT_EQ(output_path, config.output_path());
 }
 
-TEST(ConversationalSpeechTest, TimingSaveLoad) {
+TEST_F(ConversationalSpeechTest, TimingSaveLoad) {
   // Save test timing.
   const std::string temporary_filepath = webrtc::test::TempFilename(
       webrtc::test::OutputPath(), "TempTimingTestFile");
@@ -76,20 +132,359 @@ TEST(ConversationalSpeechTest, TimingSaveLoad) {
   }
 }
 
-TEST(ConversationalSpeechTest, MultiEndCallCreate) {
-  auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>(
-      new MockWavReaderFactory());
+TEST_F(ConversationalSpeechTest, MultiEndCallCreate) {
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
 
   // There are 5 unique audio tracks to read.
-  EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(5);
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(5);
 
   // Inject the mock wav reader factory.
   conversational_speech::MultiEndCall multiend_call(
       expected_timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
 
   // Test.
   EXPECT_EQ(2u, multiend_call.speaker_names().size());
   EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(6u, multiend_call.speaking_turns().size());
+}
+
+TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {
+  const std::vector<Turn> timing = {
+      {"A", "t500", -100},
+      {"B", "t500", 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There is one unique audio track to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST_F(ConversationalSpeechTest, MultiEndCallSetupSimple) {
+  // Accept:
+  // A 0****.....
+  // B .....1****
+  constexpr std::size_t expected_duration = kDefaultSampleRate;
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0},
+      {"B", "t500", 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There is one unique audio track to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(2u, multiend_call.speaker_names().size());
+  EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(2u, multiend_call.speaking_turns().size());
+  EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST_F(ConversationalSpeechTest, MultiEndCallSetupPause) {
+  // Accept:
+  // A 0****.......
+  // B .......1****
+  constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0},
+      {"B", "t500", 200},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There is one unique audio track to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(2u, multiend_call.speaker_names().size());
+  EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(2u, multiend_call.speaking_turns().size());
+  EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) {
+  // Accept:
+  // A 0****....
+  // B ....1****
+  constexpr std::size_t expected_duration = kDefaultSampleRate * 0.9;
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0},
+      {"B", "t500", -100},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There is one unique audio track to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(2u, multiend_call.speaker_names().size());
+  EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(2u, multiend_call.speaking_turns().size());
+  EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST_F(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) {
+  // Reject:
+  // A ..0****
+  // B .1****. The n-th turn cannot start before the (n-1)-th one.
+  const std::vector<Turn> timing = {
+      {"A", "t500", 200},
+      {"B", "t500", -600},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There is one unique audio track to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) {
+  // Accept:
+  // A 0****2****...
+  // B ...1*********
+  constexpr std::size_t expected_duration = kDefaultSampleRate * 1.3;
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0},
+      {"B", "t1000", -200},
+      {"A", "t500", -800},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(2u, multiend_call.speaker_names().size());
+  EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(3u, multiend_call.speaking_turns().size());
+  EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) {
+  // Reject:
+  // A 0****......
+  // A ...1****...
+  // B ......2****
+  //      ^ Turn #1 overlaps with #0 which is from the same speaker.
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0},
+      {"A", "t500", -200},
+      {"B", "t500", -200},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There is one unique audio track to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) {
+  // Reject:
+  // A 0*********
+  // B 1**.......
+  // C ...2**....
+  // A ......3**.
+  //         ^ Turn #3 overlaps with #0 which is from the same speaker.
+  const std::vector<Turn> timing = {
+      {"A", "t1000", 0},
+      {"B", "t300", -1000},
+      {"C", "t300", 0},
+      {"A", "t300", 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) {
+  // Accept:
+  // A 0*********..
+  // B ..1****.....
+  // C .......2****
+  constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
+  const std::vector<Turn> timing = {
+      {"A", "t1000", 0},
+      {"B", "t500", -800},
+      {"C", "t500", 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(3u, multiend_call.speaker_names().size());
+  EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(3u, multiend_call.speaking_turns().size());
+  EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) {
+  // Reject:
+  // A 0*********
+  // B ..1****...
+  // C ....2****.
+  //       ^ Turn #2 overlaps both with #0 and #1 (cross-talk with 3+ speakers
+  //         not permitted).
+  const std::vector<Turn> timing = {
+      {"A", "t1000", 0},
+      {"B", "t500", -800},
+      {"C", "t500", -300},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) {
+  // Accept:
+  // A 0*********..
+  // B .1****......
+  // C .......2****
+  constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
+  const std::vector<Turn> timing = {
+      {"A", "t1000", 0},
+      {"B", "t500", -900},
+      {"C", "t500", 100},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(3u, multiend_call.speaker_names().size());
+  EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(3u, multiend_call.speaking_turns().size());
+  EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) {
+  // Accept:
+  // A 0****
+  // B 1****
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0},
+      {"B", "t500", -500},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There is one unique audio track to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(2u, multiend_call.speaker_names().size());
+  EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(2u, multiend_call.speaking_turns().size());
+}
+
+TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequence) {
+  // Accept:
+  // A 0****....3****.5**.
+  // B .....1****...4**...
+  // C ......2**.......6**..
+  constexpr std::size_t expected_duration = kDefaultSampleRate * 1.9;
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0},
+      {"B", "t500", 0},
+      {"C", "t300", -400},
+      {"A", "t500", 0},
+      {"B", "t300", -100},
+      {"A", "t300", -100},
+      {"C", "t300", -200},
+  };
+  auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>(
+      new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
+                               kDefaultMockWavReaderFactoryParamsMap));
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(3u, multiend_call.speaker_names().size());
+  EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(7u, multiend_call.speaking_turns().size());
+  EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) {
+  // Reject:
+  // A 0****....3****.6**
+  // B .....1****...4**..
+  // C ......2**.....5**..
+  //                  ^ Turns #4, #5 and #6 overlapping (cross-talk with 3+
+  //                    speakers not permitted).
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0},
+      {"B", "t500", 0},
+      {"C", "t300", -400},
+      {"A", "t500", 0},
+      {"B", "t300", -100},
+      {"A", "t300", -200},
+      {"C", "t300", -200},
+  };
+  auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>(
+      new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
+                               kDefaultMockWavReaderFactoryParamsMap));
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_FALSE(multiend_call.valid());
 }
 
 }  // namespace test
diff --git a/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.cc b/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.cc
new file mode 100644
index 0000000000..7d2f2b663e
--- /dev/null
+++ b/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.cc
@@ -0,0 +1,32 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+using testing::Return;
+
+MockWavReader::MockWavReader(
+    int sample_rate, size_t num_channels, size_t num_samples)
+    : sample_rate_(sample_rate), num_channels_(num_channels),
+      num_samples_(num_samples) {
+  ON_CALL(*this, sample_rate()).WillByDefault(Return(sample_rate_));
+  ON_CALL(*this, num_channels()).WillByDefault(Return(num_channels_));
+  ON_CALL(*this, num_samples()).WillByDefault(Return(num_samples_));
+}
+
+MockWavReader::~MockWavReader() = default;
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h b/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h
index 83aa9382e5..d71e6f896b 100644
--- a/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h
+++ b/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h
@@ -24,17 +24,13 @@ namespace conversational_speech {
 
 class MockWavReader : public WavReaderInterface {
  public:
-  MockWavReader(
-      int sample_rate, size_t num_channels, size_t num_samples)
-      : sample_rate_(sample_rate), num_channels_(num_channels),
-        num_samples_(num_samples) {}
-  ~MockWavReader() = default;
+  MockWavReader(int sample_rate, size_t num_channels, size_t num_samples);
+  ~MockWavReader();
 
-  // TOOD(alessiob): use ON_CALL to return random samples.
+  // TODO(alessiob): use ON_CALL to return random samples.
   MOCK_METHOD2(ReadFloatSamples, size_t(size_t, float*));
   MOCK_METHOD2(ReadInt16Samples, size_t(size_t, int16_t*));
 
-  // TOOD(alessiob): use ON_CALL to return properties.
   MOCK_CONST_METHOD0(sample_rate, int());
   MOCK_CONST_METHOD0(num_channels, size_t());
   MOCK_CONST_METHOD0(num_samples, size_t());
diff --git a/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.cc b/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.cc
index 1097639527..2dd21dadf1 100644
--- a/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.cc
+++ b/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.cc
@@ -10,14 +10,60 @@
 
 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"
 
+#include "webrtc/base/logging.h"
+#include "webrtc/base/pathutils.h"
+#include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h"
+#include "webrtc/test/gmock.h"
+
 namespace webrtc {
 namespace test {
 namespace conversational_speech {
 
-MockWavReaderFactory::MockWavReaderFactory() = default;
+using testing::_;
+using testing::Invoke;
+
+MockWavReaderFactory::MockWavReaderFactory(
+    const Params& default_params,
+    const std::map<std::string, const Params>& params)
+    : default_params_(default_params),
+      audiotrack_names_params_(params) {
+  ON_CALL(*this, Create(_)).WillByDefault(Invoke(
+      this, &MockWavReaderFactory::CreateMock));
+}
+
+MockWavReaderFactory::MockWavReaderFactory(const Params& default_params)
+    : MockWavReaderFactory(default_params,
+                           std::map<std::string, const Params>{}) {}
 
 MockWavReaderFactory::~MockWavReaderFactory() = default;
 
+std::unique_ptr<WavReaderInterface> MockWavReaderFactory::CreateMock(
+    const std::string& filepath) {
+  // Search the parameters corresponding to filepath.
+  const rtc::Pathname audiotrack_file_path(filepath);
+  const auto it = audiotrack_names_params_.find(
+      audiotrack_file_path.filename());
+
+  // If not found, use default parameters.
+  if (it == audiotrack_names_params_.end()) {
+    LOG(LS_VERBOSE) << "using default parameters for " << filepath;
+    return std::unique_ptr<WavReaderInterface>(
+        new MockWavReader(default_params_.sample_rate,
+                          default_params_.num_channels,
+                          default_params_.num_samples));
+  }
+
+  // Found, use the audiotrack-specific parameters.
+  LOG(LS_VERBOSE) << "using ad-hoc parameters for " << filepath;
+  LOG(LS_VERBOSE) << "sample_rate " << it->second.sample_rate;
+  LOG(LS_VERBOSE) << "num_channels " << it->second.num_channels;
+  LOG(LS_VERBOSE) << "num_samples " << it->second.num_samples;
+  return std::unique_ptr<WavReaderInterface>(
+      new MockWavReader(it->second.sample_rate,
+                        it->second.num_channels,
+                        it->second.num_samples));
+}
+
 }  // namespace conversational_speech
 }  // namespace test
 }  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h b/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h
index 3686d12dd7..d22856e80c 100644
--- a/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h
+++ b/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h
@@ -11,6 +11,7 @@
 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_FACTORY_H_
 #define WEBRTC_MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_FACTORY_H_
 
+#include <map>
 #include <memory>
 #include <string>
 
@@ -24,15 +25,28 @@ namespace conversational_speech {
 
 class MockWavReaderFactory : public WavReaderAbstractFactory {
  public:
-  MockWavReaderFactory();
-  // TODO(alessiob): add ctor that gets map string->(sr, #samples, #channels).
+  struct Params {
+    int sample_rate;
+    size_t num_channels;
+    size_t num_samples;
+  };
+
+  MockWavReaderFactory(const Params& default_params,
+                       const std::map<std::string, const Params>& params);
+  explicit MockWavReaderFactory(const Params& default_params);
   ~MockWavReaderFactory();
 
-  // TODO(alessiob): use ON_CALL to return MockWavReader with desired params.
   MOCK_CONST_METHOD1(Create, std::unique_ptr<WavReaderInterface>(
       const std::string&));
 
-  // TODO(alessiob): add const ref to map (see ctor to add).
+ private:
+  // Creates a MockWavReader instance using the parameters in
+  // audiotrack_names_params_ if the entry corresponding to filepath exists,
+  // otherwise creates a MockWavReader instance using the default parameters.
+  std::unique_ptr<WavReaderInterface> CreateMock(const std::string& filepath);
+
+  const Params& default_params_;
+  std::map<std::string, const Params> audiotrack_names_params_;
 };
 
 }  // namespace conversational_speech
diff --git a/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc b/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
index f16aa753fa..ad1d9a0c87 100644
--- a/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
+++ b/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
@@ -10,8 +10,10 @@
 
 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"
 
-#include <utility>
+#include <algorithm>
+#include <iterator>
 
+#include "webrtc/base/logging.h"
 #include "webrtc/base/pathutils.h"
 
 namespace webrtc {
@@ -25,7 +27,7 @@ MultiEndCall::MultiEndCall(
       wavreader_abstract_factory_(std::move(wavreader_abstract_factory)) {
   FindSpeakerNames();
   CreateAudioTrackReaders();
-  CheckTiming();
+  valid_ = CheckTiming();
 }
 
 MultiEndCall::~MultiEndCall() = default;
@@ -39,10 +41,23 @@ const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
   return audiotrack_readers_;
 }
 
+bool MultiEndCall::valid() const {
+  return valid_;
+}
+
+size_t MultiEndCall::total_duration_samples() const {
+  return total_duration_samples_;
+}
+
+const std::vector<MultiEndCall::SpeakingTurn>& MultiEndCall::speaking_turns()
+    const {
+  return speaking_turns_;
+}
+
 void MultiEndCall::FindSpeakerNames() {
   RTC_DCHECK(speaker_names_.empty());
   for (const Turn& turn : timing_) {
-    speaker_names_.insert(turn.speaker_name);
+    speaker_names_.emplace(turn.speaker_name);
   }
 }
 
@@ -60,14 +75,119 @@ void MultiEndCall::CreateAudioTrackReaders() {
     // Map the audiotrack file name to a new instance of WavReaderInterface.
     std::unique_ptr<WavReaderInterface> wavreader =
         wavreader_abstract_factory_->Create(audiotrack_file_path.pathname());
-    audiotrack_readers_.insert(std::make_pair(
-        turn.audiotrack_file_name, std::move(wavreader)));
+    audiotrack_readers_.emplace(
+        turn.audiotrack_file_name, std::move(wavreader));
   }
 }
 
-void MultiEndCall::CheckTiming() {
-  // TODO(alessiob): use audiotrack lengths and offset to check whether the
-  // timing is valid.
+bool MultiEndCall::CheckTiming() {
+  struct Interval {
+    size_t begin;
+    size_t end;
+  };
+  size_t number_of_turns = timing_.size();
+  auto millisecond_to_samples = [](int ms, int sr) -> int {
+    // Truncation may happen if the sampling rate is not an integer multiple
+    // of 1000 (e.g., 44100).
+    return ms * sr / 1000;
+  };
+  auto in_interval = [](size_t value, const Interval& interval) {
+    return interval.begin <= value && value < interval.end;
+  };
+  total_duration_samples_ = 0;
+  speaking_turns_.clear();
+
+  // Begin and end timestamps for the last two turns (unit: number of samples).
+  Interval second_last_turn = {0, 0};
+  Interval last_turn = {0, 0};
+
+  // Initialize map to store speaking turn indices of each speaker (used to
+  // detect self cross-talk).
+  std::map<std::string, std::vector<size_t>> speaking_turn_indices;
+  for (const std::string& speaker_name : speaker_names_) {
+    speaking_turn_indices.emplace(
+        std::piecewise_construct,
+        std::forward_as_tuple(speaker_name),
+        std::forward_as_tuple());
+  }
+
+  // Parse turns.
+  for (size_t turn_index = 0; turn_index < number_of_turns; ++turn_index) {
+    const Turn& turn = timing_[turn_index];
+    auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
+    RTC_CHECK(it != audiotrack_readers_.end())
+        << "Audio track reader not created";
+
+    // Begin and end timestamps for the current turn.
+    int offset_samples = millisecond_to_samples(
+        turn.offset, it->second->sample_rate());
+    size_t begin_timestamp = last_turn.end + offset_samples;
+    size_t end_timestamp = begin_timestamp + it->second->num_samples();
+    LOG(LS_INFO) << "turn #" << turn_index << " " << begin_timestamp
+        << "-" << end_timestamp << " samples";
+
+    // The order is invalid if the offset is negative and its absolute value is
+    // larger than the duration of the previous turn.
+    if (offset_samples < 0 && -offset_samples > static_cast<int>(
+        last_turn.end - last_turn.begin)) {
+      LOG(LS_ERROR) << "invalid order";
+      return false;
+    }
+
+    // Cross-talk with 3 or more speakers occurs when the beginning of the
+    // current interval falls in the last two turns.
+    if (turn_index > 1 && in_interval(begin_timestamp, last_turn)
+        && in_interval(begin_timestamp, second_last_turn)) {
+      LOG(LS_ERROR) << "cross-talk with 3+ speakers";
+      return false;
+    }
+
+    // Append turn.
+    speaking_turns_.emplace_back(
+        turn.speaker_name, turn.audiotrack_file_name,
+        begin_timestamp, end_timestamp);
+
+    // Save speaking turn index for self cross-talk detection.
+    RTC_DCHECK_EQ(speaking_turns_.size(), turn_index + 1);
+    speaking_turn_indices[turn.speaker_name].push_back(turn_index);
+
+    // Update total duration of the conversational speech.
+    if (total_duration_samples_ < end_timestamp)
+      total_duration_samples_ = end_timestamp;
+
+    // Update and continue with next turn.
+    second_last_turn = last_turn;
+    last_turn.begin = begin_timestamp;
+    last_turn.end = end_timestamp;
+  }
+
+  // Detect self cross-talk.
+  for (const std::string& speaker_name : speaker_names_) {
+    LOG(LS_INFO) << "checking self cross-talk for <"
+        << speaker_name << ">";
+
+    // Copy all turns for this speaker to new vector.
+    std::vector<SpeakingTurn> speaking_turns_for_name;
+    std::copy_if(speaking_turns_.begin(), speaking_turns_.end(),
+                 std::back_inserter(speaking_turns_for_name),
+                 [&speaker_name](const SpeakingTurn& st){
+                     return st.speaker_name == speaker_name; });
+
+    // Check for overlap between adjacent elements.
+    // This is a sufficient condition for self cross-talk since the intervals
+    // are sorted by begin timestamp.
+    auto overlap = std::adjacent_find(
+        speaking_turns_for_name.begin(), speaking_turns_for_name.end(),
+        [](const SpeakingTurn& a, const SpeakingTurn& b) {
+            return a.end > b.begin; });
+
+    if (overlap != speaking_turns_for_name.end()) {
+      LOG(LS_ERROR) << "Self cross-talk detected";
+      return false;
+    }
+  }
+
+  return true;
 }
 
 }  // namespace conversational_speech
diff --git a/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h b/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h
index 234cb2799e..dd03a07e7a 100644
--- a/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h
+++ b/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h
@@ -11,10 +11,13 @@
 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_
 #define WEBRTC_MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_
 
+#include <cstddef>
 #include <map>
 #include <memory>
 #include <set>
 #include <string>
+#include <utility>
+#include <vector>
 
 #include "webrtc/base/array_view.h"
 #include "webrtc/base/constructormagic.h"
@@ -28,6 +31,20 @@ namespace conversational_speech {
 
 class MultiEndCall {
  public:
+  struct SpeakingTurn {
+    // Constructor required in order to use std::vector::emplace_back().
+    SpeakingTurn(std::string new_speaker_name,
+                 std::string new_audiotrack_file_name,
+                 size_t new_begin, size_t new_end)
+        : speaker_name(std::move(new_speaker_name)),
+          audiotrack_file_name(std::move(new_audiotrack_file_name)),
+          begin(new_begin), end(new_end) {}
+    std::string speaker_name;
+    std::string audiotrack_file_name;
+    size_t begin;
+    size_t end;
+  };
+
   MultiEndCall(
       rtc::ArrayView<const Turn> timing, const std::string& audiotracks_path,
       std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory);
@@ -36,16 +53,20 @@ class MultiEndCall {
   const std::set<std::string>& speaker_names() const;
   const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
       audiotrack_readers() const;
+  bool valid() const;
+  size_t total_duration_samples() const;
+  const std::vector<SpeakingTurn>& speaking_turns() const;
 
  private:
-  // Find unique speaker names.
+  // Finds unique speaker names.
   void FindSpeakerNames();
 
-  // Create one WavReader instance for each unique audiotrack.
+  // Creates one WavReader instance for each unique audiotrack.
   void CreateAudioTrackReaders();
 
-  // Check the speaking turns timing.
-  void CheckTiming();
+  // Validates the speaking turns timing information. Accepts cross-talk, but
+  // only up to 2 speakers. Rejects unordered turns and self cross-talk.
+  bool CheckTiming();
 
   rtc::ArrayView<const Turn> timing_;
   const std::string& audiotracks_path_;
@@ -53,6 +74,9 @@
   std::set<std::string> speaker_names_;
   std::map<std::string, std::unique_ptr<WavReaderInterface>>
      audiotrack_readers_;
+  bool valid_;
+  size_t total_duration_samples_;
+  std::vector<SpeakingTurn> speaking_turns_;
 
   RTC_DISALLOW_COPY_AND_ASSIGN(MultiEndCall);
 };
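A minimal sketch, not part of the patch, of the sample-count arithmetic behind the expected_duration values in the tests above; it assumes the 48 kHz default rate and the mock track lengths from kDefaultMockWavReaderFactoryParamsMap, and the constant names are illustrative only:

    #include <cstddef>

    // "Pause" setup: A speaks for 0.5 s ("t500"), then B starts 200 ms after
    // A ends. Offsets are converted exactly as in MultiEndCall::CheckTiming():
    // samples = ms * sample_rate / 1000.
    constexpr int kSampleRate = 48000;                        // kDefaultSampleRate.
    constexpr std::size_t kTurnA = 24000;                     // "t500" -> 0.5 s.
    constexpr std::size_t kPause = 200 * kSampleRate / 1000;  // 9600 samples.
    constexpr std::size_t kTurnB = 24000;                     // "t500" -> 0.5 s.

    // 24000 + 9600 + 24000 = 57600 = 48000 * 1.2, i.e. the test's
    // expected_duration of kDefaultSampleRate * 1.2.
    static_assert(kTurnA + kPause + kTurnB == 57600u, "duration mismatch");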