From 849030dab8bc40f5ba82734c8cfc7e0fec8d397c Mon Sep 17 00:00:00 2001
From: Alessio Bazzica <alessiob@webrtc.org>
Date: Wed, 18 Oct 2017 15:24:57 +0200
Subject: [PATCH] Optionally copy clean speech input files under _cache with
 APM-QA.

TBR=

Bug: webrtc:7494
Change-Id: I41c5cfc6fd57aefaf246816c0ba4094947b9e767
Reviewed-on: https://webrtc-review.googlesource.com/13123
Reviewed-by: Alessio Bazzica <alessiob@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20343}
---
 .../apm_quality_assessment.py                 |  8 ++-
 .../quality_assessment/export_unittest.py     |  3 +-
 .../quality_assessment/simulation_unittest.py |  9 ++--
 .../test_data_generation.py                   | 17 +++++-
 .../test_data_generation_factory.py           | 16 +++++-
 .../test_data_generation_unittest.py          | 52 ++++++++++++++++++-
 6 files changed, 96 insertions(+), 9 deletions(-)

diff --git a/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.py b/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.py
index cdb43d4276..78ff5e93e6 100755
--- a/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.py
+++ b/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.py
@@ -108,6 +108,11 @@ def _InstanceArgumentsParser():
                               AudioProcWrapper.  \
                               DEFAULT_APM_SIMULATOR_BIN_PATH)
 
+  parser.add_argument('--copy_with_identity_generator', required=False,
+                      help=('If true, the identity test data generator makes a '
+                            'copy of the clean speech input file.'),
+                      type=lambda v: v.lower() in ('1', 'true'), default=False)
+
   return parser
 
 
@@ -135,7 +140,8 @@ def main():
       test_data_generator_factory=(
           test_data_generation_factory.TestDataGeneratorFactory(
               aechen_ir_database_path=args.air_db_path,
-              noise_tracks_path=args.additive_noise_tracks_path)),
+              noise_tracks_path=args.additive_noise_tracks_path,
+              copy_with_identity=args.copy_with_identity_generator)),
       evaluation_score_factory=eval_scores_factory.EvaluationScoreWorkerFactory(
           polqa_tool_bin_path=os.path.join(args.polqa_path, _POLQA_BIN_NAME)),
       ap_wrapper=audioproc_wrapper.AudioProcWrapper(args.apm_sim_path),
diff --git a/modules/audio_processing/test/py_quality_assessment/quality_assessment/export_unittest.py b/modules/audio_processing/test/py_quality_assessment/quality_assessment/export_unittest.py
index 0eab6cb57e..85ecccd115 100644
--- a/modules/audio_processing/test/py_quality_assessment/quality_assessment/export_unittest.py
+++ b/modules/audio_processing/test/py_quality_assessment/quality_assessment/export_unittest.py
@@ -41,7 +41,8 @@ class TestEchoPathSimulators(unittest.TestCase):
         test_data_generator_factory=(
             test_data_generation_factory.TestDataGeneratorFactory(
                 aechen_ir_database_path='',
-                noise_tracks_path='')),
+                noise_tracks_path='',
+                copy_with_identity=False)),
         evaluation_score_factory=(
           eval_scores_factory.EvaluationScoreWorkerFactory(
               polqa_tool_bin_path=os.path.join(
diff --git a/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation_unittest.py b/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation_unittest.py
index 265ff5442a..cf9aac8da9 100644
--- a/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation_unittest.py
+++ b/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation_unittest.py
@@ -64,7 +64,8 @@ class TestApmModuleSimulator(unittest.TestCase):
     test_data_generator_factory = (
         test_data_generation_factory.TestDataGeneratorFactory(
             aechen_ir_database_path='',
-            noise_tracks_path=''))
+            noise_tracks_path='',
+            copy_with_identity=False))
     evaluation_score_factory = eval_scores_factory.EvaluationScoreWorkerFactory(
         polqa_tool_bin_path=os.path.join(
             os.path.dirname(__file__), 'fake_polqa'))
@@ -108,7 +109,8 @@ class TestApmModuleSimulator(unittest.TestCase):
         test_data_generator_factory=(
             test_data_generation_factory.TestDataGeneratorFactory(
                 aechen_ir_database_path='',
-                noise_tracks_path='')),
+                noise_tracks_path='',
+                copy_with_identity=False)),
         evaluation_score_factory=(
             eval_scores_factory.EvaluationScoreWorkerFactory(
                 polqa_tool_bin_path=os.path.join(
@@ -143,7 +145,8 @@ class TestApmModuleSimulator(unittest.TestCase):
         test_data_generator_factory=(
             test_data_generation_factory.TestDataGeneratorFactory(
                 aechen_ir_database_path='',
-                noise_tracks_path='')),
+                noise_tracks_path='',
+                copy_with_identity=False)),
         evaluation_score_factory=(
             eval_scores_factory.EvaluationScoreWorkerFactory(
                 polqa_tool_bin_path=os.path.join(
diff --git a/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py b/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py
index 3f3c17237b..dac4328588 100644
--- a/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py
+++ b/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py
@@ -23,6 +23,7 @@ obtained by convolving the input signal with an impulse response.
 
 import logging
 import os
+import shutil
 import sys
 
 try:
@@ -182,13 +183,27 @@ class IdentityTestDataGenerator(TestDataGenerator):
 
   NAME = 'identity'
 
-  def __init__(self, output_directory_prefix):
+  def __init__(self, output_directory_prefix, copy_with_identity):
     TestDataGenerator.__init__(self, output_directory_prefix)
+    self._copy_with_identity = copy_with_identity
+
+  @property
+  def copy_with_identity(self):
+    return self._copy_with_identity
 
   def _Generate(
       self, input_signal_filepath, test_data_cache_path, base_output_path):
     config_name = 'default'
     output_path = self._MakeDir(base_output_path, config_name)
+
+    if self._copy_with_identity:
+      input_signal_filepath_new = os.path.join(
+          test_data_cache_path, os.path.split(input_signal_filepath)[1])
+      logging.info('copying ' + input_signal_filepath + ' to ' + (
+          input_signal_filepath_new))
+      shutil.copy(input_signal_filepath, input_signal_filepath_new)
+      input_signal_filepath = input_signal_filepath_new
+
     self._AddNoiseReferenceFilesPair(
         config_name=config_name,
         noisy_signal_filepath=input_signal_filepath,
diff --git a/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_factory.py b/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_factory.py
index fd7f3f7c0c..c80d150228 100644
--- a/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_factory.py
+++ b/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_factory.py
@@ -22,10 +22,20 @@ class TestDataGeneratorFactory(object):
   generators will be produced.
   """
 
-  def __init__(self, aechen_ir_database_path, noise_tracks_path):
+  def __init__(self, aechen_ir_database_path, noise_tracks_path,
+               copy_with_identity=False):
+    """Ctor.
+
+    Args:
+      aechen_ir_database_path: Path to the Aechen Impulse Response database.
+      noise_tracks_path: Path to the noise tracks to add.
+      copy_with_identity: If True (default: False), the identity generator
+                          makes copies of the clean speech input files.
+    """
     self._output_directory_prefix = None
     self._aechen_ir_database_path = aechen_ir_database_path
     self._noise_tracks_path = noise_tracks_path
+    self._copy_with_identity = copy_with_identity
 
   def SetOutputDirectoryPrefix(self, prefix):
     self._output_directory_prefix = prefix
@@ -46,6 +56,10 @@ class TestDataGeneratorFactory(object):
     logging.debug('factory producing %s', test_data_generators_class)
 
     if test_data_generators_class == (
+        test_data_generation.IdentityTestDataGenerator):
+      return test_data_generation.IdentityTestDataGenerator(
+          self._output_directory_prefix, self._copy_with_identity)
+    elif test_data_generators_class == (
         test_data_generation.ReverberationTestDataGenerator):
       return test_data_generation.ReverberationTestDataGenerator(
           self._output_directory_prefix, self._aechen_ir_database_path)
diff --git a/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_unittest.py b/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_unittest.py
index 3e59cf9ae9..b0d003dbe8 100644
--- a/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_unittest.py
+++ b/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_unittest.py
@@ -68,10 +68,11 @@ class TestTestDataGenerators(unittest.TestCase):
         aechen_ir_database_path=self._fake_air_db_path,
         noise_tracks_path=test_data_generation.  \
                           AdditiveNoiseTestDataGenerator.  \
-                          DEFAULT_NOISE_TRACKS_PATH)
+                          DEFAULT_NOISE_TRACKS_PATH,
+        copy_with_identity=False)
     generators_factory.SetOutputDirectoryPrefix('datagen-')
 
-    # Use a sample input file as clean input signal.
+    # Use a simple input file as clean input signal.
     input_signal_filepath = os.path.join(
         os.getcwd(), 'probing_signals', 'tone-880.wav')
     self.assertTrue(os.path.exists(input_signal_filepath))
@@ -97,6 +98,53 @@ class TestTestDataGenerators(unittest.TestCase):
       self._CheckGeneratedPairsSignalDurations(generator, input_signal)
       self._CheckGeneratedPairsOutputPaths(generator)
 
+  def testTestidentityDataGenerator(self):
+    # Preliminary check: the output and cache directories must exist.
+    self.assertTrue(os.path.exists(self._base_output_path))
+    self.assertTrue(os.path.exists(self._test_data_cache_path))
+
+    # Use a simple input file as clean input signal.
+    input_signal_filepath = os.path.join(
+        os.getcwd(), 'probing_signals', 'tone-880.wav')
+    self.assertTrue(os.path.exists(input_signal_filepath))
+
+    def GetNoiseReferenceFilePaths(identity_generator):
+      noisy_paths = identity_generator.noisy_signal_filepaths
+      reference_paths = identity_generator.reference_signal_filepaths
+      keys = list(noisy_paths.keys())  # keys()[0] fails on Python 3 views.
+      self.assertEqual(1, len(keys))  # Not stripped by -O, unlike assert.
+      self.assertEqual(keys, list(reference_paths.keys()))
+      return noisy_paths[keys[0]], reference_paths[keys[0]]
+
+    # Test the |copy_with_identity| flag.
+    for copy_with_identity in [False, True]:
+      # Instance the generator through the factory.
+      factory = test_data_generation_factory.TestDataGeneratorFactory(
+          aechen_ir_database_path='', noise_tracks_path='',
+          copy_with_identity=copy_with_identity)
+      factory.SetOutputDirectoryPrefix('datagen-')
+      generator = factory.GetInstance(
+          test_data_generation.IdentityTestDataGenerator)
+      # Check |copy_with_identity| is set correctly.
+      self.assertEqual(copy_with_identity, generator.copy_with_identity)
+
+      # Generate test data and extract the paths to the noisy and the reference
+      # files.
+      generator.Generate(
+          input_signal_filepath=input_signal_filepath,
+          test_data_cache_path=self._test_data_cache_path,
+          base_output_path=self._base_output_path)
+      noisy_signal_filepath, reference_signal_filepath = (
+          GetNoiseReferenceFilePaths(generator))
+
+      # Check that a copy is made if and only if |copy_with_identity| is True.
+      if copy_with_identity:
+        self.assertNotEqual(noisy_signal_filepath, input_signal_filepath)
+        self.assertNotEqual(reference_signal_filepath, input_signal_filepath)
+      else:
+        self.assertEqual(noisy_signal_filepath, input_signal_filepath)
+        self.assertEqual(reference_signal_filepath, input_signal_filepath)
+
   def _CheckGeneratedPairsListSizes(self, generator):
     config_names = generator.config_names
     number_of_pairs = len(config_names)