From 2bdeb226d587853a4422c8dfe65d3ca9756d169d Mon Sep 17 00:00:00 2001 From: Alessio Bazzica Date: Tue, 17 Oct 2017 15:59:24 +0200 Subject: [PATCH] APM-QA clean speech annotations. Extract and save some simple annotations for the clean speech input. The annotations are estimated level, VAD (assuming clean speech) and speech level. TBR= Bug: webrtc:7494 Change-Id: Id73358e228fac721a77fc8a61a3474a5d52bdc84 Reviewed-on: https://webrtc-review.googlesource.com/12321 Commit-Queue: Alessio Bazzica Reviewed-by: Alessio Bazzica Cr-Commit-Position: refs/heads/master@{#20327} --- .../test/py_quality_assessment/BUILD.gn | 2 + .../quality_assessment/annotations.py | 119 ++++++++++++++++++ .../annotations_unittest.py | 67 ++++++++++ .../quality_assessment/signal_processing.py | 4 +- .../quality_assessment/simulation.py | 62 ++++++++- .../quality_assessment/simulation_unittest.py | 51 ++++++++ .../test_data_generation.py | 35 ------ .../test_data_generation_unittest.py | 19 --- 8 files changed, 299 insertions(+), 60 deletions(-) create mode 100644 modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations.py create mode 100644 modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations_unittest.py diff --git a/modules/audio_processing/test/py_quality_assessment/BUILD.gn b/modules/audio_processing/test/py_quality_assessment/BUILD.gn index c5b7bec5d7..dfae858276 100644 --- a/modules/audio_processing/test/py_quality_assessment/BUILD.gn +++ b/modules/audio_processing/test/py_quality_assessment/BUILD.gn @@ -54,6 +54,7 @@ copy("lib") { testonly = true sources = [ "quality_assessment/__init__.py", + "quality_assessment/annotations.py", "quality_assessment/audioproc_wrapper.py", "quality_assessment/collect_data.py", "quality_assessment/data_access.py", @@ -120,6 +121,7 @@ rtc_executable("fake_polqa") { copy("lib_unit_tests") { testonly = true sources = [ + "quality_assessment/annotations_unittest.py", 
"quality_assessment/echo_path_simulation_unittest.py", "quality_assessment/eval_scores_unittest.py", "quality_assessment/input_mixer_unittest.py", diff --git a/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations.py b/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations.py new file mode 100644 index 0000000000..81f6af435e --- /dev/null +++ b/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations.py @@ -0,0 +1,119 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Extraction of annotations from audio files. +""" + +from __future__ import division +import logging +import os +import sys + +try: + import numpy as np +except ImportError: + logging.critical('Cannot import the third-party Python package numpy') + sys.exit(1) + +from . import signal_processing + + +class AudioAnnotationsExtractor(object): + """Extracts annotations from audio files. + """ + + _LEVEL_FILENAME = 'level.npy' + _VAD_FILENAME = 'vad.npy' + _SPEECH_LEVEL_FILENAME = 'speech_level.npy' + + # Level estimation params. The time constants in ms indicate the time it takes + # for the level estimate to go down/up by 1 db if the signal is zero. + _LEVEL_ATTACK_MS = 5.0 + _LEVEL_DECAY_MS = 20.0 + _ONE_DB_REDUCTION = np.power(10.0, -1.0 / 20.0) + + # VAD params. 
+ _VAD_THRESHOLD = 1 + + def __init__(self): + self._signal = None + self._level = None + self._vad = None + self._speech_level = None + self._c_attack = None + self._c_decay = None + + @classmethod + def GetLevelFileName(cls): + return cls._LEVEL_FILENAME + + @classmethod + def GetVadFileName(cls): + return cls._VAD_FILENAME + + @classmethod + def GetSpeechLevelFileName(cls): + return cls._SPEECH_LEVEL_FILENAME + + def GetLevel(self): + return self._level + + def GetVad(self): + return self._vad + + def GetSpeechLevel(self): + return self._speech_level + + def Extract(self, filepath): + # Load signal. + self._signal = signal_processing.SignalProcessingUtils.LoadWav(filepath) + if self._signal.channels != 1: + raise NotImplementedError('multiple-channel annotations not implemented') + + # Smoothing params. + sample_duration_ms = 1000.0 / self._signal.frame_rate + self._c_attack = 0 if self._LEVEL_ATTACK_MS == 0 else ( + self._ONE_DB_REDUCTION ** (sample_duration_ms / self._LEVEL_ATTACK_MS)) + self._c_decay = 0 if self._LEVEL_DECAY_MS == 0 else ( + self._ONE_DB_REDUCTION ** (sample_duration_ms / self._LEVEL_DECAY_MS)) + + # Compute level. + self._LevelEstimation() + + # Naive VAD based on level thresholding. It assumes ideal clean speech + # with high SNR. + # TODO(alessiob): Maybe replace with a VAD based on stationary-noise + # detection. + vad_threshold = np.percentile(self._level, self._VAD_THRESHOLD) + self._vad = np.uint8(self._level > vad_threshold) + + # Speech level based on VAD output. + self._speech_level = self._level * self._vad + + def Save(self, output_path): + np.save(os.path.join(output_path, self._LEVEL_FILENAME), self._level) + np.save(os.path.join(output_path, self._VAD_FILENAME), self._vad) + np.save(os.path.join(output_path, self._SPEECH_LEVEL_FILENAME), + self._speech_level) + + def _LevelEstimation(self): + # Read samples. 
+ samples = signal_processing.SignalProcessingUtils.AudioSegmentToRawData( + self._signal) + num_samples = len(samples) + + # Envelope. + self._level = np.abs(samples) + + # Envelope smoothing. + smooth = lambda curr, prev, k: (1 - k) * curr + k * prev + self._level[0] = smooth(self._level[0], 0.0, self._c_attack) + for i in range(1, num_samples): + self._level[i] = smooth( + self._level[i], self._level[i - 1], self._c_attack if ( + self._level[i] > self._level[i - 1]) else self._c_decay) diff --git a/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations_unittest.py b/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations_unittest.py new file mode 100644 index 0000000000..b59397cc09 --- /dev/null +++ b/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations_unittest.py @@ -0,0 +1,67 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Unit tests for the annotations module. +""" + +import logging +import os +import shutil +import tempfile +import unittest + +import numpy as np + +from . import annotations +from . import input_signal_creator +from . import signal_processing + + +class TestAnnotationsExtraction(unittest.TestCase): + """Unit tests for the annotations module. 
+ """ + + _CLEAN_TMP_OUTPUT = False + + def setUp(self): + """Create temporary folder.""" + self._tmp_path = tempfile.mkdtemp() + self._wav_file_path = os.path.join(self._tmp_path, 'tone.wav') + pure_tone, _ = input_signal_creator.InputSignalCreator.Create( + 'pure_tone', [440, 1000]) + signal_processing.SignalProcessingUtils.SaveWav( + self._wav_file_path, pure_tone) + + def tearDown(self): + """Recursively delete temporary folder.""" + if self._CLEAN_TMP_OUTPUT: + shutil.rmtree(self._tmp_path) + else: + logging.warning(self.id() + ' did not clean the temporary path ' + ( + self._tmp_path)) + + def testExtraction(self): + e = annotations.AudioAnnotationsExtractor() + e.Extract(self._wav_file_path) + vad = e.GetVad() + assert len(vad) > 0 + self.assertGreaterEqual(float(np.sum(vad)) / len(vad), 0.95) + + def testSaveLoad(self): + e = annotations.AudioAnnotationsExtractor() + e.Extract(self._wav_file_path) + e.Save(self._tmp_path) + np.testing.assert_array_equal( + e.GetLevel(), + np.load(os.path.join(self._tmp_path, e.GetLevelFileName()))) + np.testing.assert_array_equal( + e.GetVad(), + np.load(os.path.join(self._tmp_path, e.GetVadFileName()))) + np.testing.assert_array_equal( + e.GetSpeechLevel(), + np.load(os.path.join(self._tmp_path, e.GetSpeechLevelFileName()))) diff --git a/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing.py b/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing.py index 5591a289c9..9e0198da85 100644 --- a/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing.py +++ b/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing.py @@ -165,6 +165,8 @@ class SignalProcessingUtils(object): @classmethod def Fft(cls, signal, normalize=True): + if signal.channels != 1: + raise NotImplementedError('multiple-channel FFT not implemented') x = cls.AudioSegmentToRawData(signal).astype(np.float32) if normalize: x /= 
max(abs(np.max(x)), 1.0) @@ -188,7 +190,7 @@ class SignalProcessingUtils(object): True if hard clipping is detect, False otherwise. """ if signal.channels != 1: - raise NotImplementedError('mutliple-channel clipping not implemented') + raise NotImplementedError('multiple-channel clipping not implemented') if signal.sample_width != 2: # Note that signal.sample_width is in bytes. raise exceptions.SignalProcessingException( 'hard-clipping detection only supported for 16 bit samples') diff --git a/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation.py b/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation.py index 6545e0ec3f..d62069fc8e 100644 --- a/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation.py +++ b/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation.py @@ -12,12 +12,15 @@ import logging import os +from . import annotations from . import data_access from . import echo_path_simulation from . import echo_path_simulation_factory from . import eval_scores from . import exceptions from . import input_mixer +from . import input_signal_creator +from . import signal_processing from . import test_data_generation @@ -43,6 +46,7 @@ class ApmModuleSimulator(object): self._evaluation_score_factory = evaluation_score_factory self._audioproc_wrapper = ap_wrapper self._evaluator = evaluator + self._annotator = annotations.AudioAnnotationsExtractor() # Init. self._test_data_generator_factory.SetOutputDirectoryPrefix( @@ -52,6 +56,7 @@ class ApmModuleSimulator(object): # Properties for each run. self._base_output_path = None + self._output_cache_path = None self._test_data_generators = None self._evaluation_score_workers = None self._config_filepaths = None @@ -116,6 +121,9 @@ class ApmModuleSimulator(object): 'invalid echo path simulator') self._base_output_path = os.path.abspath(output_dir) + # Output path used to cache the data shared across simulations. 
+ self._output_cache_path = os.path.join(self._base_output_path, '_cache') + # Instance test data generators. self._test_data_generators = [self._test_data_generator_factory.GetInstance( test_data_generators_class=( @@ -164,14 +172,28 @@ class ApmModuleSimulator(object): # Try different capture-render pairs. for capture_input_name in self._capture_input_filepaths: + # Output path for the capture signal annotations. + capture_annotations_cache_path = os.path.join( + self._output_cache_path, + self._PREFIX_CAPTURE + capture_input_name) + data_access.MakeDirectory(capture_annotations_cache_path) + + # Capture. capture_input_filepath = self._capture_input_filepaths[ capture_input_name] + if not os.path.exists(capture_input_filepath): + # If the input signal file does not exist, try to create using the + # available input signal creators. + self._CreateInputSignal(capture_input_filepath) + assert os.path.exists(capture_input_filepath) + self._ExtractCaptureAnnotations( + capture_input_filepath, capture_annotations_cache_path) + + # Render and simulated echo path (optional). render_input_filepath = None if without_render_input else ( self._render_input_filepaths[capture_input_name]) render_input_name = '(none)' if without_render_input else ( self._ExtractFileName(render_input_filepath)) - - # Instance echo path simulator (if needed). echo_path_simulator = ( echo_path_simulation_factory.EchoPathSimulatorFactory.GetInstance( self._echo_path_simulator_class, render_input_filepath)) @@ -184,10 +206,8 @@ class ApmModuleSimulator(object): test_data_generators.NAME, echo_path_simulator.NAME) # Output path for the generated test data. - # The path is used to cache the signals shared across simulations. 
test_data_cache_path = os.path.join( - self._base_output_path, '_cache', - self._PREFIX_CAPTURE + capture_input_name, + capture_annotations_cache_path, self._PREFIX_TEST_DATA_GEN + test_data_generators.NAME) data_access.MakeDirectory(test_data_cache_path) logging.debug('test data cache path: <%s>', test_data_cache_path) @@ -216,6 +236,38 @@ class ApmModuleSimulator(object): echo_test_data_cache_path, output_path, config_filepath, echo_path_simulator) + @staticmethod + def _CreateInputSignal(input_signal_filepath): + """Creates a missing input signal file. + + The file name is parsed to extract input signal creator and params. If a + creator is matched and the parameters are valid, a new signal is generated + and written in |input_signal_filepath|. + + Args: + input_signal_filepath: Path to the input signal audio file to write. + + Raises: + InputSignalCreatorException + """ + filename = os.path.splitext(os.path.split(input_signal_filepath)[-1])[0] + filename_parts = filename.split('-') + + if len(filename_parts) < 2: + raise exceptions.InputSignalCreatorException( + 'Cannot parse input signal file name') + + signal, metadata = input_signal_creator.InputSignalCreator.Create( + filename_parts[0], filename_parts[1].split('_')) + + signal_processing.SignalProcessingUtils.SaveWav( + input_signal_filepath, signal) + data_access.Metadata.SaveFileMetadata(input_signal_filepath, metadata) + + def _ExtractCaptureAnnotations(self, input_filepath, output_path): + self._annotator.Extract(input_filepath) + self._annotator.Save(output_path) + def _Simulate(self, test_data_generators, clean_capture_input_filepath, render_input_filepath, test_data_cache_path, echo_test_data_cache_path, output_path, config_filepath, diff --git a/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation_unittest.py b/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation_unittest.py index 521f006b6d..265ff5442a 100644 --- 
a/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation_unittest.py +++ b/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation_unittest.py @@ -102,6 +102,39 @@ class TestApmModuleSimulator(unittest.TestCase): self.assertGreaterEqual(len(evaluator.Run.call_args_list), min_number_of_simulations) + def testInputSignalCreation(self): + # Instance simulator. + simulator = simulation.ApmModuleSimulator( + test_data_generator_factory=( + test_data_generation_factory.TestDataGeneratorFactory( + aechen_ir_database_path='', + noise_tracks_path='')), + evaluation_score_factory=( + eval_scores_factory.EvaluationScoreWorkerFactory( + polqa_tool_bin_path=os.path.join( + os.path.dirname(__file__), 'fake_polqa'))), + ap_wrapper=audioproc_wrapper.AudioProcWrapper( + audioproc_wrapper.AudioProcWrapper.DEFAULT_APM_SIMULATOR_BIN_PATH), + evaluator=evaluation.ApmModuleEvaluator()) + + # Nonexistent input files to be silently created. + input_files = [ + os.path.join(self._tmp_path, 'pure_tone-440_1000.wav'), + os.path.join(self._tmp_path, 'pure_tone-1000_500.wav'), + ] + self.assertFalse(any([os.path.exists(input_file) for input_file in ( + input_files)])) + + # The input files are created during the simulation. + simulator.Run( + config_filepaths=['apm_configs/default.json'], + capture_input_filepaths=input_files, + test_data_generator_names=['identity'], + eval_score_names=['audio_level_peak'], + output_dir=self._output_path) + self.assertTrue(all([os.path.exists(input_file) for input_file in ( + input_files)])) + def testPureToneGenerationWithTotalHarmonicDistorsion(self): logging.warning = mock.MagicMock(name='warning') @@ -143,3 +176,21 @@ class TestApmModuleSimulator(unittest.TestCase): logging.warning.assert_called_with('the evaluation failed: %s', ( 'The THD score cannot be used with any test data generator other than ' '"identity"')) + + # # Init. 
+ # generator = test_data_generation.IdentityTestDataGenerator('tmp') + # input_signal_filepath = os.path.join( + # self._test_data_cache_path, 'pure_tone-440_1000.wav') + + # # Check that the input signal is generated. + # self.assertFalse(os.path.exists(input_signal_filepath)) + # generator.Generate( + # input_signal_filepath=input_signal_filepath, + # test_data_cache_path=self._test_data_cache_path, + # base_output_path=self._base_output_path) + # self.assertTrue(os.path.exists(input_signal_filepath)) + + # # Check input signal properties. + # input_signal = signal_processing.SignalProcessingUtils.LoadWav( + # input_signal_filepath) + # self.assertEqual(1000, len(input_signal)) diff --git a/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py b/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py index bd0efb7512..3f3c17237b 100644 --- a/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py +++ b/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py @@ -33,7 +33,6 @@ except ImportError: from . import data_access from . import exceptions -from . import input_signal_creator from . import signal_processing @@ -110,12 +109,6 @@ class TestDataGenerator(object): base_output_path: base path where output is written. """ self.Clear() - - # If the input signal file does not exist, try to create using the - # available input signal creators. - if not os.path.exists(input_signal_filepath): - self._CreateInputSignal(input_signal_filepath) - self._Generate( input_signal_filepath, test_data_cache_path, base_output_path) @@ -126,34 +119,6 @@ class TestDataGenerator(object): self._apm_output_paths = {} self._reference_signal_filepaths = {} - @classmethod - def _CreateInputSignal(cls, input_signal_filepath): - """Creates a missing input signal file. 
- - The file name is parsed to extract input signal creator and params. If a - creator is matched and the parameters are valid, a new signal is generated - and written in |input_signal_filepath|. - - Args: - input_signal_filepath: Path to the input signal audio file to write. - - Raises: - InputSignalCreatorException - """ - filename = os.path.splitext(os.path.split(input_signal_filepath)[-1])[0] - filename_parts = filename.split('-') - - if len(filename_parts) < 2: - raise exceptions.InputSignalCreatorException( - 'Cannot parse input signal file name') - - signal, metadata = input_signal_creator.InputSignalCreator.Create( - filename_parts[0], filename_parts[1].split('_')) - - signal_processing.SignalProcessingUtils.SaveWav( - input_signal_filepath, signal) - data_access.Metadata.SaveFileMetadata(input_signal_filepath, metadata) - def _Generate( self, input_signal_filepath, test_data_cache_path, base_output_path): """Abstract method to be implemented in each concrete class. diff --git a/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_unittest.py b/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_unittest.py index 73ea45d2a2..3e59cf9ae9 100644 --- a/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_unittest.py +++ b/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_unittest.py @@ -52,25 +52,6 @@ class TestTestDataGenerators(unittest.TestCase): shutil.rmtree(self._test_data_cache_path) shutil.rmtree(self._fake_air_db_path) - def testInputSignalCreation(self): - # Init. - generator = test_data_generation.IdentityTestDataGenerator('tmp') - input_signal_filepath = os.path.join( - self._test_data_cache_path, 'pure_tone-440_1000.wav') - - # Check that the input signal is generated. 
- self.assertFalse(os.path.exists(input_signal_filepath)) - generator.Generate( - input_signal_filepath=input_signal_filepath, - test_data_cache_path=self._test_data_cache_path, - base_output_path=self._base_output_path) - self.assertTrue(os.path.exists(input_signal_filepath)) - - # Check input signal properties. - input_signal = signal_processing.SignalProcessingUtils.LoadWav( - input_signal_filepath) - self.assertEqual(1000, len(input_signal)) - def testTestDataGenerators(self): # Preliminary check. self.assertTrue(os.path.exists(self._base_output_path))