diff --git a/webrtc/modules/audio_processing/test/py_conversational_speech/README.md b/webrtc/modules/audio_processing/test/py_conversational_speech/README.md index 432448e278..79d07fdf08 100644 --- a/webrtc/modules/audio_processing/test/py_conversational_speech/README.md +++ b/webrtc/modules/audio_processing/test/py_conversational_speech/README.md @@ -26,12 +26,16 @@ IMPORTANT: **the whole code has not been landed yet.** For each end, there is a set of audio tracks, e.g., a1, a2 and a3 (speaker A) and b1, b2 (speaker B). The text file with the timing information may look like this: -``` A a1 0 - B b1 0 - A a2 100 - B b2 -200 - A a3 0 - A a4 0``` + +``` +A a1 0 +B b1 0 +A a2 100 +B b2 -200 +A a3 0 +A a4 0 +``` + The first column indicates the speaker name, the second contains the audio track file names, and the third the offsets (in milliseconds) used to concatenate the chunks. @@ -39,26 +43,32 @@ chunks. Assume that all the audio tracks in the example above are 1000 ms long. The tool will then generate two tracks (A and B) that look like this: -```Track A: +**Track A** +``` a1 (1000 ms) silence (1100 ms) a2 (1000 ms) silence (800 ms) a3 (1000 ms) - a4 (1000 ms)``` + a4 (1000 ms) +``` -```Track B: +**Track B** +``` silence (1000 ms) b1 (1000 ms) silence (900 ms) b2 (1000 ms) - silence (2000 ms)``` + silence (2000 ms) +``` The two tracks can also be visualized as follows (one character represents 100 ms, "." is silence and "*" is speech). -```t: 0 1 2 3 4 5 6 (s) +``` +t: 0 1 2 3 4 5 6 (s) A: **********...........**********........******************** B: ..........**********.........**********....................
^ 200 ms cross-talk
- 100 ms silence ^```
+ 100 ms silence ^
+```
diff --git a/webrtc/modules/audio_processing/test/py_conversational_speech/generate_conversational_tracks.py b/webrtc/modules/audio_processing/test/py_conversational_speech/generate_conversational_tracks.py
new file mode 100644
index 0000000000..41e3bbdc6b
--- /dev/null
+++ b/webrtc/modules/audio_processing/test/py_conversational_speech/generate_conversational_tracks.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+"""Generate multiple-end audio tracks to simulate conversational
+speech with two or more participants.
+
+Usage: generate_conversational_tracks.py
+          -i path/to/source/audiotracks
+          -t path/to/timing_file.txt
+          -o output/path
+"""
+
+import argparse
+import logging
+import sys
+
+def _InstanceArgumentsParser():
+  """Builds the command line arguments parser for this script.
+
+  Returns:
+    An argparse.ArgumentParser accepting the required -i/--input_tracks_path
+    and -t/--timing_file options and the optional -o/--output_dir option,
+    which defaults to 'output'.
+  """
+  parser = argparse.ArgumentParser(description=(
+      'Generate multiple-end audio tracks to simulate conversational speech '
+      'with two or more participants.'))
+
+  parser.add_argument('-i', '--input_tracks_path', required=True,
+                      help='directory containing the speech turn wav files')
+
+  parser.add_argument('-t', '--timing_file', required=True,
+                      help='path to the timing text file')
+
+  parser.add_argument('-o', '--output_dir', required=False,
+                      help=('base path to the output directory in which the '
+                            'output wav files are saved'),
+                      default='output')
+
+  return parser
+
+
+def main():
+  """Parses the command line, logs the parsed arguments and exits.
+
+  argparse terminates the process with a non-zero status when a required
+  option is missing; otherwise the script exits with status 0.
+  """
+  # TODO(alessiob): level = logging.INFO once debugged.
+  logging.basicConfig(level=logging.DEBUG)
+
+  parser = _InstanceArgumentsParser()
+  args = parser.parse_args()
+
+  # TODO(alessiob): pass the arguments to the app controller.
+
+  # TODO(alessiob): remove when comment above addressed.
+  logging.debug(args)
+
+  sys.exit(0)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/webrtc/modules/audio_processing/test/py_conversational_speech/test_generation.py b/webrtc/modules/audio_processing/test/py_conversational_speech/test_generation.py
new file mode 100644
index 0000000000..3fc34e399d
--- /dev/null
+++ b/webrtc/modules/audio_processing/test/py_conversational_speech/test_generation.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+"""Unit tests for the generate_conversational_tracks script."""
+
+import unittest
+
+import generate_conversational_tracks
+
+class TestGenerationScript(unittest.TestCase):
+  """Checks the command line behavior of generate_conversational_tracks."""
+
+  def testMain(self):
+    """Checks that main() exits with an error code without arguments."""
+    # Exit with error code if no arguments are passed.
+    with self.assertRaises(SystemExit) as cm:
+      generate_conversational_tracks.main()
+    # Checked after the with block, once cm.exception has been recorded.
+    self.assertGreater(cm.exception.code, 0)