diff --git a/common_audio/wav_file.cc b/common_audio/wav_file.cc index 30968c25c9..e49126f139 100644 --- a/common_audio/wav_file.cc +++ b/common_audio/wav_file.cc @@ -13,30 +13,35 @@ #include #include +#include #include #include #include #include "common_audio/include/audio_util.h" -#include "common_audio/wav_header.h" #include "rtc_base/checks.h" -#include "rtc_base/logging.h" #include "rtc_base/system/arch.h" namespace webrtc { namespace { -// We write 16-bit PCM WAV files. -constexpr WavFormat kWavFormat = kWavFormatPcm; static_assert(std::is_trivially_destructible::value, ""); -constexpr size_t kBytesPerSample = 2; + +// Checks whether the format is supported or not. +bool FormatSupported(WavFormat format) { + // Only PCM and IEEE Float formats are supported. + return format == WavFormat::kWavFormatPcm || + format == WavFormat::kWavFormatIeeeFloat; +} // Doesn't take ownership of the file handle and won't close it. -class ReadableWavFile : public ReadableWav { +class WavHeaderFileReader : public WavHeaderReader { public: - explicit ReadableWavFile(FileWrapper* file) : file_(file) {} - ReadableWavFile(const ReadableWavFile&) = delete; - ReadableWavFile& operator=(const ReadableWavFile&) = delete; + explicit WavHeaderFileReader(FileWrapper* file) : file_(file) {} + + WavHeaderFileReader(const WavHeaderFileReader&) = delete; + WavHeaderFileReader& operator=(const WavHeaderFileReader&) = delete; + size_t Read(void* buf, size_t num_bytes) override { size_t count = file_->Read(buf, num_bytes); pos_ += count; @@ -49,13 +54,15 @@ class ReadableWavFile : public ReadableWav { } return success; } - int64_t GetPosition() { return pos_; } + int64_t GetPosition() override { return pos_; } private: FileWrapper* file_; int64_t pos_ = 0; }; +constexpr size_t kMaxChunksize = 4096; + } // namespace WavReader::WavReader(const std::string& filename) @@ -65,69 +72,106 @@ WavReader::WavReader(FileWrapper file) : file_(std::move(file)) { RTC_CHECK(file_.is_open()) << "Invalid file. Could not create file handle for wav file."; - ReadableWavFile readable(&file_); - WavFormat format; + WavHeaderFileReader readable(&file_); size_t bytes_per_sample; - RTC_CHECK(ReadWavHeader(&readable, &num_channels_, &sample_rate_, &format, - &bytes_per_sample, &num_samples_)); - num_samples_remaining_ = num_samples_; - RTC_CHECK_EQ(kWavFormat, format); - RTC_CHECK_EQ(kBytesPerSample, bytes_per_sample); - data_start_pos_ = readable.GetPosition(); -} - -WavReader::~WavReader() { - Close(); + RTC_CHECK(ReadWavHeader(&readable, &num_channels_, &sample_rate_, &format_, + &bytes_per_sample, &num_samples_in_file_, + &data_start_pos_)); + num_unread_samples_ = num_samples_in_file_; + RTC_CHECK(FormatSupported(format_)) << "Non-implemented wav-format"; } void WavReader::Reset() { RTC_CHECK(file_.SeekTo(data_start_pos_)) << "Failed to set position in the file to WAV data start position"; - num_samples_remaining_ = num_samples_; + num_unread_samples_ = num_samples_in_file_; } -int WavReader::sample_rate() const { - return sample_rate_; -} - -size_t WavReader::num_channels() const { - return num_channels_; -} - -size_t WavReader::num_samples() const { - return num_samples_; -} - -size_t WavReader::ReadSamples(size_t num_samples, int16_t* samples) { +size_t WavReader::ReadSamples(const size_t num_samples, + int16_t* const samples) { #ifndef WEBRTC_ARCH_LITTLE_ENDIAN #error "Need to convert samples to big-endian when reading from WAV file" #endif - // There could be metadata after the audio; ensure we don't read it. - num_samples = std::min(num_samples, num_samples_remaining_); - const size_t num_bytes = num_samples * sizeof(*samples); - const size_t read_bytes = file_.Read(samples, num_bytes); - // If we didn't read what was requested, ensure we've reached the EOF. - RTC_CHECK(read_bytes == num_bytes || file_.ReadEof()); - RTC_CHECK_EQ(read_bytes % 2, 0) - << "End of file in the middle of a 16-bit sample"; - const size_t read_samples = read_bytes / 2; - RTC_CHECK_LE(read_samples, num_samples_remaining_); - num_samples_remaining_ -= read_samples; - return read_samples; + + size_t num_samples_left_to_read = num_samples; + size_t next_chunk_start = 0; + while (num_samples_left_to_read > 0 && num_unread_samples_ > 0) { + const size_t chunk_size = std::min( + std::min(kMaxChunksize, num_samples_left_to_read), num_unread_samples_); + size_t num_bytes_read; + size_t num_samples_read; + if (format_ == WavFormat::kWavFormatIeeeFloat) { + std::array samples_to_convert; + num_bytes_read = file_.Read(samples_to_convert.data(), + chunk_size * sizeof(samples_to_convert[0])); + num_samples_read = num_bytes_read / sizeof(samples_to_convert[0]); + + for (size_t j = 0; j < num_samples_read; ++j) { + samples[next_chunk_start + j] = FloatToS16(samples_to_convert[j]); + } + } else { + RTC_CHECK_EQ(format_, WavFormat::kWavFormatPcm); + num_bytes_read = file_.Read(&samples[next_chunk_start], + chunk_size * sizeof(samples[0])); + num_samples_read = num_bytes_read / sizeof(samples[0]); + } + RTC_CHECK(num_samples_read == 0 || (num_bytes_read % num_samples_read) == 0) + << "Corrupt file: file ended in the middle of a sample."; + RTC_CHECK(num_samples_read == chunk_size || file_.ReadEof()) + << "Corrupt file: payload size does not match header."; + + next_chunk_start += num_samples_read; + num_unread_samples_ -= num_samples_read; + num_samples_left_to_read -= num_samples_read; + } + + return num_samples - num_samples_left_to_read; } -size_t WavReader::ReadSamples(size_t num_samples, float* samples) { - static const size_t kChunksize = 4096 / sizeof(uint16_t); - size_t read = 0; - for (size_t i = 0; i < num_samples; i += kChunksize) { - int16_t isamples[kChunksize]; - size_t chunk = std::min(kChunksize, num_samples - i); - chunk = ReadSamples(chunk, isamples); - for (size_t j = 0; j < chunk; ++j) - samples[i + j] = isamples[j]; - read += chunk; +size_t WavReader::ReadSamples(const size_t num_samples, float* const samples) { +#ifndef WEBRTC_ARCH_LITTLE_ENDIAN +#error "Need to convert samples to big-endian when reading from WAV file" +#endif + + size_t num_samples_left_to_read = num_samples; + size_t next_chunk_start = 0; + while (num_samples_left_to_read > 0 && num_unread_samples_ > 0) { + const size_t chunk_size = std::min( + std::min(kMaxChunksize, num_samples_left_to_read), num_unread_samples_); + size_t num_bytes_read; + size_t num_samples_read; + if (format_ == WavFormat::kWavFormatPcm) { + std::array samples_to_convert; + num_bytes_read = file_.Read(samples_to_convert.data(), + chunk_size * sizeof(samples_to_convert[0])); + num_samples_read = num_bytes_read / sizeof(samples_to_convert[0]); + + for (size_t j = 0; j < num_samples_read; ++j) { + samples[next_chunk_start + j] = + static_cast(samples_to_convert[j]); + } + } else { + RTC_CHECK_EQ(format_, WavFormat::kWavFormatIeeeFloat); + num_bytes_read = file_.Read(&samples[next_chunk_start], + chunk_size * sizeof(samples[0])); + num_samples_read = num_bytes_read / sizeof(samples[0]); + + for (size_t j = 0; j < num_samples_read; ++j) { + samples[next_chunk_start + j] = + FloatToFloatS16(samples[next_chunk_start + j]); + } + } + RTC_CHECK(num_samples_read == 0 || (num_bytes_read % num_samples_read) == 0) + << "Corrupt file: file ended in the middle of a sample."; + RTC_CHECK(num_samples_read == chunk_size || file_.ReadEof()) + << "Corrupt file: payload size does not match header."; + + next_chunk_start += num_samples_read; + num_unread_samples_ -= num_samples_read; + num_samples_left_to_read -= num_samples_read; } - return read; + + return num_samples - num_samples_left_to_read; } void WavReader::Close() { @@ -136,71 +180,110 @@ void WavReader::Close() { WavWriter::WavWriter(const std::string& filename, int sample_rate, - size_t num_channels) + size_t num_channels, + SampleFormat sample_format) // Unlike plain fopen, OpenWriteOnly takes care of filename utf8 -> // wchar conversion on windows. : WavWriter(FileWrapper::OpenWriteOnly(filename), sample_rate, - num_channels) {} + num_channels, + sample_format) {} -WavWriter::WavWriter(FileWrapper file, int sample_rate, size_t num_channels) +WavWriter::WavWriter(FileWrapper file, + int sample_rate, + size_t num_channels, + SampleFormat sample_format) : sample_rate_(sample_rate), num_channels_(num_channels), - num_samples_(0), + num_samples_written_(0), + format_(sample_format == SampleFormat::kInt16 + ? WavFormat::kWavFormatPcm + : WavFormat::kWavFormatIeeeFloat), file_(std::move(file)) { // Handle errors from the OpenWriteOnly call in above constructor. RTC_CHECK(file_.is_open()) << "Invalid file. Could not create wav file."; - RTC_CHECK(CheckWavParameters(num_channels_, sample_rate_, kWavFormat, - kBytesPerSample, num_samples_)); + RTC_CHECK(CheckWavParameters(num_channels_, sample_rate_, format_, + num_samples_written_)); // Write a blank placeholder header, since we need to know the total number // of samples before we can fill in the real data. - static const uint8_t blank_header[kWavHeaderSize] = {0}; - RTC_CHECK(file_.Write(blank_header, kWavHeaderSize)); -} - -WavWriter::~WavWriter() { - Close(); -} - -int WavWriter::sample_rate() const { - return sample_rate_; -} - -size_t WavWriter::num_channels() const { - return num_channels_; -} - -size_t WavWriter::num_samples() const { - return num_samples_; + static const uint8_t blank_header[MaxWavHeaderSize()] = {0}; + RTC_CHECK(file_.Write(blank_header, WavHeaderSize(format_))); } void WavWriter::WriteSamples(const int16_t* samples, size_t num_samples) { #ifndef WEBRTC_ARCH_LITTLE_ENDIAN #error "Need to convert samples to little-endian when writing to WAV file" #endif - RTC_CHECK(file_.Write(samples, sizeof(*samples) * num_samples)); - num_samples_ += num_samples; - RTC_CHECK(num_samples_ >= num_samples); // detect size_t overflow + + for (size_t i = 0; i < num_samples; i += kMaxChunksize) { + const size_t num_remaining_samples = num_samples - i; + const size_t num_samples_to_write = + std::min(kMaxChunksize, num_remaining_samples); + + if (format_ == WavFormat::kWavFormatPcm) { + RTC_CHECK( + file_.Write(&samples[i], num_samples_to_write * sizeof(samples[0]))); + } else { + RTC_CHECK_EQ(format_, WavFormat::kWavFormatIeeeFloat); + std::array converted_samples; + for (size_t j = 0; j < num_samples_to_write; ++j) { + converted_samples[j] = S16ToFloat(samples[i + j]); + } + RTC_CHECK( + file_.Write(converted_samples.data(), + num_samples_to_write * sizeof(converted_samples[0]))); + } + + num_samples_written_ += num_samples_to_write; + RTC_CHECK_GE(num_samples_written_, + num_samples_to_write); // detect size_t overflow + } } void WavWriter::WriteSamples(const float* samples, size_t num_samples) { - static const size_t kChunksize = 4096 / sizeof(uint16_t); - for (size_t i = 0; i < num_samples; i += kChunksize) { - int16_t isamples[kChunksize]; - const size_t chunk = std::min(kChunksize, num_samples - i); - FloatS16ToS16(samples + i, chunk, isamples); - WriteSamples(isamples, chunk); +#ifndef WEBRTC_ARCH_LITTLE_ENDIAN +#error "Need to convert samples to little-endian when writing to WAV file" +#endif + + for (size_t i = 0; i < num_samples; i += kMaxChunksize) { + const size_t num_remaining_samples = num_samples - i; + const size_t num_samples_to_write = + std::min(kMaxChunksize, num_remaining_samples); + + if (format_ == WavFormat::kWavFormatPcm) { + std::array converted_samples; + for (size_t j = 0; j < num_samples_to_write; ++j) { + converted_samples[j] = FloatS16ToS16(samples[i + j]); + } + RTC_CHECK( + file_.Write(converted_samples.data(), + num_samples_to_write * sizeof(converted_samples[0]))); + } else { + RTC_CHECK_EQ(format_, WavFormat::kWavFormatIeeeFloat); + std::array converted_samples; + for (size_t j = 0; j < num_samples_to_write; ++j) { + converted_samples[j] = FloatS16ToFloat(samples[i + j]); + } + RTC_CHECK( + file_.Write(converted_samples.data(), + num_samples_to_write * sizeof(converted_samples[0]))); + } + + num_samples_written_ += num_samples_to_write; + RTC_CHECK(num_samples_written_ >= + num_samples_to_write); // detect size_t overflow } } void WavWriter::Close() { RTC_CHECK(file_.Rewind()); - uint8_t header[kWavHeaderSize]; - WriteWavHeader(header, num_channels_, sample_rate_, kWavFormat, - kBytesPerSample, num_samples_); - RTC_CHECK(file_.Write(header, kWavHeaderSize)); + std::array header; + size_t header_size; + WriteWavHeader(num_channels_, sample_rate_, format_, num_samples_written_, + header.data(), &header_size); + RTC_CHECK(file_.Write(header.data(), header_size)); RTC_CHECK(file_.Close()); } diff --git a/common_audio/wav_file.h b/common_audio/wav_file.h index 65f2453736..dda611be6a 100644 --- a/common_audio/wav_file.h +++ b/common_audio/wav_file.h @@ -16,14 +16,16 @@ #include #include -#include "rtc_base/constructor_magic.h" +#include "common_audio/wav_header.h" #include "rtc_base/system/file_wrapper.h" namespace webrtc { -// Interface to provide access to WAV file parameters. +// Interface to provide access WAV file parameters. class WavFile { public: + enum class SampleFormat { kInt16, kFloat }; + virtual ~WavFile() {} virtual int sample_rate() const = 0; @@ -31,37 +33,44 @@ class WavFile { virtual size_t num_samples() const = 0; }; -// Simple C++ class for writing 16-bit PCM WAV files. All error handling is -// by calls to RTC_CHECK(), making it unsuitable for anything but debug code. +// Simple C++ class for writing 16-bit integer and 32 bit floating point PCM WAV +// files. All error handling is by calls to RTC_CHECK(), making it unsuitable +// for anything but debug code. class WavWriter final : public WavFile { public: - // Open a new WAV file for writing. - WavWriter(const std::string& filename, int sample_rate, size_t num_channels); + // Opens a new WAV file for writing. + WavWriter(const std::string& filename, + int sample_rate, + size_t num_channels, + SampleFormat sample_format = SampleFormat::kInt16); + WavWriter(FileWrapper file, + int sample_rate, + size_t num_channels, + SampleFormat sample_format = SampleFormat::kInt16); - // Open a new WAV file for writing. - WavWriter(FileWrapper file, int sample_rate, size_t num_channels); + // Closes the WAV file, after writing its header. + ~WavWriter() { Close(); } - // Close the WAV file, after writing its header. - ~WavWriter() override; + WavWriter(const WavWriter&) = delete; + WavWriter& operator=(const WavWriter&) = delete; // Write additional samples to the file. Each sample is in the range - // [-32768,32767], and there must be the previously specified number of + // [-32768.0,32767.0], and there must be the previously specified number of // interleaved channels. void WriteSamples(const float* samples, size_t num_samples); void WriteSamples(const int16_t* samples, size_t num_samples); - int sample_rate() const override; - size_t num_channels() const override; - size_t num_samples() const override; + int sample_rate() const override { return sample_rate_; } + size_t num_channels() const override { return num_channels_; } + size_t num_samples() const override { return num_samples_written_; } private: void Close(); const int sample_rate_; const size_t num_channels_; - size_t num_samples_; // Total number of samples written to file. - FileWrapper file_; // Output file, owned by this class - - RTC_DISALLOW_COPY_AND_ASSIGN(WavWriter); + size_t num_samples_written_; + WavFormat format_; + FileWrapper file_; }; // Follows the conventions of WavWriter. @@ -69,12 +78,13 @@ class WavReader final : public WavFile { public: // Opens an existing WAV file for reading. explicit WavReader(const std::string& filename); - - // Use an existing WAV file for reading. explicit WavReader(FileWrapper file); // Close the WAV file. - ~WavReader() override; + ~WavReader() { Close(); } + + WavReader(const WavReader&) = delete; + WavReader& operator=(const WavReader&) = delete; // Resets position to the beginning of the file. void Reset(); @@ -84,21 +94,20 @@ class WavReader final : public WavFile { size_t ReadSamples(size_t num_samples, float* samples); size_t ReadSamples(size_t num_samples, int16_t* samples); - int sample_rate() const override; - size_t num_channels() const override; - size_t num_samples() const override; + int sample_rate() const override { return sample_rate_; } + size_t num_channels() const override { return num_channels_; } + size_t num_samples() const override { return num_samples_in_file_; } private: void Close(); int sample_rate_; size_t num_channels_; - size_t num_samples_; // Total number of samples in the file. - size_t num_samples_remaining_; - FileWrapper file_; // Input file, owned by this class. + WavFormat format_; + size_t num_samples_in_file_; + size_t num_unread_samples_; + FileWrapper file_; int64_t data_start_pos_; // Position in the file immediately after WAV header. - - RTC_DISALLOW_COPY_AND_ASSIGN(WavReader); }; } // namespace webrtc diff --git a/common_audio/wav_file_unittest.cc b/common_audio/wav_file_unittest.cc index 01c920cdf1..97cecc345f 100644 --- a/common_audio/wav_file_unittest.cc +++ b/common_audio/wav_file_unittest.cc @@ -78,7 +78,7 @@ TEST(WavWriterTest, MAYBE_CPP) { // clang-format on }; static const size_t kContentSize = - kWavHeaderSize + kNumSamples * sizeof(int16_t) + sizeof(kMetadata); + kPcmWavHeaderSize + kNumSamples * sizeof(int16_t) + sizeof(kMetadata); static_assert(sizeof(kExpectedContents) == kContentSize, "content size"); EXPECT_EQ(kContentSize, test::GetFileSize(outfile)); FILE* f = fopen(outfile.c_str(), "rb"); @@ -103,43 +103,75 @@ TEST(WavWriterTest, MAYBE_CPP) { // Write a larger WAV file. You can listen to this file to sanity-check it. TEST(WavWriterTest, LargeFile) { - std::string outfile = test::OutputPath() + "wavtest3.wav"; - static const int kSampleRate = 8000; - static const size_t kNumChannels = 2; - static const size_t kNumSamples = 3 * kSampleRate * kNumChannels; - float samples[kNumSamples]; - for (size_t i = 0; i < kNumSamples; i += kNumChannels) { - // A nice periodic beeping sound. - static const double kToneHz = 440; - const double t = static_cast(i) / (kNumChannels * kSampleRate); - const double x = - std::numeric_limits::max() * std::sin(t * kToneHz * 2 * M_PI); - samples[i] = std::pow(std::sin(t * 2 * 2 * M_PI), 10) * x; - samples[i + 1] = std::pow(std::cos(t * 2 * 2 * M_PI), 10) * x; - } - { - WavWriter w(outfile, kSampleRate, kNumChannels); - EXPECT_EQ(kSampleRate, w.sample_rate()); - EXPECT_EQ(kNumChannels, w.num_channels()); - EXPECT_EQ(0u, w.num_samples()); - w.WriteSamples(samples, kNumSamples); - EXPECT_EQ(kNumSamples, w.num_samples()); - } - EXPECT_EQ(sizeof(int16_t) * kNumSamples + kWavHeaderSize, - test::GetFileSize(outfile)); + constexpr int kSampleRate = 8000; + constexpr size_t kNumChannels = 2; + constexpr size_t kNumSamples = 3 * kSampleRate * kNumChannels; + for (WavFile::SampleFormat wav_format : + {WavFile::SampleFormat::kInt16, WavFile::SampleFormat::kFloat}) { + for (WavFile::SampleFormat write_format : + {WavFile::SampleFormat::kInt16, WavFile::SampleFormat::kFloat}) { + for (WavFile::SampleFormat read_format : + {WavFile::SampleFormat::kInt16, WavFile::SampleFormat::kFloat}) { + std::string outfile = test::OutputPath() + "wavtest3.wav"; + float samples[kNumSamples]; + for (size_t i = 0; i < kNumSamples; i += kNumChannels) { + // A nice periodic beeping sound. + static const double kToneHz = 440; + const double t = + static_cast(i) / (kNumChannels * kSampleRate); + const double x = std::numeric_limits::max() * + std::sin(t * kToneHz * 2 * M_PI); + samples[i] = std::pow(std::sin(t * 2 * 2 * M_PI), 10) * x; + samples[i + 1] = std::pow(std::cos(t * 2 * 2 * M_PI), 10) * x; + } + { + WavWriter w(outfile, kSampleRate, kNumChannels, wav_format); + EXPECT_EQ(kSampleRate, w.sample_rate()); + EXPECT_EQ(kNumChannels, w.num_channels()); + EXPECT_EQ(0u, w.num_samples()); + if (write_format == WavFile::SampleFormat::kFloat) { + float truncated_samples[kNumSamples]; + for (size_t k = 0; k < kNumSamples; ++k) { + truncated_samples[k] = static_cast(samples[k]); + } + w.WriteSamples(truncated_samples, kNumSamples); + } else { + w.WriteSamples(samples, kNumSamples); + } + EXPECT_EQ(kNumSamples, w.num_samples()); + } + if (wav_format == WavFile::SampleFormat::kFloat) { + EXPECT_EQ(sizeof(float) * kNumSamples + kIeeeFloatWavHeaderSize, + test::GetFileSize(outfile)); + } else { + EXPECT_EQ(sizeof(int16_t) * kNumSamples + kPcmWavHeaderSize, + test::GetFileSize(outfile)); + } - { - WavReader r(outfile); - EXPECT_EQ(kSampleRate, r.sample_rate()); - EXPECT_EQ(kNumChannels, r.num_channels()); - EXPECT_EQ(kNumSamples, r.num_samples()); + { + WavReader r(outfile); + EXPECT_EQ(kSampleRate, r.sample_rate()); + EXPECT_EQ(kNumChannels, r.num_channels()); + EXPECT_EQ(kNumSamples, r.num_samples()); - float read_samples[kNumSamples]; - EXPECT_EQ(kNumSamples, r.ReadSamples(kNumSamples, read_samples)); - for (size_t i = 0; i < kNumSamples; ++i) - EXPECT_NEAR(samples[i], read_samples[i], 1); - - EXPECT_EQ(0u, r.ReadSamples(kNumSamples, read_samples)); + if (read_format == WavFile::SampleFormat::kFloat) { + float read_samples[kNumSamples]; + EXPECT_EQ(kNumSamples, r.ReadSamples(kNumSamples, read_samples)); + for (size_t i = 0; i < kNumSamples; ++i) { + EXPECT_NEAR(samples[i], read_samples[i], 1); + } + EXPECT_EQ(0u, r.ReadSamples(kNumSamples, read_samples)); + } else { + int16_t read_samples[kNumSamples]; + EXPECT_EQ(kNumSamples, r.ReadSamples(kNumSamples, read_samples)); + for (size_t i = 0; i < kNumSamples; ++i) { + EXPECT_NEAR(samples[i], static_cast(read_samples[i]), 1); + } + EXPECT_EQ(0u, r.ReadSamples(kNumSamples, read_samples)); + } + } + } + } } } @@ -188,7 +220,7 @@ TEST(WavReaderTest, MAYBE_CPPReset) { // clang-format on }; static const size_t kContentSize = - kWavHeaderSize + kNumSamples * sizeof(int16_t) + sizeof(kMetadata); + kPcmWavHeaderSize + kNumSamples * sizeof(int16_t) + sizeof(kMetadata); static_assert(sizeof(kExpectedContents) == kContentSize, "content size"); EXPECT_EQ(kContentSize, test::GetFileSize(outfile)); FILE* f = fopen(outfile.c_str(), "rb"); diff --git a/common_audio/wav_header.cc b/common_audio/wav_header.cc index 15eb1f844e..d3dca9055d 100644 --- a/common_audio/wav_header.cc +++ b/common_audio/wav_header.cc @@ -26,20 +26,28 @@ namespace webrtc { namespace { +#ifndef WEBRTC_ARCH_LITTLE_ENDIAN +#error "Code not working properly for big endian platforms." +#endif + +#pragma pack(2) struct ChunkHeader { uint32_t ID; uint32_t Size; }; static_assert(sizeof(ChunkHeader) == 8, "ChunkHeader size"); +#pragma pack(2) struct RiffHeader { ChunkHeader header; uint32_t Format; }; +static_assert(sizeof(RiffHeader) == sizeof(ChunkHeader) + 4, "RiffHeader size"); // We can't nest this definition in WavHeader, because VS2013 gives an error // on sizeof(WavHeader::fmt): "error C2070: 'unknown': illegal sizeof operand". -struct FmtSubchunk { +#pragma pack(2) +struct FmtPcmSubchunk { ChunkHeader header; uint16_t AudioFormat; uint16_t NumChannels; @@ -48,60 +56,108 @@ struct FmtSubchunk { uint16_t BlockAlign; uint16_t BitsPerSample; }; -static_assert(sizeof(FmtSubchunk) == 24, "FmtSubchunk size"); -const uint32_t kFmtSubchunkSize = sizeof(FmtSubchunk) - sizeof(ChunkHeader); +static_assert(sizeof(FmtPcmSubchunk) == 24, "FmtPcmSubchunk size"); +const uint32_t kFmtPcmSubchunkSize = + sizeof(FmtPcmSubchunk) - sizeof(ChunkHeader); -// Simple wav header. It does not include chunks that are not essential to read -// audio samples. -struct WavHeader { - WavHeader(const WavHeader&) = default; - WavHeader& operator=(const WavHeader&) = default; +// Pack struct to avoid additional padding bytes. +#pragma pack(2) +struct FmtIeeeFloatSubchunk { + ChunkHeader header; + uint16_t AudioFormat; + uint16_t NumChannels; + uint32_t SampleRate; + uint32_t ByteRate; + uint16_t BlockAlign; + uint16_t BitsPerSample; + uint16_t ExtensionSize; +}; +static_assert(sizeof(FmtIeeeFloatSubchunk) == 26, "FmtIeeeFloatSubchunk size"); +const uint32_t kFmtIeeeFloatSubchunkSize = + sizeof(FmtIeeeFloatSubchunk) - sizeof(ChunkHeader); + +// Simple PCM wav header. It does not include chunks that are not essential to +// read audio samples. +#pragma pack(2) +struct WavHeaderPcm { + WavHeaderPcm(const WavHeaderPcm&) = default; + WavHeaderPcm& operator=(const WavHeaderPcm&) = default; RiffHeader riff; - FmtSubchunk fmt; + FmtPcmSubchunk fmt; struct { ChunkHeader header; } data; }; -static_assert(sizeof(WavHeader) == kWavHeaderSize, "no padding in header"); +static_assert(sizeof(WavHeaderPcm) == kPcmWavHeaderSize, + "no padding in header"); -#ifdef WEBRTC_ARCH_LITTLE_ENDIAN -static inline void WriteLE16(uint16_t* f, uint16_t x) { - *f = x; -} -static inline void WriteLE32(uint32_t* f, uint32_t x) { - *f = x; -} -static inline void WriteFourCC(uint32_t* f, char a, char b, char c, char d) { - *f = static_cast(a) | static_cast(b) << 8 | - static_cast(c) << 16 | static_cast(d) << 24; +// IEEE Float Wav header, includes extra chunks necessary for proper non-PCM +// WAV implementation. +#pragma pack(2) +struct WavHeaderIeeeFloat { + WavHeaderIeeeFloat(const WavHeaderIeeeFloat&) = default; + WavHeaderIeeeFloat& operator=(const WavHeaderIeeeFloat&) = default; + RiffHeader riff; + FmtIeeeFloatSubchunk fmt; + struct { + ChunkHeader header; + uint32_t SampleLength; + } fact; + struct { + ChunkHeader header; + } data; +}; +static_assert(sizeof(WavHeaderIeeeFloat) == kIeeeFloatWavHeaderSize, + "no padding in header"); + +uint32_t PackFourCC(char a, char b, char c, char d) { + uint32_t packed_value = + static_cast(a) | static_cast(b) << 8 | + static_cast(c) << 16 | static_cast(d) << 24; + return packed_value; } -static inline uint16_t ReadLE16(uint16_t x) { - return x; -} -static inline uint32_t ReadLE32(uint32_t x) { - return x; -} -static inline std::string ReadFourCC(uint32_t x) { +std::string ReadFourCC(uint32_t x) { return std::string(reinterpret_cast(&x), 4); } -#else -#error "Write be-to-le conversion functions" -#endif -static inline uint32_t RiffChunkSize(size_t bytes_in_payload) { - return static_cast(bytes_in_payload + kWavHeaderSize - +uint16_t MapWavFormatToHeaderField(WavFormat format) { + switch (format) { + case WavFormat::kWavFormatPcm: + return 1; + case WavFormat::kWavFormatIeeeFloat: + return 3; + case WavFormat::kWavFormatALaw: + return 6; + case WavFormat::kWavFormatMuLaw: + return 7; + } + RTC_CHECK(false); +} + +WavFormat MapHeaderFieldToWavFormat(uint16_t format_header_value) { + if (format_header_value == 1) { + return WavFormat::kWavFormatPcm; + } + if (format_header_value == 3) { + return WavFormat::kWavFormatIeeeFloat; + } + + RTC_CHECK(false) << "Unsupported WAV format"; +} + +uint32_t RiffChunkSize(size_t bytes_in_payload, size_t header_size) { + return static_cast(bytes_in_payload + header_size - sizeof(ChunkHeader)); } -static inline uint32_t ByteRate(size_t num_channels, - int sample_rate, - size_t bytes_per_sample) { +uint32_t ByteRate(size_t num_channels, + int sample_rate, + size_t bytes_per_sample) { return static_cast(num_channels * sample_rate * bytes_per_sample); } -static inline uint16_t BlockAlign(size_t num_channels, - size_t bytes_per_sample) { +uint16_t BlockAlign(size_t num_channels, size_t bytes_per_sample) { return static_cast(num_channels * bytes_per_sample); } @@ -109,7 +165,7 @@ static inline uint16_t BlockAlign(size_t num_channels, // first byte of the sought chunk data. If not found, the end of the file is // reached. bool FindWaveChunk(ChunkHeader* chunk_header, - ReadableWav* readable, + WavHeaderReader* readable, const std::string sought_chunk_id) { RTC_DCHECK_EQ(sought_chunk_id.size(), 4); while (true) { @@ -124,17 +180,17 @@ bool FindWaveChunk(ChunkHeader* chunk_header, } } -bool ReadFmtChunkData(FmtSubchunk* fmt_subchunk, ReadableWav* readable) { +bool ReadFmtChunkData(FmtPcmSubchunk* fmt_subchunk, WavHeaderReader* readable) { // Reads "fmt " chunk payload. - if (readable->Read(&(fmt_subchunk->AudioFormat), kFmtSubchunkSize) != - kFmtSubchunkSize) + if (readable->Read(&(fmt_subchunk->AudioFormat), kFmtPcmSubchunkSize) != + kFmtPcmSubchunkSize) return false; - const uint32_t fmt_size = ReadLE32(fmt_subchunk->header.Size); - if (fmt_size != kFmtSubchunkSize) { + const uint32_t fmt_size = fmt_subchunk->header.Size; + if (fmt_size != kFmtPcmSubchunkSize) { // There is an optional two-byte extension field permitted to be present // with PCM, but which must be zero. int16_t ext_size; - if (kFmtSubchunkSize + sizeof(ext_size) != fmt_size) + if (kFmtPcmSubchunkSize + sizeof(ext_size) != fmt_size) return false; if (readable->Read(&ext_size, sizeof(ext_size)) != sizeof(ext_size)) return false; @@ -144,7 +200,89 @@ bool ReadFmtChunkData(FmtSubchunk* fmt_subchunk, ReadableWav* readable) { return true; } -} // namespace +void WritePcmWavHeader(size_t num_channels, + int sample_rate, + size_t bytes_per_sample, + size_t num_samples, + uint8_t* buf, + size_t* header_size) { + RTC_CHECK(buf); + RTC_CHECK(header_size); + *header_size = kPcmWavHeaderSize; + auto header = rtc::MsanUninitialized({}); + const size_t bytes_in_payload = bytes_per_sample * num_samples; + + header.riff.header.ID = PackFourCC('R', 'I', 'F', 'F'); + header.riff.header.Size = RiffChunkSize(bytes_in_payload, *header_size); + header.riff.Format = PackFourCC('W', 'A', 'V', 'E'); + header.fmt.header.ID = PackFourCC('f', 'm', 't', ' '); + header.fmt.header.Size = kFmtPcmSubchunkSize; + header.fmt.AudioFormat = MapWavFormatToHeaderField(WavFormat::kWavFormatPcm); + header.fmt.NumChannels = static_cast(num_channels); + header.fmt.SampleRate = sample_rate; + header.fmt.ByteRate = ByteRate(num_channels, sample_rate, bytes_per_sample); + header.fmt.BlockAlign = BlockAlign(num_channels, bytes_per_sample); + header.fmt.BitsPerSample = static_cast(8 * bytes_per_sample); + header.data.header.ID = PackFourCC('d', 'a', 't', 'a'); + header.data.header.Size = static_cast(bytes_in_payload); + + // Do an extra copy rather than writing everything to buf directly, since buf + // might not be correctly aligned. + memcpy(buf, &header, *header_size); +} + +void WriteIeeeFloatWavHeader(size_t num_channels, + int sample_rate, + size_t bytes_per_sample, + size_t num_samples, + uint8_t* buf, + size_t* header_size) { + RTC_CHECK(buf); + RTC_CHECK(header_size); + *header_size = kIeeeFloatWavHeaderSize; + auto header = rtc::MsanUninitialized({}); + const size_t bytes_in_payload = bytes_per_sample * num_samples; + + header.riff.header.ID = PackFourCC('R', 'I', 'F', 'F'); + header.riff.header.Size = RiffChunkSize(bytes_in_payload, *header_size); + header.riff.Format = PackFourCC('W', 'A', 'V', 'E'); + header.fmt.header.ID = PackFourCC('f', 'm', 't', ' '); + header.fmt.header.Size = kFmtIeeeFloatSubchunkSize; + header.fmt.AudioFormat = + MapWavFormatToHeaderField(WavFormat::kWavFormatIeeeFloat); + header.fmt.NumChannels = static_cast(num_channels); + header.fmt.SampleRate = sample_rate; + header.fmt.ByteRate = ByteRate(num_channels, sample_rate, bytes_per_sample); + header.fmt.BlockAlign = BlockAlign(num_channels, bytes_per_sample); + header.fmt.BitsPerSample = static_cast(8 * bytes_per_sample); + header.fmt.ExtensionSize = 0; + header.fact.header.ID = PackFourCC('f', 'a', 'c', 't'); + header.fact.header.Size = 4; + header.fact.SampleLength = static_cast(num_channels * num_samples); + header.data.header.ID = PackFourCC('d', 'a', 't', 'a'); + header.data.header.Size = static_cast(bytes_in_payload); + + // Do an extra copy rather than writing everything to buf directly, since buf + // might not be correctly aligned. + memcpy(buf, &header, *header_size); +} + +// Returns the number of bytes per sample for the format. +size_t GetFormatBytesPerSample(WavFormat format) { + switch (format) { + case WavFormat::kWavFormatPcm: + // Other values may be OK, but for now we're conservative. + return 2; + case WavFormat::kWavFormatALaw: + case WavFormat::kWavFormatMuLaw: + return 1; + case WavFormat::kWavFormatIeeeFloat: + return 4; + default: + RTC_CHECK(false); + return 2; + } +} bool CheckWavParameters(size_t num_channels, int sample_rate, @@ -169,23 +307,27 @@ bool CheckWavParameters(size_t num_channels, // format and bytes_per_sample must agree. switch (format) { - case kWavFormatPcm: + case WavFormat::kWavFormatPcm: // Other values may be OK, but for now we're conservative: if (bytes_per_sample != 1 && bytes_per_sample != 2) return false; break; - case kWavFormatALaw: - case kWavFormatMuLaw: + case WavFormat::kWavFormatALaw: + case WavFormat::kWavFormatMuLaw: if (bytes_per_sample != 1) return false; break; + case WavFormat::kWavFormatIeeeFloat: + if (bytes_per_sample != 4) + return false; + break; default: return false; } // The number of bytes in the file, not counting the first ChunkHeader, must // be less than 2^32; otherwise, the ChunkSize field overflows. - const size_t header_size = kWavHeaderSize - sizeof(ChunkHeader); + const size_t header_size = kPcmWavHeaderSize - sizeof(ChunkHeader); const size_t max_samples = (std::numeric_limits::max() - header_size) / bytes_per_sample; if (num_samples > max_samples) @@ -198,48 +340,47 @@ bool CheckWavParameters(size_t num_channels, return true; } -void WriteWavHeader(uint8_t* buf, - size_t num_channels, - int sample_rate, - WavFormat format, - size_t bytes_per_sample, - size_t num_samples) { - RTC_CHECK(CheckWavParameters(num_channels, sample_rate, format, - bytes_per_sample, num_samples)); +} // namespace - auto header = rtc::MsanUninitialized({}); - const size_t bytes_in_payload = bytes_per_sample * num_samples; - - WriteFourCC(&header.riff.header.ID, 'R', 'I', 'F', 'F'); - WriteLE32(&header.riff.header.Size, RiffChunkSize(bytes_in_payload)); - WriteFourCC(&header.riff.Format, 'W', 'A', 'V', 'E'); - - WriteFourCC(&header.fmt.header.ID, 'f', 'm', 't', ' '); - WriteLE32(&header.fmt.header.Size, kFmtSubchunkSize); - WriteLE16(&header.fmt.AudioFormat, format); - WriteLE16(&header.fmt.NumChannels, static_cast(num_channels)); - WriteLE32(&header.fmt.SampleRate, sample_rate); - WriteLE32(&header.fmt.ByteRate, - ByteRate(num_channels, sample_rate, bytes_per_sample)); - WriteLE16(&header.fmt.BlockAlign, BlockAlign(num_channels, bytes_per_sample)); - WriteLE16(&header.fmt.BitsPerSample, - static_cast(8 * bytes_per_sample)); - - WriteFourCC(&header.data.header.ID, 'd', 'a', 't', 'a'); - WriteLE32(&header.data.header.Size, static_cast(bytes_in_payload)); - - // Do an extra copy rather than writing everything to buf directly, since buf - // might not be correctly aligned. - memcpy(buf, &header, kWavHeaderSize); +bool CheckWavParameters(size_t num_channels, + int sample_rate, + WavFormat format, + size_t num_samples) { + return CheckWavParameters(num_channels, sample_rate, format, + GetFormatBytesPerSample(format), num_samples); } -bool ReadWavHeader(ReadableWav* readable, +void WriteWavHeader(size_t num_channels, + int sample_rate, + WavFormat format, + size_t num_samples, + uint8_t* buf, + size_t* header_size) { + RTC_CHECK(buf); + RTC_CHECK(header_size); + + const size_t bytes_per_sample = GetFormatBytesPerSample(format); + RTC_CHECK(CheckWavParameters(num_channels, sample_rate, format, + bytes_per_sample, num_samples)); + if (format == WavFormat::kWavFormatPcm) { + WritePcmWavHeader(num_channels, sample_rate, bytes_per_sample, num_samples, + buf, header_size); + } else { + RTC_CHECK_EQ(format, WavFormat::kWavFormatIeeeFloat); + WriteIeeeFloatWavHeader(num_channels, sample_rate, bytes_per_sample, + num_samples, buf, header_size); + } +} + +bool ReadWavHeader(WavHeaderReader* readable, size_t* num_channels, int* sample_rate, WavFormat* format, size_t* bytes_per_sample, - size_t* num_samples) { - auto header = rtc::MsanUninitialized({}); + size_t* num_samples, + int64_t* data_start_pos) { + // Read using the PCM header, even though it might be float Wav file + auto header = rtc::MsanUninitialized({}); // Read RIFF chunk. if (readable->Read(&header.riff, sizeof(header.riff)) != sizeof(header.riff)) @@ -267,26 +408,34 @@ bool ReadWavHeader(ReadableWav* readable, } // Parse needed fields. - *format = static_cast(ReadLE16(header.fmt.AudioFormat)); - *num_channels = ReadLE16(header.fmt.NumChannels); - *sample_rate = ReadLE32(header.fmt.SampleRate); - *bytes_per_sample = ReadLE16(header.fmt.BitsPerSample) / 8; - const size_t bytes_in_payload = ReadLE32(header.data.header.Size); + *format = MapHeaderFieldToWavFormat(header.fmt.AudioFormat); + *num_channels = header.fmt.NumChannels; + *sample_rate = header.fmt.SampleRate; + *bytes_per_sample = header.fmt.BitsPerSample / 8; + const size_t bytes_in_payload = header.data.header.Size; if (*bytes_per_sample == 0) return false; *num_samples = bytes_in_payload / *bytes_per_sample; - if (ReadLE32(header.riff.header.Size) < RiffChunkSize(bytes_in_payload)) + const size_t header_size = *format == WavFormat::kWavFormatPcm + ? kPcmWavHeaderSize + : kIeeeFloatWavHeaderSize; + + if (header.riff.header.Size < RiffChunkSize(bytes_in_payload, header_size)) return false; - if (ReadLE32(header.fmt.ByteRate) != + if (header.fmt.ByteRate != ByteRate(*num_channels, *sample_rate, *bytes_per_sample)) return false; - if (ReadLE16(header.fmt.BlockAlign) != - BlockAlign(*num_channels, *bytes_per_sample)) + if (header.fmt.BlockAlign != BlockAlign(*num_channels, *bytes_per_sample)) return false; - return CheckWavParameters(*num_channels, *sample_rate, *format, - *bytes_per_sample, *num_samples); + if (!CheckWavParameters(*num_channels, *sample_rate, *format, + *bytes_per_sample, *num_samples)) { + return false; + } + + *data_start_pos = readable->GetPosition(); + return true; } } // namespace webrtc diff --git a/common_audio/wav_header.h b/common_audio/wav_header.h index 0c83d8d84d..2cccd7d34b 100644 --- a/common_audio/wav_header.h +++ b/common_audio/wav_header.h @@ -13,52 +13,77 @@ #include #include +#include + +#include "rtc_base/checks.h" namespace webrtc { -static const size_t kWavHeaderSize = 44; - -class ReadableWav { +// Interface providing header reading functionality. +class WavHeaderReader { public: // Returns the number of bytes read. virtual size_t Read(void* buf, size_t num_bytes) = 0; virtual bool SeekForward(uint32_t num_bytes) = 0; - virtual ~ReadableWav() = default; + virtual ~WavHeaderReader() = default; + virtual int64_t GetPosition() = 0; }; -enum WavFormat { - kWavFormatPcm = 1, // PCM, each sample of size bytes_per_sample - kWavFormatALaw = 6, // 8-bit ITU-T G.711 A-law - kWavFormatMuLaw = 7, // 8-bit ITU-T G.711 mu-law +// Possible WAV formats. +enum class WavFormat { + kWavFormatPcm = 1, // PCM, each sample of size bytes_per_sample. + kWavFormatIeeeFloat = 3, // IEEE float. + kWavFormatALaw = 6, // 8-bit ITU-T G.711 A-law. + kWavFormatMuLaw = 7, // 8-bit ITU-T G.711 mu-law. }; +// Header sizes for supported WAV formats. +constexpr size_t kPcmWavHeaderSize = 44; +constexpr size_t kIeeeFloatWavHeaderSize = 58; + +// Returns the size of the WAV header for the specified format. +constexpr size_t WavHeaderSize(WavFormat format) { + if (format == WavFormat::kWavFormatPcm) { + return kPcmWavHeaderSize; + } + RTC_CHECK_EQ(format, WavFormat::kWavFormatIeeeFloat); + return kIeeeFloatWavHeaderSize; +} + +// Returns the maximum size of the supported WAV formats. +constexpr size_t MaxWavHeaderSize() { + return std::max(WavHeaderSize(WavFormat::kWavFormatPcm), + WavHeaderSize(WavFormat::kWavFormatIeeeFloat)); +} + // Return true if the given parameters will make a well-formed WAV header. bool CheckWavParameters(size_t num_channels, int sample_rate, WavFormat format, - size_t bytes_per_sample, size_t num_samples); // Write a kWavHeaderSize bytes long WAV header to buf. The payload that // follows the header is supposed to have the specified number of interleaved // channels and contain the specified total number of samples of the specified -// type. CHECKs the input parameters for validity. -void WriteWavHeader(uint8_t* buf, - size_t num_channels, +// type. The size of the header is returned in header_size. CHECKs the input +// parameters for validity. +void WriteWavHeader(size_t num_channels, int sample_rate, WavFormat format, - size_t bytes_per_sample, - size_t num_samples); + size_t num_samples, + uint8_t* buf, + size_t* header_size); -// Read a WAV header from an implemented ReadableWav and parse the values into -// the provided output parameters. ReadableWav is used because the header can -// be variably sized. Returns false if the header is invalid. -bool ReadWavHeader(ReadableWav* readable, +// Read a WAV header from an implemented WavHeaderReader and parse the values +// into the provided output parameters. WavHeaderReader is used because the +// header can be variably sized. Returns false if the header is invalid. +bool ReadWavHeader(WavHeaderReader* readable, size_t* num_channels, int* sample_rate, WavFormat* format, size_t* bytes_per_sample, - size_t* num_samples); + size_t* num_samples, + int64_t* data_start_pos); } // namespace webrtc diff --git a/common_audio/wav_header_unittest.cc b/common_audio/wav_header_unittest.cc index 0dc3300c73..95721dac65 100644 --- a/common_audio/wav_header_unittest.cc +++ b/common_audio/wav_header_unittest.cc @@ -19,16 +19,16 @@ namespace webrtc { // Doesn't take ownership of the buffer. -class ReadableWavBuffer : public ReadableWav { +class WavHeaderBufferReader : public WavHeaderReader { public: - ReadableWavBuffer(const uint8_t* buf, size_t size, bool check_read_size) + WavHeaderBufferReader(const uint8_t* buf, size_t size, bool check_read_size) : buf_(buf), size_(size), pos_(0), buf_exhausted_(false), check_read_size_(check_read_size) {} - ~ReadableWavBuffer() override { + ~WavHeaderBufferReader() override { // Verify the entire buffer has been read. if (check_read_size_) EXPECT_EQ(size_, pos_); @@ -52,7 +52,7 @@ class ReadableWavBuffer : public ReadableWav { bool SeekForward(uint32_t num_bytes) override { // Verify we don't try to read outside of a properly sized header. - if (size_ >= kWavHeaderSize) + if (size_ >= kPcmWavHeaderSize) EXPECT_GE(size_, pos_ + num_bytes); EXPECT_FALSE(buf_exhausted_); @@ -69,6 +69,8 @@ class ReadableWavBuffer : public ReadableWav { return true; } + int64_t GetPosition() override { return pos_; } + private: const uint8_t* buf_; const size_t size_; @@ -81,34 +83,28 @@ class ReadableWavBuffer : public ReadableWav { // ones are accepted and the bad ones rejected. TEST(WavHeaderTest, CheckWavParameters) { // Try some really stupid values for one parameter at a time. - EXPECT_TRUE(CheckWavParameters(1, 8000, kWavFormatPcm, 1, 0)); - EXPECT_FALSE(CheckWavParameters(0, 8000, kWavFormatPcm, 1, 0)); - EXPECT_FALSE(CheckWavParameters(0x10000, 8000, kWavFormatPcm, 1, 0)); - EXPECT_FALSE(CheckWavParameters(1, 0, kWavFormatPcm, 1, 0)); - EXPECT_FALSE(CheckWavParameters(1, 8000, WavFormat(0), 1, 0)); - EXPECT_FALSE(CheckWavParameters(1, 8000, kWavFormatPcm, 0, 0)); - - // Try invalid format/bytes-per-sample combinations. - EXPECT_TRUE(CheckWavParameters(1, 8000, kWavFormatPcm, 2, 0)); - EXPECT_FALSE(CheckWavParameters(1, 8000, kWavFormatPcm, 4, 0)); - EXPECT_FALSE(CheckWavParameters(1, 8000, kWavFormatALaw, 2, 0)); - EXPECT_FALSE(CheckWavParameters(1, 8000, kWavFormatMuLaw, 2, 0)); + EXPECT_TRUE(CheckWavParameters(1, 8000, WavFormat::kWavFormatPcm, 0)); + EXPECT_FALSE(CheckWavParameters(0, 8000, WavFormat::kWavFormatPcm, 0)); + EXPECT_FALSE(CheckWavParameters(0x10000, 8000, WavFormat::kWavFormatPcm, 0)); + EXPECT_FALSE(CheckWavParameters(1, 0, WavFormat::kWavFormatPcm, 0)); // Too large values. - EXPECT_FALSE(CheckWavParameters(1 << 20, 1 << 20, kWavFormatPcm, 1, 0)); - EXPECT_FALSE(CheckWavParameters(1, 8000, kWavFormatPcm, 1, + EXPECT_FALSE( + CheckWavParameters(1 << 20, 1 << 20, WavFormat::kWavFormatPcm, 0)); + EXPECT_FALSE(CheckWavParameters(1, 8000, WavFormat::kWavFormatPcm, std::numeric_limits::max())); // Not the same number of samples for each channel. - EXPECT_FALSE(CheckWavParameters(3, 8000, kWavFormatPcm, 1, 5)); + EXPECT_FALSE(CheckWavParameters(3, 8000, WavFormat::kWavFormatPcm, 5)); } TEST(WavHeaderTest, ReadWavHeaderWithErrors) { size_t num_channels = 0; int sample_rate = 0; - WavFormat format = kWavFormatPcm; + WavFormat format = WavFormat::kWavFormatPcm; size_t bytes_per_sample = 0; size_t num_samples = 0; + int64_t data_start_pos = 0; // Test a few ways the header can be invalid. We start with the valid header // used in WriteAndReadWavHeader, and invalidate one field per test. The @@ -123,7 +119,7 @@ TEST(WavHeaderTest, ReadWavHeaderWithErrors) { 'W', 'A', 'V', 'E', 'f', 'm', 't', ' ', 16, 0, 0, 0, // size of fmt block - 8: 24 - 8 - 6, 0, // format: A-law (6) + 1, 0, // format: PCM (1) 17, 0, // channels: 17 0x39, 0x30, 0, 0, // sample rate: 12345 0xc9, 0x33, 0x03, 0, // byte rate: 1 * 17 * 12345 @@ -133,10 +129,11 @@ TEST(WavHeaderTest, ReadWavHeaderWithErrors) { 0x99, 0xd0, 0x5b, 0x07, // size of payload: 123457689 // clang-format on }; - ReadableWavBuffer r(kBadRiffID, sizeof(kBadRiffID), - /*check_read_size=*/false); + WavHeaderBufferReader r(kBadRiffID, sizeof(kBadRiffID), + /*check_read_size=*/false); EXPECT_FALSE(ReadWavHeader(&r, &num_channels, &sample_rate, &format, - &bytes_per_sample, &num_samples)); + &bytes_per_sample, &num_samples, + &data_start_pos)); } { constexpr uint8_t kBadBitsPerSample[] = { @@ -147,7 +144,7 @@ TEST(WavHeaderTest, ReadWavHeaderWithErrors) { 'W', 'A', 'V', 'E', 'f', 'm', 't', ' ', 16, 0, 0, 0, // size of fmt block - 8: 24 - 8 - 6, 0, // format: A-law (6) + 1, 0, // format: PCM (1) 17, 0, // channels: 17 0x39, 0x30, 0, 0, // sample rate: 12345 0xc9, 0x33, 0x03, 0, // byte rate: 1 * 17 * 12345 @@ -157,10 +154,11 @@ TEST(WavHeaderTest, ReadWavHeaderWithErrors) { 0x99, 0xd0, 0x5b, 0x07, // size of payload: 123457689 // clang-format on }; - ReadableWavBuffer r(kBadBitsPerSample, sizeof(kBadBitsPerSample), - /*check_read_size=*/true); + WavHeaderBufferReader r(kBadBitsPerSample, sizeof(kBadBitsPerSample), + /*check_read_size=*/true); EXPECT_FALSE(ReadWavHeader(&r, &num_channels, &sample_rate, &format, - &bytes_per_sample, &num_samples)); + &bytes_per_sample, &num_samples, + &data_start_pos)); } { constexpr uint8_t kBadByteRate[] = { @@ -171,7 +169,7 @@ TEST(WavHeaderTest, ReadWavHeaderWithErrors) { 'W', 'A', 'V', 'E', 'f', 'm', 't', ' ', 16, 0, 0, 0, // size of fmt block - 8: 24 - 8 - 6, 0, // format: A-law (6) + 1, 0, // format: PCM (1) 17, 0, // channels: 17 0x39, 0x30, 0, 0, // sample rate: 12345 0x00, 0x33, 0x03, 0, // byte rate: *BAD* @@ -181,10 +179,11 @@ TEST(WavHeaderTest, ReadWavHeaderWithErrors) { 0x99, 0xd0, 0x5b, 0x07, // size of payload: 123457689 // clang-format on }; - ReadableWavBuffer r(kBadByteRate, sizeof(kBadByteRate), - /*check_read_size=*/true); + WavHeaderBufferReader r(kBadByteRate, sizeof(kBadByteRate), + /*check_read_size=*/true); EXPECT_FALSE(ReadWavHeader(&r, &num_channels, &sample_rate, &format, - &bytes_per_sample, &num_samples)); + &bytes_per_sample, &num_samples, + &data_start_pos)); } { constexpr uint8_t kBadFmtHeaderSize[] = { @@ -195,7 +194,7 @@ TEST(WavHeaderTest, ReadWavHeaderWithErrors) { 'W', 'A', 'V', 'E', 'f', 'm', 't', ' ', 17, 0, 0, 0, // size of fmt block *BAD*. Only 16 and 18 permitted. - 6, 0, // format: A-law (6) + 1, 0, // format: PCM (1) 17, 0, // channels: 17 0x39, 0x30, 0, 0, // sample rate: 12345 0xc9, 0x33, 0x03, 0, // byte rate: 1 * 17 * 12345 @@ -206,10 +205,11 @@ TEST(WavHeaderTest, ReadWavHeaderWithErrors) { 0x99, 0xd0, 0x5b, 0x07, // size of payload: 123457689 // clang-format on }; - ReadableWavBuffer r(kBadFmtHeaderSize, sizeof(kBadFmtHeaderSize), - /*check_read_size=*/false); + WavHeaderBufferReader r(kBadFmtHeaderSize, sizeof(kBadFmtHeaderSize), + /*check_read_size=*/false); EXPECT_FALSE(ReadWavHeader(&r, &num_channels, &sample_rate, &format, - &bytes_per_sample, &num_samples)); + &bytes_per_sample, &num_samples, + &data_start_pos)); } { constexpr uint8_t kNonZeroExtensionField[] = { @@ -220,7 +220,7 @@ TEST(WavHeaderTest, ReadWavHeaderWithErrors) { 'W', 'A', 'V', 'E', 'f', 'm', 't', ' ', 18, 0, 0, 0, // size of fmt block - 8: 24 - 8 - 6, 0, // format: A-law (6) + 1, 0, // format: PCM (1) 17, 0, // channels: 17 0x39, 0x30, 0, 0, // sample rate: 12345 0xc9, 0x33, 0x03, 0, // byte rate: 1 * 17 * 12345 @@ -231,10 +231,12 @@ TEST(WavHeaderTest, ReadWavHeaderWithErrors) { 0x99, 0xd0, 0x5b, 0x07, // size of payload: 123457689 // clang-format on }; - ReadableWavBuffer r(kNonZeroExtensionField, sizeof(kNonZeroExtensionField), - /*check_read_size=*/false); + WavHeaderBufferReader r(kNonZeroExtensionField, + sizeof(kNonZeroExtensionField), + /*check_read_size=*/false); EXPECT_FALSE(ReadWavHeader(&r, &num_channels, &sample_rate, &format, - &bytes_per_sample, &num_samples)); + &bytes_per_sample, &num_samples, + &data_start_pos)); } { constexpr uint8_t kMissingDataChunk[] = { @@ -245,7 +247,7 @@ TEST(WavHeaderTest, ReadWavHeaderWithErrors) { 'W', 'A', 'V', 'E', 'f', 'm', 't', ' ', 16, 0, 0, 0, // size of fmt block - 8: 24 - 8 - 6, 0, // format: A-law (6) + 1, 0, // format: PCM (1) 17, 0, // channels: 17 0x39, 0x30, 0, 0, // sample rate: 12345 0xc9, 0x33, 0x03, 0, // byte rate: 1 * 17 * 12345 @@ -253,10 +255,11 @@ TEST(WavHeaderTest, ReadWavHeaderWithErrors) { 8, 0, // bits per sample: 1 * 8 // clang-format on }; - ReadableWavBuffer r(kMissingDataChunk, sizeof(kMissingDataChunk), - /*check_read_size=*/true); + WavHeaderBufferReader r(kMissingDataChunk, sizeof(kMissingDataChunk), + /*check_read_size=*/true); EXPECT_FALSE(ReadWavHeader(&r, &num_channels, &sample_rate, &format, - &bytes_per_sample, &num_samples)); + &bytes_per_sample, &num_samples, + &data_start_pos)); } { constexpr uint8_t kMissingFmtAndDataChunks[] = { @@ -267,37 +270,40 @@ TEST(WavHeaderTest, ReadWavHeaderWithErrors) { 'W', 'A', 'V', 'E', // clang-format on }; - ReadableWavBuffer r(kMissingFmtAndDataChunks, - sizeof(kMissingFmtAndDataChunks), - /*check_read_size=*/true); + WavHeaderBufferReader r(kMissingFmtAndDataChunks, + sizeof(kMissingFmtAndDataChunks), + /*check_read_size=*/true); EXPECT_FALSE(ReadWavHeader(&r, &num_channels, &sample_rate, &format, - &bytes_per_sample, &num_samples)); + &bytes_per_sample, &num_samples, + &data_start_pos)); } } // Try writing and reading a valid WAV header and make sure it looks OK. TEST(WavHeaderTest, WriteAndReadWavHeader) { - constexpr int kSize = 4 + kWavHeaderSize + 4; + constexpr int kSize = 4 + kPcmWavHeaderSize + 4; uint8_t buf[kSize]; + size_t header_size; memset(buf, 0xa4, sizeof(buf)); - WriteWavHeader(buf + 4, 17, 12345, kWavFormatALaw, 1, 123457689); + WriteWavHeader(17, 12345, WavFormat::kWavFormatPcm, 123457689, buf + 4, + &header_size); constexpr uint8_t kExpectedBuf[] = { // clang-format off - // clang formatting doesn't respect inline comments. + // clang formatting doesn't respect inline comments. 0xa4, 0xa4, 0xa4, 0xa4, // untouched bytes before header 'R', 'I', 'F', 'F', - 0xbd, 0xd0, 0x5b, 0x07, // size of whole file - 8: 123457689 + 44 - 8 + 0x56, 0xa1, 0xb7, 0x0e, // size of whole file - 8: 123457689 + 44 - 8 'W', 'A', 'V', 'E', 'f', 'm', 't', ' ', 16, 0, 0, 0, // size of fmt block - 8: 24 - 8 - 6, 0, // format: A-law (6) + 1, 0, // format: PCM (1) 17, 0, // channels: 17 0x39, 0x30, 0, 0, // sample rate: 12345 - 0xc9, 0x33, 0x03, 0, // byte rate: 1 * 17 * 12345 - 17, 0, // block align: NumChannels * BytesPerSample - 8, 0, // bits per sample: 1 * 8 + 0x92, 0x67, 0x06, 0, // byte rate: 2 * 17 * 12345 + 34, 0, // block align: NumChannels * BytesPerSample + 16, 0, // bits per sample: 2 * 8 'd', 'a', 't', 'a', - 0x99, 0xd0, 0x5b, 0x07, // size of payload: 123457689 + 0x32, 0xa1, 0xb7, 0x0e, // size of payload: 2 * 123457689 0xa4, 0xa4, 0xa4, 0xa4, // untouched bytes after header // clang-format on }; @@ -306,17 +312,18 @@ TEST(WavHeaderTest, WriteAndReadWavHeader) { size_t num_channels = 0; int sample_rate = 0; - WavFormat format = kWavFormatPcm; + WavFormat format = WavFormat::kWavFormatPcm; size_t bytes_per_sample = 0; size_t num_samples = 0; - ReadableWavBuffer r(buf + 4, sizeof(buf) - 8, - /*check_read_size=*/true); + int64_t data_start_pos = 0; + WavHeaderBufferReader r(buf + 4, sizeof(buf) - 8, + /*check_read_size=*/true); EXPECT_TRUE(ReadWavHeader(&r, &num_channels, &sample_rate, &format, - &bytes_per_sample, &num_samples)); + &bytes_per_sample, &num_samples, &data_start_pos)); EXPECT_EQ(17u, num_channels); EXPECT_EQ(12345, sample_rate); - EXPECT_EQ(kWavFormatALaw, format); - EXPECT_EQ(1u, bytes_per_sample); + EXPECT_EQ(WavFormat::kWavFormatPcm, format); + EXPECT_EQ(2u, bytes_per_sample); EXPECT_EQ(123457689u, num_samples); } @@ -332,7 +339,7 @@ TEST(WavHeaderTest, ReadAtypicalWavHeader) { 'f', 'm', 't', ' ', 18, 0, 0, 0, // Size of fmt block (with an atypical extension // size field). - 6, 0, // Format: A-law (6). + 1, 0, // Format: PCM (1). 17, 0, // Channels: 17. 0x39, 0x30, 0, 0, // Sample rate: 12345. 0xc9, 0x33, 0x03, 0, // Byte rate: 1 * 17 * 12345. @@ -346,15 +353,16 @@ TEST(WavHeaderTest, ReadAtypicalWavHeader) { size_t num_channels = 0; int sample_rate = 0; - WavFormat format = kWavFormatPcm; + WavFormat format = WavFormat::kWavFormatPcm; size_t bytes_per_sample = 0; size_t num_samples = 0; - ReadableWavBuffer r(kBuf, sizeof(kBuf), /*check_read_size=*/true); + int64_t data_start_pos = 0; + WavHeaderBufferReader r(kBuf, sizeof(kBuf), /*check_read_size=*/true); EXPECT_TRUE(ReadWavHeader(&r, &num_channels, &sample_rate, &format, - &bytes_per_sample, &num_samples)); + &bytes_per_sample, &num_samples, &data_start_pos)); EXPECT_EQ(17u, num_channels); EXPECT_EQ(12345, sample_rate); - EXPECT_EQ(kWavFormatALaw, format); + EXPECT_EQ(WavFormat::kWavFormatPcm, format); EXPECT_EQ(1u, bytes_per_sample); EXPECT_EQ(123457689u, num_samples); } @@ -372,7 +380,7 @@ TEST(WavHeaderTest, ReadWavHeaderWithOptionalChunk) { 'W', 'A', 'V', 'E', 'f', 'm', 't', ' ', 16, 0, 0, 0, // Size of fmt block. - 6, 0, // Format: A-law (6). + 1, 0, // Format: PCM (1). 17, 0, // Channels: 17. 0x39, 0x30, 0, 0, // Sample rate: 12345. 0xc9, 0x33, 0x03, 0, // Byte rate: 1 * 17 * 12345. @@ -388,15 +396,16 @@ TEST(WavHeaderTest, ReadWavHeaderWithOptionalChunk) { size_t num_channels = 0; int sample_rate = 0; - WavFormat format = kWavFormatPcm; + WavFormat format = WavFormat::kWavFormatPcm; size_t bytes_per_sample = 0; size_t num_samples = 0; - ReadableWavBuffer r(kBuf, sizeof(kBuf), /*check_read_size=*/true); + int64_t data_start_pos = 0; + WavHeaderBufferReader r(kBuf, sizeof(kBuf), /*check_read_size=*/true); EXPECT_TRUE(ReadWavHeader(&r, &num_channels, &sample_rate, &format, - &bytes_per_sample, &num_samples)); + &bytes_per_sample, &num_samples, &data_start_pos)); EXPECT_EQ(17u, num_channels); EXPECT_EQ(12345, sample_rate); - EXPECT_EQ(kWavFormatALaw, format); + EXPECT_EQ(WavFormat::kWavFormatPcm, format); EXPECT_EQ(1u, bytes_per_sample); EXPECT_EQ(123457689u, num_samples); } @@ -415,7 +424,7 @@ TEST(WavHeaderTest, ReadWavHeaderWithDataBeforeFormat) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Data 16 bytes. 'f', 'm', 't', ' ', 16, 0, 0, 0, // Size of fmt block. - 6, 0, // Format: A-law (6). + 1, 0, // Format: Pcm (1). 1, 0, // Channels: 1. 60, 0, 0, 0, // Sample rate: 60. 60, 0, 0, 0, // Byte rate: 1 * 1 * 60. @@ -426,12 +435,13 @@ TEST(WavHeaderTest, ReadWavHeaderWithDataBeforeFormat) { size_t num_channels = 0; int sample_rate = 0; - WavFormat format = kWavFormatPcm; + WavFormat format = WavFormat::kWavFormatPcm; size_t bytes_per_sample = 0; size_t num_samples = 0; - ReadableWavBuffer r(kBuf, sizeof(kBuf), /*check_read_size=*/false); + int64_t data_start_pos = 0; + WavHeaderBufferReader r(kBuf, sizeof(kBuf), /*check_read_size=*/false); EXPECT_FALSE(ReadWavHeader(&r, &num_channels, &sample_rate, &format, - &bytes_per_sample, &num_samples)); + &bytes_per_sample, &num_samples, &data_start_pos)); } } // namespace webrtc diff --git a/modules/audio_processing/logging/apm_data_dumper.cc b/modules/audio_processing/logging/apm_data_dumper.cc index 6d84750d74..917df60c9c 100644 --- a/modules/audio_processing/logging/apm_data_dumper.cc +++ b/modules/audio_processing/logging/apm_data_dumper.cc @@ -76,12 +76,14 @@ FILE* ApmDataDumper::GetRawFile(const char* name) { WavWriter* ApmDataDumper::GetWavFile(const char* name, int sample_rate_hz, - int num_channels) { + int num_channels, + WavFile::SampleFormat format) { std::string filename = FormFileName(output_dir_, name, instance_index_, recording_set_index_, ".wav"); auto& f = wav_files_[filename]; if (!f) { - f.reset(new WavWriter(filename.c_str(), sample_rate_hz, num_channels)); + f.reset( + new WavWriter(filename.c_str(), sample_rate_hz, num_channels, format)); } return f.get(); } diff --git a/modules/audio_processing/logging/apm_data_dumper.h b/modules/audio_processing/logging/apm_data_dumper.h index 92adf86492..17a5c87716 100644 --- a/modules/audio_processing/logging/apm_data_dumper.h +++ b/modules/audio_processing/logging/apm_data_dumper.h @@ -242,7 +242,8 @@ class ApmDataDumper { int num_channels) { #if WEBRTC_APM_DEBUG_DUMP == 1 if (recording_activated_) { - WavWriter* file = GetWavFile(name, sample_rate_hz, num_channels); + WavWriter* file = GetWavFile(name, sample_rate_hz, num_channels, + WavFile::SampleFormat::kFloat); file->WriteSamples(v, v_length); } #endif @@ -271,7 +272,10 @@ class ApmDataDumper { std::unordered_map> wav_files_; FILE* GetRawFile(const char* name); - WavWriter* GetWavFile(const char* name, int sample_rate_hz, int num_channels); + WavWriter* GetWavFile(const char* name, + int sample_rate_hz, + int num_channels, + WavFile::SampleFormat format); #endif RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(ApmDataDumper); }; diff --git a/modules/audio_processing/test/audio_processing_simulator.cc b/modules/audio_processing/test/audio_processing_simulator.cc index 84cd9a08b8..0201983f8c 100644 --- a/modules/audio_processing/test/audio_processing_simulator.cc +++ b/modules/audio_processing/test/audio_processing_simulator.cc @@ -348,7 +348,8 @@ void AudioProcessingSimulator::SetupOutput() { std::unique_ptr out_file( new WavWriter(filename, out_config_.sample_rate_hz(), - static_cast(out_config_.num_channels()))); + static_cast(out_config_.num_channels()), + settings_.wav_output_format)); buffer_file_writer_.reset(new ChannelBufferWavWriter(std::move(out_file))); } else if (settings_.aec_dump_input_string.has_value()) { buffer_memory_writer_ = std::make_unique( @@ -365,7 +366,8 @@ void AudioProcessingSimulator::SetupOutput() { } linear_aec_output_file_writer_.reset( - new WavWriter(filename, 16000, out_config_.num_channels())); + new WavWriter(filename, 16000, out_config_.num_channels(), + settings_.wav_output_format)); linear_aec_output_buf_.resize(out_config_.num_channels()); } @@ -381,7 +383,8 @@ void AudioProcessingSimulator::SetupOutput() { std::unique_ptr reverse_out_file( new WavWriter(filename, reverse_out_config_.sample_rate_hz(), - static_cast(reverse_out_config_.num_channels()))); + static_cast(reverse_out_config_.num_channels()), + settings_.wav_output_format)); reverse_buffer_file_writer_.reset( new ChannelBufferWavWriter(std::move(reverse_out_file))); } diff --git a/modules/audio_processing/test/audio_processing_simulator.h b/modules/audio_processing/test/audio_processing_simulator.h index c28dd6d9be..affb644cc5 100644 --- a/modules/audio_processing/test/audio_processing_simulator.h +++ b/modules/audio_processing/test/audio_processing_simulator.h @@ -93,6 +93,7 @@ struct SimulationSettings { bool store_intermediate_output = false; bool print_aec_parameter_values = false; bool dump_internal_data = false; + WavFile::SampleFormat wav_output_format = WavFile::SampleFormat::kInt16; absl::optional dump_internal_data_output_dir; absl::optional call_order_input_filename; absl::optional call_order_output_filename; diff --git a/modules/audio_processing/test/audioproc_float_impl.cc b/modules/audio_processing/test/audioproc_float_impl.cc index ec637c1dcb..0c08349e25 100644 --- a/modules/audio_processing/test/audioproc_float_impl.cc +++ b/modules/audio_processing/test/audioproc_float_impl.cc @@ -255,6 +255,10 @@ ABSL_FLAG(std::string, dump_data_output_dir, "", "Internal data dump output directory"); +ABSL_FLAG(bool, + float_wav_output, + false, + "Produce floating point wav output files."); namespace webrtc { namespace test { @@ -437,6 +441,9 @@ SimulationSettings CreateSettings() { settings.dump_internal_data = absl::GetFlag(FLAGS_dump_data); SetSettingIfSpecified(absl::GetFlag(FLAGS_dump_data_output_dir), &settings.dump_internal_data_output_dir); + settings.wav_output_format = absl::GetFlag(FLAGS_float_wav_output) + ? WavFile::SampleFormat::kFloat + : WavFile::SampleFormat::kInt16; return settings; }