opus: add helper function to extract voice activity information
BUG=webrtc:11643 Change-Id: I3cebc40916de0e4b0f5e41f5fda97dd53f76e4e3 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/176740 Commit-Queue: Philipp Hancke <philipp.hancke@googlemail.com> Reviewed-by: Minyue Li <minyue@webrtc.org> Reviewed-by: Jesus de Vicente Pena <devicentepena@webrtc.org> Cr-Commit-Position: refs/heads/master@{#31490}
This commit is contained in:
parent
0ca13d97d2
commit
0fd1ef135c
@ -678,33 +678,7 @@ int WebRtcOpus_FecDurationEst(const uint8_t* payload,
|
||||
return samples;
|
||||
}
|
||||
|
||||
// This method is based on Definition of the Opus Audio Codec
|
||||
// (https://tools.ietf.org/html/rfc6716). Basically, this method is based on
|
||||
// parsing the LP layer of an Opus packet, particularly the LBRR flag.
|
||||
int WebRtcOpus_PacketHasFec(const uint8_t* payload,
|
||||
size_t payload_length_bytes) {
|
||||
if (payload == NULL || payload_length_bytes == 0)
|
||||
return 0;
|
||||
|
||||
// In CELT_ONLY mode, packets should not have FEC.
|
||||
if (payload[0] & 0x80)
|
||||
return 0;
|
||||
|
||||
// Max number of frames in an Opus packet is 48.
|
||||
opus_int16 frame_sizes[48];
|
||||
const unsigned char* frame_data[48];
|
||||
|
||||
// Parse packet to get the frames. But we only care about the first frame,
|
||||
// since we can only decode the FEC from the first one.
|
||||
if (opus_packet_parse(payload, static_cast<opus_int32>(payload_length_bytes),
|
||||
NULL, frame_data, frame_sizes, NULL) < 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (frame_sizes[0] <= 1) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int WebRtcOpus_NumSilkFrames(const uint8_t* payload) {
|
||||
// For computing the payload length in ms, the sample rate is not important
|
||||
// since it cancels out. We use 48 kHz, but any valid sample rate would work.
|
||||
int payload_length_ms =
|
||||
@ -727,10 +701,43 @@ int WebRtcOpus_PacketHasFec(const uint8_t* payload,
|
||||
default:
|
||||
return 0; // It is actually even an invalid packet.
|
||||
}
|
||||
return silk_frames;
|
||||
}
|
||||
|
||||
// This method is based on Definition of the Opus Audio Codec
|
||||
// (https://tools.ietf.org/html/rfc6716). Basically, this method is based on
|
||||
// parsing the LP layer of an Opus packet, particularly the LBRR flag.
|
||||
int WebRtcOpus_PacketHasFec(const uint8_t* payload,
|
||||
size_t payload_length_bytes) {
|
||||
if (payload == NULL || payload_length_bytes == 0)
|
||||
return 0;
|
||||
|
||||
// In CELT_ONLY mode, packets should not have FEC.
|
||||
if (payload[0] & 0x80)
|
||||
return 0;
|
||||
|
||||
int silk_frames = WebRtcOpus_NumSilkFrames(payload);
|
||||
if (silk_frames == 0)
|
||||
return 0; // Not valid.
|
||||
|
||||
const int channels = opus_packet_get_nb_channels(payload);
|
||||
RTC_DCHECK(channels == 1 || channels == 2);
|
||||
|
||||
// Max number of frames in an Opus packet is 48.
|
||||
opus_int16 frame_sizes[48];
|
||||
const unsigned char* frame_data[48];
|
||||
|
||||
// Parse packet to get the frames. But we only care about the first frame,
|
||||
// since we can only decode the FEC from the first one.
|
||||
if (opus_packet_parse(payload, static_cast<opus_int32>(payload_length_bytes),
|
||||
NULL, frame_data, frame_sizes, NULL) < 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (frame_sizes[0] < 1) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// A frame starts with the LP layer. The LP layer begins with two to eight
|
||||
// header bits. These consist of one VAD bit per SILK frame (up to 3),
|
||||
// followed by a single flag indicating the presence of LBRR frames.
|
||||
@ -748,3 +755,45 @@ int WebRtcOpus_PacketHasFec(const uint8_t* payload,
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int WebRtcOpus_PacketHasVoiceActivity(const uint8_t* payload,
|
||||
size_t payload_length_bytes) {
|
||||
if (payload == NULL || payload_length_bytes == 0)
|
||||
return 0;
|
||||
|
||||
// In CELT_ONLY mode we can not determine whether there is VAD.
|
||||
if (payload[0] & 0x80)
|
||||
return -1;
|
||||
|
||||
int silk_frames = WebRtcOpus_NumSilkFrames(payload);
|
||||
if (silk_frames == 0)
|
||||
return -1;
|
||||
|
||||
const int channels = opus_packet_get_nb_channels(payload);
|
||||
RTC_DCHECK(channels == 1 || channels == 2);
|
||||
|
||||
// Max number of frames in an Opus packet is 48.
|
||||
opus_int16 frame_sizes[48];
|
||||
const unsigned char* frame_data[48];
|
||||
|
||||
// Parse packet to get the frames.
|
||||
int frames =
|
||||
opus_packet_parse(payload, static_cast<opus_int32>(payload_length_bytes),
|
||||
NULL, frame_data, frame_sizes, NULL);
|
||||
if (frames < 0)
|
||||
return -1;
|
||||
|
||||
// Iterate over all Opus frames which may contain multiple SILK frames.
|
||||
for (int frame = 0; frame < frames; frame++) {
|
||||
if (frame_sizes[frame] < 1) {
|
||||
continue;
|
||||
}
|
||||
if (frame_data[frame][0] >> (8 - silk_frames))
|
||||
return 1;
|
||||
if (channels == 2 &&
|
||||
(frame_data[frame][0] << (silk_frames + 1)) >> (8 - silk_frames))
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -510,6 +510,22 @@ int WebRtcOpus_FecDurationEst(const uint8_t* payload,
|
||||
int WebRtcOpus_PacketHasFec(const uint8_t* payload,
|
||||
size_t payload_length_bytes);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_PacketHasVoiceActivity(...)
|
||||
*
|
||||
* This function returns the SILK VAD information encoded in the opus packet.
|
||||
* For CELT-only packets that do not have VAD information, it returns -1.
|
||||
* Input:
|
||||
* - payload : Encoded data pointer
|
||||
* - payload_length_bytes : Bytes of encoded data
|
||||
*
|
||||
* Return value : 0 - no frame had the VAD flag set, or the payload is NULL or empty.
|
||||
* 1 - at least one frame had the VAD flag set.
|
||||
* -1 - VAD status could not be determined (CELT-only packet, invalid frame duration, or packet parse failure).
|
||||
*/
|
||||
int WebRtcOpus_PacketHasVoiceActivity(const uint8_t* payload,
|
||||
size_t payload_length_bytes);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
@ -949,4 +949,30 @@ TEST_P(OpusTest, OpusDecodeRepacketized) {
|
||||
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
|
||||
}
|
||||
|
||||
// A CELT-only packet (top bit of the TOC byte set) carries no SILK VAD flags,
// so the VAD status must be reported as unknown (-1).
TEST(OpusVadTest, CeltUnknownStatus) {
  const uint8_t celt_only_packet[] = {0x80};
  const int vad_status = WebRtcOpus_PacketHasVoiceActivity(
      celt_only_packet, sizeof(celt_only_packet));
  EXPECT_EQ(-1, vad_status);
}
|
||||
|
||||
// Single mono frame (TOC 0x78) whose first payload byte has the top bit,
// i.e. the VAD flag, set.
TEST(OpusVadTest, Mono20msVadSet) {
  const uint8_t silk20msMonoVad[] = {0x78, 0x80};
  // Compare against 1 explicitly: EXPECT_TRUE would also accept -1, which
  // means "VAD status unknown".
  EXPECT_EQ(WebRtcOpus_PacketHasVoiceActivity(silk20msMonoVad,
                                              sizeof(silk20msMonoVad)),
            1);
}
|
||||
|
||||
// Single mono frame (TOC 0x78) whose header byte has no flag bits set: the
// packet must be reported as having no voice activity.
TEST(OpusVadTest, Mono20MsVadUnset) {
  const uint8_t silent_mono_packet[] = {0x78, 0x00};
  const int vad_status = WebRtcOpus_PacketHasVoiceActivity(
      silent_mono_packet, sizeof(silent_mono_packet));
  EXPECT_FALSE(vad_status);
}
|
||||
|
||||
// Stereo packet (TOC stereo bit 0x04 set) where only bit 5 of the header
// byte is set; the expected result relies on that bit being the side-channel
// VAD flag.
TEST(OpusVadTest, Stereo20MsVadOnSideChannel) {
  const uint8_t silk20msStereoVadSideChannel[] = {0x78 | 0x04, 0x20};
  // Compare against 1 explicitly: EXPECT_TRUE would also accept -1, which
  // means "VAD status unknown".
  EXPECT_EQ(
      WebRtcOpus_PacketHasVoiceActivity(silk20msStereoVadSideChannel,
                                        sizeof(silk20msStereoVadSideChannel)),
      1);
}
|
||||
|
||||
// Packet with TOC frame-count code 1 (two Opus frames): the first frame byte
// has no flags set, the second has its top (VAD) bit set.
TEST(OpusVadTest, TwoOpusMonoFramesVadOnSecond) {
  const uint8_t twoMonoFrames[] = {0x78 | 0x1, 0x00, 0x80};
  // Compare against 1 explicitly: EXPECT_TRUE would also accept -1, which
  // means "VAD status unknown".
  EXPECT_EQ(
      WebRtcOpus_PacketHasVoiceActivity(twoMonoFrames, sizeof(twoMonoFrames)),
      1);
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user