Remove use of AcmReceiver in ChannelReceive

ChannelReceive is now owning and interfacing with NetEq directly.
A new ResamplerHelper is added to acm_resampler.cc/.h, to do the
audio resampling that was previously done inside AcmReceiver.

AcmReceiver still remains, since it is used in other places for now.

Bug: webrtc:14867
Change-Id: If3eb6415e06b9b5e729d393713f3fccb31b0570f
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/361820
Auto-Submit: Henrik Lundin <henrik.lundin@webrtc.org>
Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org>
Commit-Queue: Henrik Lundin <henrik.lundin@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#42974}
This commit is contained in:
Henrik Lundin 2024-09-06 11:53:14 +00:00 committed by WebRTC LUCI CQ
parent 3ad2c8d717
commit c9aaf11985
6 changed files with 230 additions and 117 deletions

View File

@ -81,6 +81,7 @@ rtc_library("audio") {
"../modules/audio_coding:audio_coding_module_typedefs",
"../modules/audio_coding:audio_encoder_cng",
"../modules/audio_coding:audio_network_adaptor_config",
"../modules/audio_coding:default_neteq_factory",
"../modules/audio_coding:red",
"../modules/audio_device",
"../modules/audio_processing",

View File

@ -20,6 +20,7 @@
#include "api/audio/audio_device.h"
#include "api/crypto/frame_decryptor_interface.h"
#include "api/frame_transformer_interface.h"
#include "api/neteq/neteq.h"
#include "api/rtc_event_log/rtc_event_log.h"
#include "api/sequence_checker.h"
#include "api/task_queue/pending_task_safety_flag.h"
@ -32,8 +33,10 @@
#include "audio/utility/audio_frame_operations.h"
#include "logging/rtc_event_log/events/rtc_event_audio_playout.h"
#include "logging/rtc_event_log/events/rtc_event_neteq_set_minimum_delay.h"
#include "modules/audio_coding/acm2/acm_receiver.h"
#include "modules/audio_coding/acm2/acm_resampler.h"
#include "modules/audio_coding/acm2/call_statistics.h"
#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor_config.h"
#include "modules/audio_coding/neteq/default_neteq_factory.h"
#include "modules/pacing/packet_router.h"
#include "modules/rtp_rtcp/include/receive_statistics.h"
#include "modules/rtp_rtcp/include/remote_ntp_time_estimator.h"
@ -67,23 +70,24 @@ constexpr double kAudioSampleDurationSeconds = 0.01;
constexpr int kVoiceEngineMinMinPlayoutDelayMs = 0;
constexpr int kVoiceEngineMaxMinPlayoutDelayMs = 10000;
acm2::AcmReceiver::Config AcmConfig(
std::unique_ptr<NetEq> CreateNetEq(
NetEqFactory* neteq_factory,
rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,
std::optional<AudioCodecPairId> codec_pair_id,
size_t jitter_buffer_max_packets,
bool jitter_buffer_fast_playout,
int jitter_buffer_min_delay_ms) {
acm2::AcmReceiver::Config acm_config;
acm_config.neteq_factory = neteq_factory;
acm_config.decoder_factory = decoder_factory;
acm_config.neteq_config.codec_pair_id = codec_pair_id;
acm_config.neteq_config.max_packets_in_buffer = jitter_buffer_max_packets;
acm_config.neteq_config.enable_fast_accelerate = jitter_buffer_fast_playout;
acm_config.neteq_config.enable_muted_state = true;
acm_config.neteq_config.min_delay_ms = jitter_buffer_min_delay_ms;
return acm_config;
int jitter_buffer_min_delay_ms,
const Environment& env,
scoped_refptr<AudioDecoderFactory> decoder_factory) {
NetEq::Config config;
config.codec_pair_id = codec_pair_id;
config.max_packets_in_buffer = jitter_buffer_max_packets;
config.enable_fast_accelerate = jitter_buffer_fast_playout;
config.enable_muted_state = true;
config.min_delay_ms = jitter_buffer_min_delay_ms;
if (neteq_factory) {
return neteq_factory->Create(env, config, std::move(decoder_factory));
}
return DefaultNetEqFactory().Create(env, config, std::move(decoder_factory));
}
class ChannelReceive : public ChannelReceiveInterface,
@ -231,6 +235,7 @@ class ChannelReceive : public ChannelReceiveInterface,
rtc::RaceChecker audio_thread_race_checker_;
Mutex callback_mutex_;
Mutex volume_settings_mutex_;
mutable Mutex call_stats_mutex_;
bool playing_ RTC_GUARDED_BY(worker_thread_checker_) = false;
@ -249,8 +254,10 @@ class ChannelReceive : public ChannelReceiveInterface,
std::optional<int64_t> last_received_rtp_system_time_ms_
RTC_GUARDED_BY(&worker_thread_checker_);
// The AcmReceiver is thread safe, using its own lock.
acm2::AcmReceiver acm_receiver_;
const std::unique_ptr<NetEq> neteq_; // NetEq is thread-safe; no lock needed.
acm2::ResamplerHelper resampler_helper_
RTC_GUARDED_BY(audio_thread_race_checker_);
acm2::CallStatistics call_stats_ RTC_GUARDED_BY(call_stats_mutex_);
AudioSinkInterface* audio_sink_ = nullptr;
AudioLevel _outputAudioLevel;
@ -340,17 +347,19 @@ void ChannelReceive::OnReceivedPayloadData(
return;
}
// Push the incoming payload (parsed and ready for decoding) into the ACM
if (acm_receiver_.InsertPacket(rtpHeader, payload, receive_time) != 0) {
// Push the incoming payload (parsed and ready for decoding) into NetEq.
if (payload.empty()) {
neteq_->InsertEmptyPacket(rtpHeader);
} else if (neteq_->InsertPacket(rtpHeader, payload, receive_time) < 0) {
RTC_DLOG(LS_ERROR) << "ChannelReceive::OnReceivedPayloadData() unable to "
"push data to the ACM";
"insert packet into NetEq; PT = "
<< static_cast<int>(rtpHeader.payloadType);
return;
}
TimeDelta round_trip_time = rtp_rtcp_->LastRtt().value_or(TimeDelta::Zero());
std::vector<uint16_t> nack_list =
acm_receiver_.GetNackList(round_trip_time.ms());
std::vector<uint16_t> nack_list = neteq_->GetNackList(round_trip_time.ms());
if (!nack_list.empty()) {
// Can't use nack_list.data() since it's not supported by all
// compilers.
@ -390,8 +399,8 @@ AudioMixer::Source::AudioFrameInfo ChannelReceive::GetAudioFrameWithInfo(
env_.event_log().Log(std::make_unique<RtcEventAudioPlayout>(remote_ssrc_));
// Get 10ms raw PCM data from the ACM (mixer limits output frequency)
if (acm_receiver_.GetAudio(audio_frame->sample_rate_hz_, audio_frame) == -1) {
if ((neteq_->GetAudio(audio_frame) != NetEq::kOK) ||
!resampler_helper_.MaybeResample(sample_rate_hz, audio_frame)) {
RTC_DLOG(LS_ERROR)
<< "ChannelReceive::GetAudioFrame() PlayoutData10Ms() failed!";
// In all likelihood, the audio in this frame is garbage. We return an
@ -404,6 +413,11 @@ AudioMixer::Source::AudioFrameInfo ChannelReceive::GetAudioFrameWithInfo(
return AudioMixer::Source::AudioFrameInfo::kError;
}
{
MutexLock lock(&call_stats_mutex_);
call_stats_.DecodedByNetEq(audio_frame->speech_type_, audio_frame->muted());
}
{
// Pass the audio buffers to an optional sink callback, before applying
// scaling/panning, as that applies to the mix operation.
@ -497,8 +511,8 @@ AudioMixer::Source::AudioFrameInfo ChannelReceive::GetAudioFrameWithInfo(
worker_thread_->PostTask(SafeTask(worker_safety_.flag(), [this]() {
RTC_DCHECK_RUN_ON(&worker_thread_checker_);
RTC_HISTOGRAM_COUNTS_1000("WebRTC.Audio.TargetJitterBufferDelayMs",
acm_receiver_.TargetDelayMs());
const int jitter_buffer_delay = acm_receiver_.FilteredCurrentDelayMs();
neteq_->TargetDelayMs());
const int jitter_buffer_delay = neteq_->FilteredCurrentDelayMs();
RTC_HISTOGRAM_COUNTS_1000("WebRTC.Audio.ReceiverDelayEstimateMs",
jitter_buffer_delay + playout_delay_ms_);
RTC_HISTOGRAM_COUNTS_1000("WebRTC.Audio.ReceiverJitterBufferDelayMs",
@ -516,9 +530,12 @@ AudioMixer::Source::AudioFrameInfo ChannelReceive::GetAudioFrameWithInfo(
int ChannelReceive::PreferredSampleRate() const {
RTC_DCHECK_RUNS_SERIALIZED(&audio_thread_race_checker_);
const std::optional<NetEq::DecoderFormat> decoder =
neteq_->GetCurrentDecoderFormat();
const int last_packet_sample_rate_hz = decoder ? decoder->sample_rate_hz : 0;
// Return the bigger of playout and receive frequency in the ACM.
return std::max(acm_receiver_.last_packet_sample_rate_hz().value_or(0),
acm_receiver_.last_output_sample_rate_hz());
return std::max(last_packet_sample_rate_hz,
neteq_->last_output_sample_rate_hz());
}
ChannelReceive::ChannelReceive(
@ -542,13 +559,13 @@ ChannelReceive::ChannelReceive(
rtp_receive_statistics_(ReceiveStatistics::Create(&env_.clock())),
remote_ssrc_(remote_ssrc),
source_tracker_(&env_.clock()),
acm_receiver_(env_,
AcmConfig(neteq_factory,
decoder_factory,
codec_pair_id,
jitter_buffer_max_packets,
jitter_buffer_fast_playout,
jitter_buffer_min_delay_ms)),
neteq_(CreateNetEq(neteq_factory,
codec_pair_id,
jitter_buffer_max_packets,
jitter_buffer_fast_playout,
jitter_buffer_min_delay_ms,
env_,
decoder_factory)),
_outputAudioLevel(),
ntp_estimator_(&env_.clock()),
playout_timestamp_rtp_(0),
@ -610,13 +627,18 @@ void ChannelReceive::StopPlayout() {
RTC_DCHECK_RUN_ON(&worker_thread_checker_);
playing_ = false;
_outputAudioLevel.ResetLevelFullRange();
acm_receiver_.FlushBuffers();
neteq_->FlushBuffers();
}
std::optional<std::pair<int, SdpAudioFormat>> ChannelReceive::GetReceiveCodec()
const {
RTC_DCHECK_RUN_ON(&worker_thread_checker_);
return acm_receiver_.LastDecoder();
std::optional<NetEq::DecoderFormat> decoder =
neteq_->GetCurrentDecoderFormat();
if (!decoder) {
return std::nullopt;
}
return std::make_pair(decoder->payload_type, decoder->sdp_format);
}
void ChannelReceive::SetReceiveCodecs(
@ -627,7 +649,7 @@ void ChannelReceive::SetReceiveCodecs(
payload_type_frequencies_[kv.first] = kv.second.clockrate_hz;
}
payload_type_map_ = codecs;
acm_receiver_.SetCodecs(codecs);
neteq_->SetCodecs(codecs);
}
void ChannelReceive::OnRtpPacket(const RtpPacketReceived& packet) {
@ -877,11 +899,11 @@ void ChannelReceive::SetNACKStatus(bool enable, int max_packets) {
// None of these functions can fail.
if (enable) {
rtp_receive_statistics_->SetMaxReorderingThreshold(max_packets);
acm_receiver_.EnableNack(max_packets);
neteq_->EnableNack(max_packets);
} else {
rtp_receive_statistics_->SetMaxReorderingThreshold(
kDefaultMaxReorderingThreshold);
acm_receiver_.DisableNack();
neteq_->DisableNack();
}
}
@ -959,22 +981,83 @@ uint32_t ChannelReceive::GetLocalSsrc() const {
NetworkStatistics ChannelReceive::GetNetworkStatistics(
bool get_and_clear_legacy_stats) const {
RTC_DCHECK_RUN_ON(&worker_thread_checker_);
NetworkStatistics stats;
acm_receiver_.GetNetworkStatistics(&stats, get_and_clear_legacy_stats);
return stats;
NetworkStatistics acm_stat;
NetEqNetworkStatistics neteq_stat;
if (get_and_clear_legacy_stats) {
// NetEq function always returns zero, so we don't check the return value.
neteq_->NetworkStatistics(&neteq_stat);
acm_stat.currentExpandRate = neteq_stat.expand_rate;
acm_stat.currentSpeechExpandRate = neteq_stat.speech_expand_rate;
acm_stat.currentPreemptiveRate = neteq_stat.preemptive_rate;
acm_stat.currentAccelerateRate = neteq_stat.accelerate_rate;
acm_stat.currentSecondaryDecodedRate = neteq_stat.secondary_decoded_rate;
acm_stat.currentSecondaryDiscardedRate =
neteq_stat.secondary_discarded_rate;
acm_stat.meanWaitingTimeMs = neteq_stat.mean_waiting_time_ms;
acm_stat.maxWaitingTimeMs = neteq_stat.max_waiting_time_ms;
} else {
neteq_stat = neteq_->CurrentNetworkStatistics();
acm_stat.currentExpandRate = 0;
acm_stat.currentSpeechExpandRate = 0;
acm_stat.currentPreemptiveRate = 0;
acm_stat.currentAccelerateRate = 0;
acm_stat.currentSecondaryDecodedRate = 0;
acm_stat.currentSecondaryDiscardedRate = 0;
acm_stat.meanWaitingTimeMs = -1;
acm_stat.maxWaitingTimeMs = 1;
}
acm_stat.currentBufferSize = neteq_stat.current_buffer_size_ms;
acm_stat.preferredBufferSize = neteq_stat.preferred_buffer_size_ms;
acm_stat.jitterPeaksFound = neteq_stat.jitter_peaks_found ? true : false;
NetEqLifetimeStatistics neteq_lifetime_stat = neteq_->GetLifetimeStatistics();
acm_stat.totalSamplesReceived = neteq_lifetime_stat.total_samples_received;
acm_stat.concealedSamples = neteq_lifetime_stat.concealed_samples;
acm_stat.silentConcealedSamples =
neteq_lifetime_stat.silent_concealed_samples;
acm_stat.concealmentEvents = neteq_lifetime_stat.concealment_events;
acm_stat.jitterBufferDelayMs = neteq_lifetime_stat.jitter_buffer_delay_ms;
acm_stat.jitterBufferTargetDelayMs =
neteq_lifetime_stat.jitter_buffer_target_delay_ms;
acm_stat.jitterBufferMinimumDelayMs =
neteq_lifetime_stat.jitter_buffer_minimum_delay_ms;
acm_stat.jitterBufferEmittedCount =
neteq_lifetime_stat.jitter_buffer_emitted_count;
acm_stat.delayedPacketOutageSamples =
neteq_lifetime_stat.delayed_packet_outage_samples;
acm_stat.relativePacketArrivalDelayMs =
neteq_lifetime_stat.relative_packet_arrival_delay_ms;
acm_stat.interruptionCount = neteq_lifetime_stat.interruption_count;
acm_stat.totalInterruptionDurationMs =
neteq_lifetime_stat.total_interruption_duration_ms;
acm_stat.insertedSamplesForDeceleration =
neteq_lifetime_stat.inserted_samples_for_deceleration;
acm_stat.removedSamplesForAcceleration =
neteq_lifetime_stat.removed_samples_for_acceleration;
acm_stat.fecPacketsReceived = neteq_lifetime_stat.fec_packets_received;
acm_stat.fecPacketsDiscarded = neteq_lifetime_stat.fec_packets_discarded;
acm_stat.totalProcessingDelayUs =
neteq_lifetime_stat.total_processing_delay_us;
acm_stat.packetsDiscarded = neteq_lifetime_stat.packets_discarded;
NetEqOperationsAndState neteq_operations_and_state =
neteq_->GetOperationsAndState();
acm_stat.packetBufferFlushes =
neteq_operations_and_state.packet_buffer_flushes;
return acm_stat;
}
AudioDecodingCallStats ChannelReceive::GetDecodingCallStatistics() const {
RTC_DCHECK_RUN_ON(&worker_thread_checker_);
AudioDecodingCallStats stats;
acm_receiver_.GetDecodingCallStatistics(&stats);
return stats;
MutexLock lock(&call_stats_mutex_);
return call_stats_.GetDecodingStatistics();
}
uint32_t ChannelReceive::GetDelayEstimate() const {
RTC_DCHECK_RUN_ON(&worker_thread_checker_);
// Return the current jitter buffer delay + playout delay.
return acm_receiver_.FilteredCurrentDelayMs() + playout_delay_ms_;
return neteq_->FilteredCurrentDelayMs() + playout_delay_ms_;
}
bool ChannelReceive::SetMinimumPlayoutDelay(int delay_ms) {
@ -986,9 +1069,10 @@ bool ChannelReceive::SetMinimumPlayoutDelay(int delay_ms) {
// close as possible, instead of failing.
delay_ms = rtc::SafeClamp(delay_ms, kVoiceEngineMinMinPlayoutDelayMs,
kVoiceEngineMaxMinPlayoutDelayMs);
if (acm_receiver_.SetMinimumDelay(delay_ms) != 0) {
if (!neteq_->SetMinimumDelay(delay_ms)) {
RTC_DLOG(LS_ERROR)
<< "SetMinimumPlayoutDelay() failed to set min playout delay";
<< "SetMinimumPlayoutDelay() failed to set min playout delay "
<< delay_ms;
return false;
}
return true;
@ -1024,11 +1108,11 @@ std::optional<int64_t> ChannelReceive::GetCurrentEstimatedPlayoutNtpTimestampMs(
bool ChannelReceive::SetBaseMinimumPlayoutDelayMs(int delay_ms) {
env_.event_log().Log(
std::make_unique<RtcEventNetEqSetMinimumDelay>(remote_ssrc_, delay_ms));
return acm_receiver_.SetBaseMinimumDelayMs(delay_ms);
return neteq_->SetBaseMinimumDelayMs(delay_ms);
}
int ChannelReceive::GetBaseMinimumPlayoutDelayMs() const {
return acm_receiver_.GetBaseMinimumDelayMs();
return neteq_->GetBaseMinimumDelayMs();
}
std::optional<Syncable::Info> ChannelReceive::GetSyncInfo() const {
@ -1052,7 +1136,7 @@ std::optional<Syncable::Info> ChannelReceive::GetSyncInfo() const {
info.latest_received_capture_timestamp = *last_received_rtp_timestamp_;
info.latest_receive_time_ms = *last_received_rtp_system_time_ms_;
int jitter_buffer_delay = acm_receiver_.FilteredCurrentDelayMs();
int jitter_buffer_delay = neteq_->FilteredCurrentDelayMs();
info.current_delay_ms = jitter_buffer_delay + playout_delay_ms_;
return info;
@ -1063,7 +1147,7 @@ void ChannelReceive::UpdatePlayoutTimestamp(bool rtcp, int64_t now_ms) {
// TODO(bugs.webrtc.org/11993): Expect to be called exclusively on the
// network thread. Once that's done, we won't need video_sync_lock_.
jitter_buffer_playout_timestamp_ = acm_receiver_.GetPlayoutTimestamp();
jitter_buffer_playout_timestamp_ = neteq_->GetPlayoutTimestamp();
if (!jitter_buffer_playout_timestamp_) {
// This can happen if this channel has not received any RTP packets. In
@ -1093,17 +1177,18 @@ void ChannelReceive::UpdatePlayoutTimestamp(bool rtcp, int64_t now_ms) {
}
int ChannelReceive::GetRtpTimestampRateHz() const {
const auto decoder = acm_receiver_.LastDecoder();
const auto decoder_format = neteq_->GetCurrentDecoderFormat();
// Default to the playout frequency if we've not gotten any packets yet.
// TODO(ossu): Zero clockrate can only happen if we've added an external
// decoder for a format we don't support internally. Remove once that way of
// adding decoders is gone!
// TODO(kwiberg): `decoder->second.clockrate_hz` is an RTP clockrate as it
// should, but `acm_receiver_.last_output_sample_rate_hz()` is a codec sample
// rate, which is not always the same thing.
return (decoder && decoder->second.clockrate_hz != 0)
? decoder->second.clockrate_hz
: acm_receiver_.last_output_sample_rate_hz();
// TODO(kwiberg): `decoder_format->sdp_format.clockrate_hz` is an RTP
// clockrate as it should, but `neteq_->last_output_sample_rate_hz()` is a
// codec sample rate, which is not always the same thing.
return (decoder_format && decoder_format->sdp_format.clockrate_hz != 0)
? decoder_format->sdp_format.clockrate_hz
: neteq_->last_output_sample_rate_hz();
}
std::vector<RtpSource> ChannelReceive::GetSources() const {

View File

@ -61,10 +61,7 @@ AcmReceiver::AcmReceiver(const Environment& env, Config config)
neteq_(CreateNetEq(config.neteq_factory,
config.neteq_config,
env_,
std::move(config.decoder_factory))),
resampled_last_output_frame_(true) {
ClearSamples(last_audio_buffer_);
}
std::move(config.decoder_factory))) {}
AcmReceiver::~AcmReceiver() = default;
@ -128,60 +125,13 @@ int AcmReceiver::GetAudio(int desired_freq_hz,
RTC_LOG(LS_ERROR) << "AcmReceiver::GetAudio - NetEq Failed.";
return -1;
}
RTC_DCHECK_NE(current_sample_rate_hz, 0);
// Update if resampling is required.
const bool need_resampling =
(desired_freq_hz != -1) && (current_sample_rate_hz != desired_freq_hz);
RTC_DCHECK_EQ(audio_frame->sample_rate_hz_, current_sample_rate_hz);
// Accessing members, take the lock.
MutexLock lock(&mutex_);
if (need_resampling && !resampled_last_output_frame_) {
// Prime the resampler with the last frame.
int16_t temp_output[AudioFrame::kMaxDataSizeSamples];
int samples_per_channel_int = resampler_.Resample10Msec(
last_audio_buffer_.data(), current_sample_rate_hz, desired_freq_hz,
audio_frame->num_channels_, AudioFrame::kMaxDataSizeSamples,
temp_output);
if (samples_per_channel_int < 0) {
RTC_LOG(LS_ERROR) << "AcmReceiver::GetAudio - "
"Resampling last_audio_buffer_ failed.";
return -1;
}
if (!resampler_helper_.MaybeResample(desired_freq_hz, audio_frame)) {
return -1;
}
// TODO(bugs.webrtc.org/3923) Glitches in the output may appear if the output
// rate from NetEq changes.
if (need_resampling) {
// TODO(yujo): handle this more efficiently for muted frames.
int samples_per_channel_int = resampler_.Resample10Msec(
audio_frame->data(), current_sample_rate_hz, desired_freq_hz,
audio_frame->num_channels_, AudioFrame::kMaxDataSizeSamples,
audio_frame->mutable_data());
if (samples_per_channel_int < 0) {
RTC_LOG(LS_ERROR)
<< "AcmReceiver::GetAudio - Resampling audio_buffer_ failed.";
return -1;
}
audio_frame->samples_per_channel_ =
static_cast<size_t>(samples_per_channel_int);
audio_frame->sample_rate_hz_ = desired_freq_hz;
RTC_DCHECK_EQ(
audio_frame->sample_rate_hz_,
rtc::dchecked_cast<int>(audio_frame->samples_per_channel_ * 100));
resampled_last_output_frame_ = true;
} else {
resampled_last_output_frame_ = false;
// We might end up here ONLY if codec is changed.
}
// Store current audio in `last_audio_buffer_` for next time.
// TODO: b/335805780 - Use CopySamples().
memcpy(last_audio_buffer_.data(), audio_frame->data(),
sizeof(int16_t) * audio_frame->samples_per_channel_ *
audio_frame->num_channels_);
call_stats_.DecodedByNetEq(audio_frame->speech_type_, audio_frame->muted());
return 0;
}

View File

@ -43,6 +43,7 @@ struct RTPHeader;
namespace acm2 {
// This class is deprecated. See https://issues.webrtc.org/issues/42225167.
class AcmReceiver {
public:
struct Config {
@ -223,12 +224,9 @@ class AcmReceiver {
const Environment env_;
mutable Mutex mutex_;
ACMResampler resampler_ RTC_GUARDED_BY(mutex_);
CallStatistics call_stats_ RTC_GUARDED_BY(mutex_);
const std::unique_ptr<NetEq> neteq_; // NetEq is thread-safe; no lock needed.
bool resampled_last_output_frame_ RTC_GUARDED_BY(mutex_);
std::array<int16_t, AudioFrame::kMaxDataSizeSamples> last_audio_buffer_
RTC_GUARDED_BY(mutex_);
ResamplerHelper resampler_helper_ RTC_GUARDED_BY(mutex_);
};
} // namespace acm2

View File

@ -56,5 +56,67 @@ int ACMResampler::Resample10Msec(const int16_t* in_audio,
return static_cast<int>(dst.samples_per_channel());
}
ResamplerHelper::ResamplerHelper() {
  // Zero the priming buffer so that the first call to MaybeResample() never
  // feeds uninitialized samples into the resampler (last_audio_buffer_ is only
  // overwritten with real audio after a frame has been processed).
  ClearSamples(last_audio_buffer_);
}
// Resamples `audio_frame` in place to `desired_sample_rate_hz` when needed.
// A `desired_sample_rate_hz` of -1 means "no preference"; in that case (or
// when the frame is already at the desired rate) the frame is left untouched.
// To reduce glitches when the NetEq output rate changes, the resampler is
// primed with the previously stored frame before resampling resumes.
// Returns false if resampling fails, in which case the frame contents are
// not valid for playout.
bool ResamplerHelper::MaybeResample(int desired_sample_rate_hz,
                                    AudioFrame* audio_frame) {
  const int current_sample_rate_hz = audio_frame->sample_rate_hz_;
  RTC_DCHECK_NE(current_sample_rate_hz, 0);

  // Update if resampling is required.
  const bool need_resampling =
      (desired_sample_rate_hz != -1) &&
      (current_sample_rate_hz != desired_sample_rate_hz);

  if (need_resampling && !resampled_last_output_frame_) {
    // Prime the resampler with the last frame.
    int16_t temp_output[AudioFrame::kMaxDataSizeSamples];
    int samples_per_channel_int = resampler_.Resample10Msec(
        last_audio_buffer_.data(), current_sample_rate_hz,
        desired_sample_rate_hz, audio_frame->num_channels_,
        AudioFrame::kMaxDataSizeSamples, temp_output);
    if (samples_per_channel_int < 0) {
      // NOTE: message updated from the stale "AcmReceiver::GetAudio" text that
      // was copied along when this code moved out of AcmReceiver.
      RTC_LOG(LS_ERROR) << "ResamplerHelper::MaybeResample - "
                           "Resampling last_audio_buffer_ failed.";
      return false;
    }
  }

  // TODO(bugs.webrtc.org/3923) Glitches in the output may appear if the output
  // rate from NetEq changes.
  if (need_resampling) {
    // TODO(yujo): handle this more efficiently for muted frames.
    int samples_per_channel_int = resampler_.Resample10Msec(
        audio_frame->data(), current_sample_rate_hz, desired_sample_rate_hz,
        audio_frame->num_channels_, AudioFrame::kMaxDataSizeSamples,
        audio_frame->mutable_data());
    if (samples_per_channel_int < 0) {
      RTC_LOG(LS_ERROR)
          << "ResamplerHelper::MaybeResample - Resampling audio_frame failed.";
      return false;
    }
    audio_frame->samples_per_channel_ =
        static_cast<size_t>(samples_per_channel_int);
    audio_frame->sample_rate_hz_ = desired_sample_rate_hz;
    // 10 ms of audio implies samples_per_channel == rate / 100.
    RTC_DCHECK_EQ(
        audio_frame->sample_rate_hz_,
        rtc::dchecked_cast<int>(audio_frame->samples_per_channel_ * 100));
    resampled_last_output_frame_ = true;
  } else {
    resampled_last_output_frame_ = false;
    // We might end up here ONLY if codec is changed.
  }

  // Store current audio in `last_audio_buffer_` for next time.
  // TODO: b/335805780 - Use CopySamples().
  memcpy(last_audio_buffer_.data(), audio_frame->data(),
         sizeof(int16_t) * audio_frame->samples_per_channel_ *
             audio_frame->num_channels_);

  return true;
}
} // namespace acm2
} // namespace webrtc

View File

@ -14,6 +14,7 @@
#include <stddef.h>
#include <stdint.h>
#include "api/audio/audio_frame.h"
#include "common_audio/resampler/include/push_resampler.h"
namespace webrtc {
@ -36,6 +37,22 @@ class ACMResampler {
PushResampler<int16_t> resampler_;
};
// Helper class to perform resampling if needed, meant to be used after
// receiving the audio_frame from NetEq. Provides reasonably glitch free
// transitions between different output sample rates from NetEq.
class ResamplerHelper {
 public:
  ResamplerHelper();

  // Resamples audio_frame if it is not already in desired_sample_rate_hz.
  // A desired_sample_rate_hz of -1 disables resampling. Returns false if
  // resampling fails; the frame is modified in place on success.
  bool MaybeResample(int desired_sample_rate_hz, AudioFrame* audio_frame);

 private:
  // Underlying 10 ms push resampler.
  ACMResampler resampler_;
  // True if the previous output frame was resampled; when false and
  // resampling (re)starts, last_audio_buffer_ is used to prime the resampler.
  bool resampled_last_output_frame_ = true;
  // Copy of the most recent output frame's samples, kept for priming.
  std::array<int16_t, AudioFrame::kMaxDataSizeSamples> last_audio_buffer_;
};
} // namespace acm2
} // namespace webrtc