diff --git a/src/modules/interface/module_common_types.h b/src/modules/interface/module_common_types.h index 6d571a4f45..2e1beaa0fc 100644 --- a/src/modules/interface/module_common_types.h +++ b/src/modules/interface/module_common_types.h @@ -393,30 +393,24 @@ public: VideoCodecType codec; }; -// Video Content Metrics -struct VideoContentMetrics -{ - VideoContentMetrics(): motionMagnitudeNZ(0), sizeZeroMotion(0), spatialPredErr(0), - spatialPredErrH(0), spatialPredErrV(0), motionPredErr(0), - motionHorizontalness(0), motionClusterDistortion(0), - nativeWidth(0), nativeHeight(0), contentChange(false) { } - void Reset(){ motionMagnitudeNZ = 0; sizeZeroMotion = 0; spatialPredErr = 0; - spatialPredErrH = 0; spatialPredErrV = 0; motionPredErr = 0; - motionHorizontalness = 0; motionClusterDistortion = 0; - nativeWidth = 0; nativeHeight = 0; contentChange = false; } +struct VideoContentMetrics { + VideoContentMetrics() + : motion_magnitude(0.0f), + spatial_pred_err(0.0f), + spatial_pred_err_h(0.0f), + spatial_pred_err_v(0.0f) { + } - float motionMagnitudeNZ; - float sizeZeroMotion; - float spatialPredErr; - float spatialPredErrH; - float spatialPredErrV; - float motionPredErr; - float motionHorizontalness; - float motionClusterDistortion; - WebRtc_UWord32 nativeWidth; - WebRtc_UWord32 nativeHeight; - WebRtc_UWord32 nativeFrameRate; - bool contentChange; + void Reset() { + motion_magnitude = 0.0f; + spatial_pred_err = 0.0f; + spatial_pred_err_h = 0.0f; + spatial_pred_err_v = 0.0f; + } + float motion_magnitude; + float spatial_pred_err; + float spatial_pred_err_h; + float spatial_pred_err_v; }; /************************************************* diff --git a/src/modules/video_coding/main/source/content_metrics_processing.cc b/src/modules/video_coding/main/source/content_metrics_processing.cc index 0805178c58..99160c9adb 100644 --- a/src/modules/video_coding/main/source/content_metrics_processing.cc +++ b/src/modules/video_coding/main/source/content_metrics_processing.cc @@ 
-1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,205 +8,118 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "content_metrics_processing.h" -#include "module_common_types.h" -#include "video_coding_defines.h" +#include "modules/video_coding/main/source/content_metrics_processing.h" #include -namespace webrtc { +#include "modules/interface/module_common_types.h" +#include "modules/video_coding/main/interface/video_coding_defines.h" +namespace webrtc { ////////////////////////////////// /// VCMContentMetricsProcessing // ////////////////////////////////// -VCMContentMetricsProcessing::VCMContentMetricsProcessing(): -_frameRate(0), -_recAvgFactor(1 / 150.0f), // matched to 30fps -_frameCntRecursiveAvg(0), -_frameCntUniformAvg(0), -_avgMotionLevel(0.0f), -_avgSpatialLevel(0.0f) -{ - _recursiveAvg = new VideoContentMetrics(); - _uniformAvg = new VideoContentMetrics(); +VCMContentMetricsProcessing::VCMContentMetricsProcessing() + : recursive_avg_factor_(1 / 150.0f), // matched to 30fps. 
+ frame_cnt_uniform_avg_(0), + avg_motion_level_(0.0f), + avg_spatial_level_(0.0f) { + recursive_avg_ = new VideoContentMetrics(); + uniform_avg_ = new VideoContentMetrics(); } -VCMContentMetricsProcessing::~VCMContentMetricsProcessing() -{ - delete _recursiveAvg; - delete _uniformAvg; +VCMContentMetricsProcessing::~VCMContentMetricsProcessing() { + delete recursive_avg_; + delete uniform_avg_; } -WebRtc_Word32 -VCMContentMetricsProcessing::Reset() -{ - _recursiveAvg->Reset(); - _uniformAvg->Reset(); - _frameRate = 0; - _frameCntRecursiveAvg = 0; - _frameCntUniformAvg = 0; - _avgMotionLevel = 0.0f; - _avgSpatialLevel = 0.0f; +int VCMContentMetricsProcessing::Reset() { + recursive_avg_->Reset(); + uniform_avg_->Reset(); + frame_cnt_uniform_avg_ = 0; + avg_motion_level_ = 0.0f; + avg_spatial_level_ = 0.0f; + return VCM_OK; +} + +void VCMContentMetricsProcessing::UpdateFrameRate(uint32_t frameRate) { + // Update factor for recursive averaging. + recursive_avg_factor_ = static_cast (1000.0f) / + static_cast(frameRate * kQmMinIntervalMs); +} + +VideoContentMetrics* VCMContentMetricsProcessing::LongTermAvgData() { + return recursive_avg_; +} + +VideoContentMetrics* VCMContentMetricsProcessing::ShortTermAvgData() { + if (frame_cnt_uniform_avg_ == 0) { + return NULL; + } + // Two metrics are used: motion and spatial level. + uniform_avg_->motion_magnitude = avg_motion_level_ / + static_cast(frame_cnt_uniform_avg_); + uniform_avg_->spatial_pred_err = avg_spatial_level_ / + static_cast(frame_cnt_uniform_avg_); + return uniform_avg_; +} + +void VCMContentMetricsProcessing::ResetShortTermAvgData() { + // Reset. 
+ avg_motion_level_ = 0.0f; + avg_spatial_level_ = 0.0f; + frame_cnt_uniform_avg_ = 0; +} + +int VCMContentMetricsProcessing::UpdateContentData( + const VideoContentMetrics *contentMetrics) { + if (contentMetrics == NULL) { return VCM_OK; + } + return ProcessContent(contentMetrics); } -void -VCMContentMetricsProcessing::UpdateFrameRate(WebRtc_UWord32 frameRate) -{ - _frameRate = frameRate; - // Update factor for recursive averaging. - _recAvgFactor = (float) 1000.0f / ((float)(_frameRate * kQmMinIntervalMs)); - +int VCMContentMetricsProcessing::ProcessContent( + const VideoContentMetrics *contentMetrics) { + // Update the recursive averaged metrics: average is over longer window + // of time: over QmMinIntervalMs ms. + UpdateRecursiveAvg(contentMetrics); + // Update the uniform averaged metrics: average is over shorter window + // of time: based on ~RTCP reports. + UpdateUniformAvg(contentMetrics); + return VCM_OK; } -VideoContentMetrics* -VCMContentMetricsProcessing::LongTermAvgData() -{ - if (_frameCntRecursiveAvg == 0) - { - return NULL; - } - return _recursiveAvg; +void VCMContentMetricsProcessing::UpdateUniformAvg( + const VideoContentMetrics *contentMetrics) { + // Update frame counter. + frame_cnt_uniform_avg_ += 1; + // Update averaged metrics: motion and spatial level are used. + avg_motion_level_ += contentMetrics->motion_magnitude; + avg_spatial_level_ += contentMetrics->spatial_pred_err; + return; } -VideoContentMetrics* -VCMContentMetricsProcessing::ShortTermAvgData() -{ - if (_frameCntUniformAvg == 0) - { - return NULL; - } +void VCMContentMetricsProcessing::UpdateRecursiveAvg( + const VideoContentMetrics *contentMetrics) { - // Two metrics are used: motion and spatial level. - _uniformAvg->motionMagnitudeNZ = _avgMotionLevel / - (float)(_frameCntUniformAvg); - _uniformAvg->spatialPredErr = _avgSpatialLevel / - (float)(_frameCntUniformAvg); + // Spatial metrics: 2x2, 1x2(H), 2x1(V). 
+ recursive_avg_->spatial_pred_err = (1 - recursive_avg_factor_) * + recursive_avg_->spatial_pred_err + + recursive_avg_factor_ * contentMetrics->spatial_pred_err; - return _uniformAvg; + recursive_avg_->spatial_pred_err_h = (1 - recursive_avg_factor_) * + recursive_avg_->spatial_pred_err_h + + recursive_avg_factor_ * contentMetrics->spatial_pred_err_h; + + recursive_avg_->spatial_pred_err_v = (1 - recursive_avg_factor_) * + recursive_avg_->spatial_pred_err_v + + recursive_avg_factor_ * contentMetrics->spatial_pred_err_v; + + // Motion metric: Derived from NFD (normalized frame difference). + recursive_avg_->motion_magnitude = (1 - recursive_avg_factor_) * + recursive_avg_->motion_magnitude + + recursive_avg_factor_ * contentMetrics->motion_magnitude; } - -void -VCMContentMetricsProcessing::ResetShortTermAvgData() -{ - // Reset - _avgMotionLevel = 0.0f; - _avgSpatialLevel = 0.0f; - _frameCntUniformAvg = 0; -} - -WebRtc_Word32 -VCMContentMetricsProcessing::UpdateContentData(const VideoContentMetrics *contentMetrics) -{ - if (contentMetrics == NULL) - { - return VCM_OK; - } - return ProcessContent(contentMetrics); - -} - -WebRtc_UWord32 -VCMContentMetricsProcessing::ProcessContent(const VideoContentMetrics *contentMetrics) -{ - // Update the recursive averaged metrics - // average is over longer window of time: over QmMinIntervalMs ms. - UpdateRecursiveAvg(contentMetrics); - - // Update the uniform averaged metrics: - // average is over shorter window of time: based on ~RTCP reports. - UpdateUniformAvg(contentMetrics); - - return VCM_OK; -} - -void -VCMContentMetricsProcessing::UpdateUniformAvg(const VideoContentMetrics *contentMetrics) -{ - - // Update frame counter - _frameCntUniformAvg += 1; - - // Update averaged metrics: motion and spatial level are used. 
- _avgMotionLevel += contentMetrics->motionMagnitudeNZ; - _avgSpatialLevel += contentMetrics->spatialPredErr; - - return; - -} -void -VCMContentMetricsProcessing::UpdateRecursiveAvg(const VideoContentMetrics *contentMetrics) -{ - - // Threshold for size of zero motion cluster: - // Use for updating 3 motion vector derived metrics: - // motion magnitude, cluster distortion, and horizontalness. - float nonZeroMvThr = 0.1f; - - float tmpRecAvgFactor = _recAvgFactor; - - // Take value as is for first frame (no motion search in frame zero). - if (_frameCntRecursiveAvg < 1) - { - tmpRecAvgFactor = 1; - } - - _recursiveAvg->motionPredErr = (1 - tmpRecAvgFactor) * - _recursiveAvg->motionPredErr + - tmpRecAvgFactor * contentMetrics->motionPredErr; - - _recursiveAvg->sizeZeroMotion = (1 - tmpRecAvgFactor) * - _recursiveAvg->sizeZeroMotion + - tmpRecAvgFactor * contentMetrics->sizeZeroMotion; - - _recursiveAvg->spatialPredErr = (1 - tmpRecAvgFactor) * - _recursiveAvg->spatialPredErr + - tmpRecAvgFactor * contentMetrics->spatialPredErr; - - _recursiveAvg->spatialPredErrH = (1 - tmpRecAvgFactor) * - _recursiveAvg->spatialPredErrH + - tmpRecAvgFactor * contentMetrics->spatialPredErrH; - - _recursiveAvg->spatialPredErrV = (1 - tmpRecAvgFactor) * - _recursiveAvg->spatialPredErrV + - tmpRecAvgFactor * contentMetrics->spatialPredErrV; - - // motionMag metric is derived from NFD (normalized frame difference). 
- if (kNfdMetric == 1) - { - _recursiveAvg->motionMagnitudeNZ = (1 - tmpRecAvgFactor) * - _recursiveAvg->motionMagnitudeNZ + - tmpRecAvgFactor * contentMetrics->motionMagnitudeNZ; - } - - if (contentMetrics->sizeZeroMotion > nonZeroMvThr) - { - _recursiveAvg->motionClusterDistortion = (1 - tmpRecAvgFactor) * - _recursiveAvg->motionClusterDistortion + - tmpRecAvgFactor *contentMetrics->motionClusterDistortion; - - _recursiveAvg->motionHorizontalness = (1 - _recAvgFactor) * - _recursiveAvg->motionHorizontalness + - tmpRecAvgFactor * contentMetrics->motionHorizontalness; - - // motionMag metric is derived from motion vectors. - if (kNfdMetric == 0) - { - _recursiveAvg->motionMagnitudeNZ = (1 - tmpRecAvgFactor) * - _recursiveAvg->motionMagnitudeNZ + - tmpRecAvgFactor * contentMetrics->motionMagnitudeNZ; - } - } - - // Update native values: - // TODO (marpan): we don't need to update this every frame. - _recursiveAvg->nativeHeight = contentMetrics->nativeHeight; - _recursiveAvg->nativeWidth = contentMetrics->nativeWidth; - _recursiveAvg->nativeFrameRate = contentMetrics->nativeFrameRate; - - _frameCntRecursiveAvg++; - - return; -} -} //end of namespace +} // end of namespace diff --git a/src/modules/video_coding/main/source/content_metrics_processing.h b/src/modules/video_coding/main/source/content_metrics_processing.h index 155c4adc61..0317add775 100644 --- a/src/modules/video_coding/main/source/content_metrics_processing.h +++ b/src/modules/video_coding/main/source/content_metrics_processing.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -13,66 +13,64 @@ #include "typedefs.h" -namespace webrtc -{ +namespace webrtc { struct VideoContentMetrics; // QM interval time (in ms) -enum { kQmMinIntervalMs = 10000 }; +enum { + kQmMinIntervalMs = 10000 +}; // Flag for NFD metric vs motion metric -enum { kNfdMetric = 1 }; +enum { + kNfdMetric = 1 +}; /**********************************/ /* Content Metrics Processing */ /**********************************/ -class VCMContentMetricsProcessing -{ -public: - VCMContentMetricsProcessing(); - ~VCMContentMetricsProcessing(); +class VCMContentMetricsProcessing { + public: + VCMContentMetricsProcessing(); + ~VCMContentMetricsProcessing(); - // Update class with latest metrics - WebRtc_Word32 UpdateContentData(const VideoContentMetrics *contentMetrics); + // Update class with latest metrics. + int UpdateContentData(const VideoContentMetrics *contentMetrics); - // Reset the short-term averaged content data - void ResetShortTermAvgData(); + // Reset the short-term averaged content data. + void ResetShortTermAvgData(); - // Initialize to - WebRtc_Word32 Reset(); + // Initialize. + int Reset(); - // Inform class of current frame rate - void UpdateFrameRate(WebRtc_UWord32 frameRate); + // Inform class of current frame rate. + void UpdateFrameRate(uint32_t frameRate); - // Returns the long-term averaged content data: - // recursive average over longer time scale - VideoContentMetrics* LongTermAvgData(); + // Returns the long-term averaged content data: recursive average over longer + // time scale. + VideoContentMetrics* LongTermAvgData(); - // Returns the short-term averaged content data: - // uniform average over shorter time scale - VideoContentMetrics* ShortTermAvgData(); -private: + // Returns the short-term averaged content data: uniform average over + // shorter time scalE. 
+ VideoContentMetrics* ShortTermAvgData(); - // Compute working avg - WebRtc_UWord32 ProcessContent(const VideoContentMetrics *contentMetrics); + private: + // Compute working average. + int ProcessContent(const VideoContentMetrics *contentMetrics); - // Update the recursive averaged metrics: longer time average (~5/10 secs). - void UpdateRecursiveAvg(const VideoContentMetrics *contentMetrics); + // Update the recursive averaged metrics: longer time average (~5/10 secs). + void UpdateRecursiveAvg(const VideoContentMetrics *contentMetrics); - // Update the uniform averaged metrics: shorter time average (~RTCP reports). - void UpdateUniformAvg(const VideoContentMetrics *contentMetrics); + // Update the uniform averaged metrics: shorter time average (~RTCP report). + void UpdateUniformAvg(const VideoContentMetrics *contentMetrics); - VideoContentMetrics* _recursiveAvg; - VideoContentMetrics* _uniformAvg; - WebRtc_UWord32 _frameRate; - float _recAvgFactor; - WebRtc_UWord32 _frameCntRecursiveAvg; - WebRtc_UWord32 _frameCntUniformAvg; - float _avgMotionLevel; - float _avgSpatialLevel; + VideoContentMetrics* recursive_avg_; + VideoContentMetrics* uniform_avg_; + float recursive_avg_factor_; + uint32_t frame_cnt_uniform_avg_; + float avg_motion_level_; + float avg_spatial_level_; }; - -} // namespace webrtc - -#endif // WEBRTC_MODULES_VIDEO_CODING_CONTENT_METRICS_PROCESSING_H_ +} // namespace webrtc +#endif // WEBRTC_MODULES_VIDEO_CODING_CONTENT_METRICS_PROCESSING_H_ diff --git a/src/modules/video_coding/main/source/media_opt_util.cc b/src/modules/video_coding/main/source/media_opt_util.cc index 2c6f50bdf2..b520278ac7 100644 --- a/src/modules/video_coding/main/source/media_opt_util.cc +++ b/src/modules/video_coding/main/source/media_opt_util.cc @@ -20,8 +20,6 @@ #include "modules/video_coding/main/source/er_tables_xor.h" #include "modules/video_coding/main/source/fec_tables_xor.h" #include "modules/video_coding/main/source/nack_fec_tables.h" -#include 
"modules/video_coding/main/source/qm_select_data.h" - namespace webrtc { diff --git a/src/modules/video_coding/main/source/media_optimization.cc b/src/modules/video_coding/main/source/media_optimization.cc index e2c4329803..cae491229f 100644 --- a/src/modules/video_coding/main/source/media_optimization.cc +++ b/src/modules/video_coding/main/source/media_optimization.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -24,6 +24,8 @@ _maxBitRate(0), _sendCodecType(kVideoCodecUnknown), _codecWidth(0), _codecHeight(0), +_initCodecWidth(0), +_initCodecHeight(0), _userFrameRate(0), _packetLossEnc(0), _fractionLost(0), @@ -64,7 +66,7 @@ WebRtc_Word32 VCMMediaOptimization::Reset() { memset(_incomingFrameTimes, -1, sizeof(_incomingFrameTimes)); - InputFrameRate(); // Resets _incomingFrameRate + _incomingFrameRate = 0.0; _frameDropper->Reset(); _lossProtLogic->Reset(_clock->MillisecondTimestamp()); _frameDropper->SetRates(0, 0); @@ -131,6 +133,7 @@ VCMMediaOptimization::SetTargetRates(WebRtc_UWord32 bitRate, uint32_t protection_overhead_kbps = 0; // Update protection settings, when applicable + float sent_video_rate = 0.0f; if (selectedMethod) { // Update protection method with content metrics @@ -168,6 +171,7 @@ VCMMediaOptimization::SetTargetRates(WebRtc_UWord32 bitRate, // Get the effective packet loss for encoder ER // when applicable, should be passed to encoder via fractionLost packetLossEnc = selectedMethod->RequiredPacketLossER(); + sent_video_rate = static_cast(sent_video_rate_bps / 1000.0); } // Source coding rate: total rate - protection overhead @@ -179,7 +183,7 @@ VCMMediaOptimization::SetTargetRates(WebRtc_UWord32 bitRate, if (_enableQm && _numLayers == 1) { // Update QM with rates - 
_qmResolution->UpdateRates((float)_targetBitRate, _avgSentBitRateBps, + _qmResolution->UpdateRates((float)_targetBitRate, sent_video_rate, _incomingFrameRate, _fractionLost); // Check for QM selection bool selectQM = checkStatusForQMchange(); @@ -282,6 +286,8 @@ VCMMediaOptimization::SetEncodingData(VideoCodecType sendCodecType, _userFrameRate = static_cast(frameRate); _codecWidth = width; _codecHeight = height; + _initCodecWidth = width; + _initCodecHeight = height; _numLayers = (numLayers <= 1) ? 1 : numLayers; // Can also be zero. WebRtc_Word32 ret = VCM_OK; ret = _qmResolution->Initialize((float)_targetBitRate, _userFrameRate, @@ -525,7 +531,7 @@ VCMMediaOptimization::SelectQuality() WebRtc_Word32 ret = _qmResolution->SelectResolution(&qm); if (ret < 0) { - return ret; + return ret; } // Check for updates to spatial/temporal modes @@ -575,43 +581,50 @@ VCMMediaOptimization::QMUpdate(VCMResolutionScale* qm) // Check for no change if (qm->spatialHeightFact == 1 && qm->spatialWidthFact == 1 && - qm->temporalFact == 1) - { + qm->temporalFact == 1) { return false; } - // Content metrics hold native values - VideoContentMetrics* cm = _content->LongTermAvgData(); - // Temporal WebRtc_UWord32 frameRate = static_cast (_incomingFrameRate + 0.5f); // Check if go back up in temporal resolution - if (qm->temporalFact == 0) - { - frameRate = (WebRtc_UWord32) 2 * _incomingFrameRate; + if (qm->temporalFact == 0) { + // Currently only allow for 1/2 frame rate reduction per action. + // TODO (marpan): allow for 2/3 reduction. + frameRate = (WebRtc_UWord32) 2 * _incomingFrameRate; } // go down in temporal resolution - else - { - frameRate = (WebRtc_UWord32)(_incomingFrameRate / qm->temporalFact + 1); + else { + frameRate = (WebRtc_UWord32)(_incomingFrameRate / qm->temporalFact + 1); + } + // Reset _incomingFrameRate if temporal action was selected. 
+ if (qm->temporalFact != 1) { + memset(_incomingFrameTimes, -1, sizeof(_incomingFrameTimes)); + _incomingFrameRate = frameRate; } // Spatial WebRtc_UWord32 height = _codecHeight; WebRtc_UWord32 width = _codecWidth; - // Check if go back up in spatial resolution - if (qm->spatialHeightFact == 0 && qm->spatialWidthFact == 0) - { - height = cm->nativeHeight; - width = cm->nativeWidth; - } - else - { - height = _codecHeight / qm->spatialHeightFact; - width = _codecWidth / qm->spatialWidthFact; + // Check if go back up in spatial resolution, and update frame sizes. + // Currently only allow for 2x2 spatial down-sampling. + // TODO (marpan): allow for 1x2, 2x1, and 4/3x4/3 (or 3/2x3/2). + if (qm->spatialHeightFact == 0 && qm->spatialWidthFact == 0) { + width = _codecWidth * 2; + height = _codecHeight * 2; + } else { + width = _codecWidth / qm->spatialWidthFact; + height = _codecHeight / qm->spatialHeightFact; } + _codecWidth = width; + _codecHeight = height; + + // New frame sizes should never exceed the original sizes + // from SetEncodingData(). 
+ assert(_codecWidth <= _initCodecWidth); + assert(_codecHeight <= _initCodecHeight); WEBRTC_TRACE(webrtc::kTraceDebug, webrtc::kTraceVideoCoding, _id, "Quality Mode Update: W = %d, H = %d, FR = %f", @@ -620,11 +633,12 @@ VCMMediaOptimization::QMUpdate(VCMResolutionScale* qm) // Update VPM with new target frame rate and size _videoQMSettingsCallback->SetVideoQMSettings(frameRate, width, height); + _content->UpdateFrameRate(frameRate); + _qmResolution->UpdateCodecFrameSize(width, height); + return true; } - - void VCMMediaOptimization::UpdateIncomingFrameRate() { @@ -671,10 +685,6 @@ VCMMediaOptimization::ProcessIncomingFrameRate(WebRtc_Word64 now) _incomingFrameRate = nrOfFrames * 1000.0f / static_cast(diff); } } - else - { - _incomingFrameRate = static_cast(nrOfFrames); - } } WebRtc_UWord32 diff --git a/src/modules/video_coding/main/source/media_optimization.h b/src/modules/video_coding/main/source/media_optimization.h index 7d87a6d041..14e5d1a2d7 100644 --- a/src/modules/video_coding/main/source/media_optimization.h +++ b/src/modules/video_coding/main/source/media_optimization.h @@ -168,6 +168,8 @@ private: VideoCodecType _sendCodecType; WebRtc_UWord16 _codecWidth; WebRtc_UWord16 _codecHeight; + WebRtc_UWord16 _initCodecWidth; + WebRtc_UWord16 _initCodecHeight; float _userFrameRate; VCMFrameDropper* _frameDropper; diff --git a/src/modules/video_coding/main/source/qm_select.cc b/src/modules/video_coding/main/source/qm_select.cc index 99fc89d1b1..c4bd707812 100644 --- a/src/modules/video_coding/main/source/qm_select.cc +++ b/src/modules/video_coding/main/source/qm_select.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,784 +8,657 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "qm_select.h" -#include "internal_defines.h" -#include "qm_select_data.h" - -#include "module_common_types.h" -#include "video_coding_defines.h" -#include "trace.h" +#include "modules/video_coding/main/source/qm_select.h" #include +#include "modules/interface/module_common_types.h" +#include "modules/video_coding/main/source/internal_defines.h" +#include "modules/video_coding/main/source/qm_select_data.h" +#include "modules/video_coding/main/interface/video_coding_defines.h" +#include "system_wrappers/interface/trace.h" + namespace webrtc { // QM-METHOD class VCMQmMethod::VCMQmMethod() - : _contentMetrics(new VideoContentMetrics()), + : _contentMetrics(NULL), _width(0), _height(0), _nativeWidth(0), _nativeHeight(0), - _nativeFrameRate(0), + _frameRateLevel(kDefault), _init(false) { ResetQM(); } -VCMQmMethod::~VCMQmMethod() -{ - delete _contentMetrics; +VCMQmMethod::~VCMQmMethod() { } -void -VCMQmMethod::ResetQM() -{ - _motion.Reset(); - _spatial.Reset(); - _coherence.Reset(); - _stationaryMotion = 0; - _aspectRatio = 1; - _imageType = 2; - return; +void VCMQmMethod::ResetQM() { + _aspectRatio = 1.0f; + _imageType = 2; + _motion.Reset(); + _spatial.Reset(); + _contentClass = 0; } -void -VCMQmMethod::UpdateContent(const VideoContentMetrics* contentMetrics) -{ - _contentMetrics = contentMetrics; +uint8_t VCMQmMethod::ComputeContentClass() { + ComputeMotionNFD(); + ComputeSpatial(); + return _contentClass = 3 * _motion.level + _spatial.level; } -void -VCMQmMethod::MotionNFD() -{ - _motion.value = _contentMetrics->motionMagnitudeNZ; - - // Determine motion level - if (_motion.value < LOW_MOTION_NFD) - { - _motion.level = kLow; - } - else if (_motion.value > HIGH_MOTION_NFD) - { - _motion.level = kHigh; - } - else - { - _motion.level = kDefault; - } - +void VCMQmMethod::UpdateContent(const VideoContentMetrics* contentMetrics) { + _contentMetrics = contentMetrics; } -void -VCMQmMethod::Motion() -{ - - float sizeZeroMotion = _contentMetrics->sizeZeroMotion; - 
float motionMagNZ = _contentMetrics->motionMagnitudeNZ; - - // Take product of size and magnitude with equal weight - _motion.value = (1.0f - sizeZeroMotion) * motionMagNZ; - - // Stabilize: motionMagNZ could be large when only a - // few motion blocks are non-zero - _stationaryMotion = false; - if (sizeZeroMotion > HIGH_ZERO_MOTION_SIZE) - { - _motion.value = 0.0f; - _stationaryMotion = true; - } - // Determine motion level - if (_motion.value < LOW_MOTION) - { - _motion.level = kLow; - } - else if (_motion.value > HIGH_MOTION) - { - _motion.level = kHigh; - } - else - { - _motion.level = kDefault; - } +void VCMQmMethod::ComputeMotionNFD() { + if (_contentMetrics) { + _motion.value = _contentMetrics->motion_magnitude; + } + // Determine motion level. + if (_motion.value < kLowMotionNfd) { + _motion.level = kLow; + } else if (_motion.value > kHighMotionNfd) { + _motion.level = kHigh; + } else { + _motion.level = kDefault; + } } +void VCMQmMethod::ComputeSpatial() { + float spatialErr = 0.0; + float spatialErrH = 0.0; + float spatialErrV = 0.0; + if (_contentMetrics) { + spatialErr = _contentMetrics->spatial_pred_err; + spatialErrH = _contentMetrics->spatial_pred_err_h; + spatialErrV = _contentMetrics->spatial_pred_err_v; + } + // Spatial measure: take average of 3 prediction errors. + _spatial.value = (spatialErr + spatialErrH + spatialErrV) / 3.0f; -void -VCMQmMethod::Spatial() -{ - float spatialErr = _contentMetrics->spatialPredErr; - float spatialErrH = _contentMetrics->spatialPredErrH; - float spatialErrV = _contentMetrics->spatialPredErrV; - // Spatial measure: take average of 3 prediction errors - _spatial.value = (spatialErr + spatialErrH + spatialErrV) / 3.0f; + // Reduce thresholds for large scenes/higher pixel correlation (~>=WHD). + float scale2 = _imageType > 3 ? 
kScaleTexture : 1.0; - float scale = 1.0f; - // Reduce thresholds for HD scenes - if (_imageType > 3) - { - scale = (float)SCALE_TEXTURE_HD; - } - - if (_spatial.value > scale * HIGH_TEXTURE) - { - _spatial.level = kHigh; - } - else if (_spatial.value < scale * LOW_TEXTURE) - { - _spatial.level = kLow; - } - else - { - _spatial.level = kDefault; - } + if (_spatial.value > scale2 * kHighTexture) { + _spatial.level = kHigh; + } else if (_spatial.value < scale2 * kLowTexture) { + _spatial.level = kLow; + } else { + _spatial.level = kDefault; + } } -void -VCMQmMethod::Coherence() -{ - float horizNZ = _contentMetrics->motionHorizontalness; - float distortionNZ = _contentMetrics->motionClusterDistortion; - - // Coherence measure: combine horizontalness with cluster distortion - _coherence.value = COH_MAX; - if (distortionNZ > 0.) - { - _coherence.value = horizNZ / distortionNZ; - } - _coherence.value = VCM_MIN(COH_MAX, _coherence.value); - - if (_coherence.value < COHERENCE_THR) - { - _coherence.level = kLow; - } - else - { - _coherence.level = kHigh; - } - +uint8_t VCMQmMethod::GetImageType(uint16_t width, + uint16_t height) { + // Get the closest image type for encoder frame size. 
+ uint32_t imageSize = width * height; + if (imageSize < kFrameSizeTh[0]) { + return 0; // QCIF + } else if (imageSize < kFrameSizeTh[1]) { + return 1; // CIF + } else if (imageSize < kFrameSizeTh[2]) { + return 2; // VGA + } else if (imageSize < kFrameSizeTh[3]) { + return 3; // 4CIF + } else if (imageSize < kFrameSizeTh[4]) { + return 4; // 720,4:3 + } else if (imageSize < kFrameSizeTh[5]) { + return 5; // WHD + } else { + return 6; // HD + } } -WebRtc_Word8 -VCMQmMethod::GetImageType(WebRtc_UWord32 width, WebRtc_UWord32 height) -{ - // Match image type - WebRtc_UWord32 imageSize = width * height; - WebRtc_Word8 imageType; - - if (imageSize < kFrameSizeTh[0]) - { - imageType = 0; - } - else if (imageSize < kFrameSizeTh[1]) - { - imageType = 1; - } - else if (imageSize < kFrameSizeTh[2]) - { - imageType = 2; - } - else if (imageSize < kFrameSizeTh[3]) - { - imageType = 3; - } - else if (imageSize < kFrameSizeTh[4]) - { - imageType = 4; - } - else if (imageSize < kFrameSizeTh[5]) - { - imageType = 5; - } - else - { - imageType = 6; - } - - return imageType; +LevelClass VCMQmMethod::FrameRateLevel(float avgFrameRate) { + if (avgFrameRate < kLowFrameRate) { + return kLow; + } else if (avgFrameRate > kHighFrameRate) { + return kHigh; + } else { + return kDefault; + } } -// DONE WITH QM CLASS - - -//RESOLUTION CLASS +// RESOLUTION CLASS VCMQmResolution::VCMQmResolution() -{ - _qm = new VCMResolutionScale(); - Reset(); + : _qm(new VCMResolutionScale()) { + Reset(); } -VCMQmResolution::~VCMQmResolution() -{ - delete _qm; +VCMQmResolution::~VCMQmResolution() { + delete _qm; } -void -VCMQmResolution::ResetRates() -{ - _sumEncodedBytes = 0; - _sumTargetRate = 0.0f; - _sumIncomingFrameRate = 0.0f; - _sumFrameRateMM = 0.0f; - _sumSeqRateMM = 0.0f; - _sumPacketLoss = 0.0f; - _frameCnt = 0; - _frameCntDelta = 0; - _lowBufferCnt = 0; - _updateRateCnt = 0; - return; +void VCMQmResolution::ResetRates() { + _sumTargetRate = 0.0f; + _sumIncomingFrameRate = 0.0f; + _sumRateMM = 0.0f; 
+ _sumRateMMSgn = 0; + _sumPacketLoss = 0.0f; + _frameCnt = 0; + _frameCntDelta = 0; + _lowBufferCnt = 0; + _updateRateCnt = 0; } -void -VCMQmResolution::Reset() -{ - _stateDecFactorSpatial = 1; - _stateDecFactorTemp = 1; - _bufferLevel = 0.0f; - _targetBitRate = 0.0f; - _incomingFrameRate = 0.0f; - _userFrameRate = 0.0f; - _perFrameBandwidth =0.0f; - ResetRates(); - ResetQM(); - return; +void VCMQmResolution::ResetDownSamplingState() { + _stateDecFactorSpatial = 1; + _stateDecFactorTemp = 1; } -// Initialize rate control quantities after reset of encoder -WebRtc_Word32 -VCMQmResolution::Initialize(float bitRate, float userFrameRate, - WebRtc_UWord32 width, WebRtc_UWord32 height) -{ - if (userFrameRate == 0.0f || width == 0 || height == 0) - { - return VCM_PARAMETER_ERROR; - } - _targetBitRate = bitRate; - _userFrameRate = userFrameRate; +void VCMQmResolution::Reset() { + _targetBitRate = 0.0f; + _userFrameRate = 0.0f; + _incomingFrameRate = 0.0f; + _perFrameBandwidth =0.0f; + _bufferLevel = 0.0f; + _avgTargetRate = 0.0f; + _avgIncomingFrameRate = 0.0f; + _avgRatioBufferLow = 0.0f; + _avgRateMisMatch = 0.0f; + _avgRateMisMatchSgn = 0.0f; + _avgPacketLoss = 0.0f; + _encoderState = kStableEncoding; + ResetRates(); + ResetDownSamplingState(); + ResetQM(); +} - // Encoder width and height - _width = width; - _height = height; +EncoderState VCMQmResolution::GetEncoderState() { + return _encoderState; +} - // Aspect ratio: used for selection of 1x2,2x1,2x2 - _aspectRatio = static_cast(_width) / static_cast(_height); +// Initialize state after re-initializing the encoder, +// i.e., after SetEncodingData() in mediaOpt. 
+int VCMQmResolution::Initialize(float bitRate, + float userFrameRate, + uint16_t width, + uint16_t height) { + if (userFrameRate == 0.0f || width == 0 || height == 0) { + return VCM_PARAMETER_ERROR; + } + Reset(); + _targetBitRate = bitRate; + _userFrameRate = userFrameRate; + _incomingFrameRate = userFrameRate; + UpdateCodecFrameSize(width, height); + _nativeWidth = width; + _nativeHeight = height; + // Initial buffer level. + _bufferLevel = kInitBufferLevel * _targetBitRate; + // Per-frame bandwidth. + _perFrameBandwidth = _targetBitRate / _userFrameRate; + _init = true; + return VCM_OK; +} - // Set the imageType for the encoder width/height. - _imageType = GetImageType(_width, _height); +void VCMQmResolution::UpdateCodecFrameSize(uint16_t width, uint16_t height) { + _width = width; + _height = height; + // Set the imageType for the encoder width/height. + _imageType = GetImageType(width, height); +} - // Initial buffer level - _bufferLevel = INIT_BUFFER_LEVEL * _targetBitRate; +// Update rate data after every encoded frame. +void VCMQmResolution::UpdateEncodedSize(int encodedSize, + FrameType encodedFrameType) { + _frameCnt++; + // Convert to Kbps. + float encodedSizeKbits = static_cast((encodedSize * 8.0) / 1000.0); - // Per-frame bandwidth - if ( _incomingFrameRate == 0 ) - { - _perFrameBandwidth = _targetBitRate / _userFrameRate; - _incomingFrameRate = _userFrameRate; - } - else - { - // Take average: this is due to delay in update of new encoder frame rate: - // userFrameRate is the new one, - // incomingFrameRate is the old one (based on previous ~ 1sec/RTCP report) - _perFrameBandwidth = 0.5 *( _targetBitRate / _userFrameRate + - _targetBitRate / _incomingFrameRate ); - } - _init = true; + // Update the buffer level: + // Note this is not the actual encoder buffer level. + // |_bufferLevel| is reset to 0 every time SelectResolution is called, and + // does not account for frame dropping by encoder or VCM. 
+ _bufferLevel += _perFrameBandwidth - encodedSizeKbits; + // Counter for occurrences of low buffer level: + // low/negative values means encoder is likely dropping frames. + if (_bufferLevel <= kPercBufferThr * kOptBufferLevel * _targetBitRate) { + _lowBufferCnt++; + } +} +// Update various quantities after SetTargetRates in MediaOpt. +void VCMQmResolution::UpdateRates(float targetBitRate, + float encoderSentRate, + float incomingFrameRate, + uint8_t packetLoss) { + // Sum the target bitrate and incoming frame rate: + // these values are the encoder rates (from previous update ~1sec), + // i.e, before the update for next ~1sec. + _sumTargetRate += _targetBitRate; + _sumIncomingFrameRate += _incomingFrameRate; + _updateRateCnt++; + // Sum the received (from RTCP reports) packet loss rates. + _sumPacketLoss += static_cast(packetLoss / 255.0); + + // Sum the sequence rate mismatch: + // Mismatch here is based on the difference between the target rate + // used (in previous ~1sec) and the average actual encoding rate measured + // at previous ~1sec. + float diff = _targetBitRate - encoderSentRate; + if (_targetBitRate > 0.0) + _sumRateMM += fabs(diff) / _targetBitRate; + int sgnDiff = diff > 0 ? 1 : (diff < 0 ? -1 : 0); + // To check for consistent under(+)/over_shooting(-) of target rate. + _sumRateMMSgn += sgnDiff; + + // Update with the current new target and frame rate: + // these values are ones the encoder will use for the current/next ~1sec + _targetBitRate = targetBitRate; + _incomingFrameRate = incomingFrameRate; + + // Update the per_frame_bandwidth: + // this is the per_frame_bw for the current/next ~1sec + _perFrameBandwidth = 0.0f; + if (_incomingFrameRate > 0.0f) { + _perFrameBandwidth = _targetBitRate / _incomingFrameRate; + } +} + +// Select the resolution factors: frame size and frame rate change (qm scales). +// Selection is for going down in resolution, or for going back up +// (if a previous down-sampling action was taken). 
+ +// In the current version the following constraints are imposed: +// 1) we only allow for one action (either down or back up) at a given time. +// 2) the possible down-sampling actions are: 2x2 spatial and 1/2 temporal. +// 3) the total amount of down-sampling (spatial and/or temporal) from the +// initial (native) resolution is limited by various factors. + +// TODO(marpan): extend to allow options for: 4/3x4/3, 1x2, 2x1 spatial, +// and 2/3 temporal (i.e., skip every third frame). +int VCMQmResolution::SelectResolution(VCMResolutionScale** qm) { + if (!_init) { + return VCM_UNINITIALIZED; + } + if (_contentMetrics == NULL) { + Reset(); + *qm = _qm; return VCM_OK; -} + } -// Update after every encoded frame -void -VCMQmResolution::UpdateEncodedSize(WebRtc_Word64 encodedSize, - FrameType encodedFrameType) -{ - // Update encoded size; - _sumEncodedBytes += encodedSize; - _frameCnt++; + // Default settings: no action. + _qm->spatialWidthFact = 1; + _qm->spatialHeightFact = 1; + _qm->temporalFact = 1; + *qm = _qm; - // Convert to Kbps - float encodedSizeKbits = (float)((encodedSize * 8.0) / 1000.0); + // Compute content class for selection. + _contentClass = ComputeContentClass(); - // Update the buffer level: - // per_frame_BW is updated when encoder is updated, every RTCP reports - _bufferLevel += _perFrameBandwidth - encodedSizeKbits; + // Compute various rate quantities for selection. + ComputeRatesForSelection(); - // Mismatch here is based on difference of actual encoded frame size and - // per-frame bandwidth, for delta frames - // This is a much stronger condition on rate mismatch than sumSeqRateMM - // Note: not used in this version - /* - const bool deltaFrame = (encodedFrameType != kVideoFrameKey && - encodedFrameType != kVideoFrameGolden); + // Get the encoder state. 
+ ComputeEncoderState(); - // Sum the frame mismatch: - if (deltaFrame) - { - _frameCntDelta++; - if (encodedSizeKbits > 0) - _sumFrameRateMM += - (float) (fabs(encodedSizeKbits - _perFrameBandwidth) / - encodedSizeKbits); - } - */ - - // Counter for occurrences of low buffer level - if (_bufferLevel <= PERC_BUFFER_THR * OPT_BUFFER_LEVEL * _targetBitRate) - { - _lowBufferCnt++; - } - -} - -// Update various quantities after SetTargetRates in MediaOpt -void -VCMQmResolution::UpdateRates(float targetBitRate, float avgSentBitRate, - float incomingFrameRate, WebRtc_UWord8 packetLoss) -{ - - // Sum the target bitrate and incoming frame rate: - // these values are the encoder rates (from previous update ~1sec), - // i.e, before the update for next ~1sec - _sumTargetRate += _targetBitRate; - _sumIncomingFrameRate += _incomingFrameRate; - _updateRateCnt++; - - // Sum the received (from RTCP reports) packet loss rates - _sumPacketLoss += (float) packetLoss / 255.0f; - - // Convert average sent bitrate to kbps - float avgSentBitRatekbps = avgSentBitRate / 1000.0f; - - // Sum the sequence rate mismatch: - // Mismatch here is based on difference between target rate the encoder - // used (in previous ~1sec) and the average actual - // encoding rate measured at current time - if (fabs(_targetBitRate - avgSentBitRatekbps) < THRESH_SUM_MM && - _targetBitRate > 0.0 ) - { - _sumSeqRateMM += (float) - (fabs(_targetBitRate - avgSentBitRatekbps) / _targetBitRate ); - } - - // Update QM with the current new target and frame rate: - // these values are ones the encoder will use for the current/next ~1sec - _targetBitRate = targetBitRate; - _incomingFrameRate = incomingFrameRate; - - // Update QM with an (average) encoder per_frame_bandwidth: - // this is the per_frame_bw for the current/next ~1sec - _perFrameBandwidth = 0.0f; - if (_incomingFrameRate > 0.0f) - { - _perFrameBandwidth = _targetBitRate / _incomingFrameRate; - } - -} - -// Select the resolution factors: frame size and frame 
rate change: (QM modes) -// Selection is for going back up in resolution, or going down in. -WebRtc_Word32 -VCMQmResolution::SelectResolution(VCMResolutionScale** qm) -{ - if (!_init) - { - return VCM_UNINITIALIZED; - } - if (_contentMetrics == NULL) - { - Reset(); //default values - *qm = _qm; - return VCM_OK; - } - - // Default settings - _qm->spatialWidthFact = 1; - _qm->spatialHeightFact = 1; - _qm->temporalFact = 1; - - // Update native values - _nativeWidth = _contentMetrics->nativeWidth; - _nativeHeight = _contentMetrics->nativeHeight; - _nativeFrameRate = _contentMetrics->nativeFrameRate; - - float avgTargetRate = 0.0f; - float avgIncomingFrameRate = 0.0f; - float ratioBufferLow = 0.0f; - float rateMisMatch = 0.0f; - float avgPacketLoss = 0.0f; - if (_frameCnt > 0) - { - ratioBufferLow = (float)_lowBufferCnt / (float)_frameCnt; - } - if (_updateRateCnt > 0) - { - // Use seq-rate mismatch for now - rateMisMatch = (float)_sumSeqRateMM / (float)_updateRateCnt; - //rateMisMatch = (float)_sumFrameRateMM / (float)_frameCntDelta; - - // Average target and incoming frame rates - avgTargetRate = (float)_sumTargetRate / (float)_updateRateCnt; - avgIncomingFrameRate = (float)_sumIncomingFrameRate / - (float)_updateRateCnt; - - // Average received packet loss rate - avgPacketLoss = (float)_sumPacketLoss / (float)_updateRateCnt; - } - - // For QM selection below, may want to weight the average encoder rates - // with the current (for next ~1sec) rate values. - // Uniform average for now: - float w1 = 0.5f; - float w2 = 0.5f; - avgTargetRate = w1 * avgTargetRate + w2 * _targetBitRate; - avgIncomingFrameRate = w1 * avgIncomingFrameRate + w2 * _incomingFrameRate; - - // Set the maximum transitional rate and image type: - // for up-sampled spatial dimensions. - // This is needed to get the transRate for going back up in - // spatial resolution (only 2x2 allowed in this version). 
- WebRtc_UWord8 imageType2 = GetImageType(2 * _width, 2 * _height); - WebRtc_UWord32 maxRateQM2 = kMaxRateQm[imageType2]; - - // Set the maximum transitional rate and image type: - // for the encoder spatial dimensions. - WebRtc_UWord32 maxRateQM = kMaxRateQm[_imageType]; - - // Compute class state of the content. - MotionNFD(); - Spatial(); - - // - // Get transitional rate from table, based on image type and content class. - // - - // Get image class and content class: for going down spatially - WebRtc_UWord8 imageClass = 1; - if (_imageType <= 3) imageClass = 0; - WebRtc_UWord8 contentClass = 3 * _motion.level + _spatial.level; - WebRtc_UWord8 tableIndex = imageClass * 9 + contentClass; - float scaleTransRate = kScaleTransRateQm[tableIndex]; - - // Get image class and content class: for going up spatially - WebRtc_UWord8 imageClass2 = 1; - if (imageType2 <= 3) - { - imageClass2 = 0; - } - WebRtc_UWord8 tableIndex2 = imageClass2 * 9 + contentClass; - float scaleTransRate2 = kScaleTransRateQm[tableIndex2]; - - // Transitonal rate for going down - WebRtc_UWord32 estimatedTransRateDown = static_cast - (_incomingFrameRate * scaleTransRate * maxRateQM / 30); - - // Transitional rate for going up temporally - WebRtc_UWord32 estimatedTransRateUpT = static_cast - (TRANS_RATE_SCALE_UP_TEMP * 2 * _incomingFrameRate * - scaleTransRate * maxRateQM / 30); - - // Transitional rate for going up spatially - WebRtc_UWord32 estimatedTransRateUpS = static_cast - (TRANS_RATE_SCALE_UP_SPATIAL * _incomingFrameRate * - scaleTransRate2 * maxRateQM2 / 30); - - // - // Done with transitional rates - // - - // - //CHECK FOR GOING BACK UP IN RESOLUTION - // - bool selectedUp = false; - // Check if native has been spatially down-sampled - if (_stateDecFactorSpatial > 1) - { - // Check conditions on buffer level and rate_mismatch - if ( (avgTargetRate > estimatedTransRateUpS) && - (ratioBufferLow < MAX_BUFFER_LOW) && (rateMisMatch < MAX_RATE_MM)) - { - // width/height scaled back up: - // 
setting 0 indicates scaling back to native - _qm->spatialHeightFact = 0; - _qm->spatialWidthFact = 0; - selectedUp = true; - } - } - //Check if native has been temporally down-sampled - if (_stateDecFactorTemp > 1) - { - if ( (avgTargetRate > estimatedTransRateUpT) && - (ratioBufferLow < MAX_BUFFER_LOW) && (rateMisMatch < MAX_RATE_MM)) - { - // temporal scale back up: - // setting 0 indicates scaling back to native - _qm->temporalFact = 0; - selectedUp = true; - } - } - - // Leave QM if we selected to go back up in either spatial or temporal - if (selectedUp == true) - { - // Update down-sampling state - // Note: only temp reduction by 2 is allowed - if (_qm->temporalFact == 0) - { - _stateDecFactorTemp = _stateDecFactorTemp / 2; - } - // Update down-sampling state - // Note: only spatial reduction by 2x2 is allowed - if (_qm->spatialHeightFact == 0 && _qm->spatialWidthFact == 0 ) - { - _stateDecFactorSpatial = _stateDecFactorSpatial / 4; - } - *qm = _qm; - return VCM_OK; - } - - // - // Done with checking for going back up in resolution - // - - // - //CHECK FOR RESOLUTION REDUCTION - // - - // Resolution reduction if: - // (1) target rate is lower than transitional rate, or - // (2) buffer level is not stable, or - // (3) rate mismatch is larger than threshold - - // Bias down-sampling based on packet loss conditions - if (avgPacketLoss > LOSS_THR) - { - estimatedTransRateDown = LOSS_RATE_FAC * estimatedTransRateDown; - } - - if ((avgTargetRate < estimatedTransRateDown ) || - (ratioBufferLow > MAX_BUFFER_LOW) - || (rateMisMatch > MAX_RATE_MM)) - { - - WebRtc_UWord8 spatialFact = 1; - WebRtc_UWord8 tempFact = 1; - - // Get the action - spatialFact = kSpatialAction[contentClass]; - tempFact = kTemporalAction[contentClass]; - - switch(spatialFact) - { - case 4: - _qm->spatialWidthFact = 2; - _qm->spatialHeightFact = 2; - break; - case 2: - //default is 1x2 (H) - _qm->spatialWidthFact = 2; - _qm->spatialHeightFact = 1; - // Select 1x2,2x1, or back to 2x2 - // Note: 
directional selection not used in this version - // SelectSpatialDirectionMode((float) estimatedTransRateDown); - break; - default: - _qm->spatialWidthFact = 1; - _qm->spatialHeightFact = 1; - break; - } - _qm->temporalFact = tempFact; - - // Sanity check on ST QM selection: - // override the settings for too small image size and frame rate - // Also check the limit on current down-sampling state - - // No spatial sampling if image size is too small (QCIF) - if ( (_width * _height) <= MIN_IMAGE_SIZE || - _stateDecFactorSpatial >= MAX_SPATIAL_DOWN_FACT) - { - _qm->spatialWidthFact = 1; - _qm->spatialHeightFact = 1; - } - - // No frame rate reduction below some point: - // use the (average) incoming frame rate - if ( avgIncomingFrameRate <= MIN_FRAME_RATE_QM || - _stateDecFactorTemp >= MAX_TEMP_DOWN_FACT) - { - _qm->temporalFact = 1; - } - - // No down-sampling if current downsampling state is above threshold - if (_stateDecFactorTemp * _stateDecFactorSpatial >= - MAX_SPATIAL_TEMP_DOWN_FACT) - { - _qm->spatialWidthFact = 1; - _qm->spatialHeightFact = 1; - _qm->temporalFact = 1; - } - // - // Done with sanity checks on ST QM selection - // - - // Update down-sampling states - _stateDecFactorSpatial = _stateDecFactorSpatial * _qm->spatialWidthFact - * _qm->spatialHeightFact; - _stateDecFactorTemp = _stateDecFactorTemp * _qm->temporalFact; - - if (_qm->spatialWidthFact != 1 || _qm->spatialHeightFact != 1 || - _qm->temporalFact != 1) - { - - WEBRTC_TRACE(webrtc::kTraceDebug, webrtc::kTraceVideo, -1, - "Resolution reduction occurred" - "Content Metrics are: Motion = %d , Spatial = %d, " - "Rates are: Est. Trans. BR = %d, Avg.Target BR = %f", - _motion.level, _spatial.level, - estimatedTransRateDown, avgTargetRate); - } - - } - else - { + // Check for going back up in resolution, if we have had some down-sampling + // relative to native state in Initialize (i.e., after SetEncodingData() + // in mediaOpt.). 
+ if (_stateDecFactorSpatial > 1 || _stateDecFactorTemp > 1) { + if (GoingUpResolution()) { *qm = _qm; return VCM_OK; } - // Done with checking for resolution reduction - - *qm = _qm; - return VCM_OK; - + } + // Check for going down in resolution, only if current total amount of + // down-sampling state is below threshold. + if (_stateDecFactorTemp * _stateDecFactorSpatial < kMaxDownSample) { + if (GoingDownResolution()) { + *qm = _qm; + return VCM_OK; + } + } + return VCM_OK; } -WebRtc_Word32 -VCMQmResolution::SelectSpatialDirectionMode(float transRate) -{ - // Default is 1x2 (H) +void VCMQmResolution::ComputeRatesForSelection() { + _avgTargetRate = 0.0f; + _avgIncomingFrameRate = 0.0f; + _avgRatioBufferLow = 0.0f; + _avgRateMisMatch = 0.0f; + _avgRateMisMatchSgn = 0.0f; + _avgPacketLoss = 0.0f; + if (_frameCnt > 0) { + _avgRatioBufferLow = static_cast(_lowBufferCnt) / + static_cast(_frameCnt); + } + if (_updateRateCnt > 0) { + _avgRateMisMatch = static_cast(_sumRateMM) / + static_cast(_updateRateCnt); + _avgRateMisMatchSgn = static_cast(_sumRateMMSgn) / + static_cast(_updateRateCnt); + _avgTargetRate = static_cast(_sumTargetRate) / + static_cast(_updateRateCnt); + _avgIncomingFrameRate = static_cast(_sumIncomingFrameRate) / + static_cast(_updateRateCnt); + _avgPacketLoss = static_cast(_sumPacketLoss) / + static_cast(_updateRateCnt); + } + // For selection we may want to weight some quantities more heavily + // with the current (i.e., next ~1sec) rate values. + float weight = 0.7f; + _avgTargetRate = weight * _avgTargetRate + (1.0 - weight) * _targetBitRate; + _avgIncomingFrameRate = weight * _avgIncomingFrameRate + + (1.0 - weight) * _incomingFrameRate; + _frameRateLevel = FrameRateLevel(_avgIncomingFrameRate); +} - // For bit rates well below transitional rate, we select 2x2 - if ( _targetBitRate < transRate * RATE_RED_SPATIAL_2X2 ) - { +void VCMQmResolution::ComputeEncoderState() { + // Default. 
+ _encoderState = kStableEncoding; + + // Assign stressed state if: + // 1) occurrences of low buffer levels is high, or + // 2) rate mis-match is high, and consistent over-shooting by encoder. + if ((_avgRatioBufferLow > kMaxBufferLow) || + ((_avgRateMisMatch > kMaxRateMisMatch) && + (_avgRateMisMatchSgn < -kRateOverShoot))) { + _encoderState = kStressedEncoding; + } + // Assign easy state if: + // 1) rate mis-match is high, and + // 2) consistent under-shooting by encoder. + if ((_avgRateMisMatch > kMaxRateMisMatch) && + (_avgRateMisMatchSgn > kRateUnderShoot)) { + _encoderState = kEasyEncoding; + } +} + +bool VCMQmResolution::GoingUpResolution() { + // Check if we should go up both spatially and temporally. + if (_stateDecFactorSpatial > 1 && _stateDecFactorTemp > 1) { + if (ConditionForGoingUp(2, 2, 2, kTransRateScaleUpSpatialTemp)) { + _qm->spatialHeightFact = 0; + _qm->spatialWidthFact = 0; + _qm->temporalFact = 0; + UpdateDownsamplingState(kUpResolution); + return true; + } + } else { + // Check if we should go up either spatially or temporally. + bool selectedUpS = false; + bool selectedUpT = false; + if (_stateDecFactorSpatial > 1) { + selectedUpS = ConditionForGoingUp(2, 2, 1, kTransRateScaleUpSpatial); + } + if (_stateDecFactorTemp > 1) { + selectedUpT = ConditionForGoingUp(1, 1, 2, kTransRateScaleUpTemp); + } + if (selectedUpS && !selectedUpT) { + _qm->spatialHeightFact = 0; + _qm->spatialWidthFact = 0; + UpdateDownsamplingState(kUpResolution); + return true; + } else if (!selectedUpS && selectedUpT) { + _qm->temporalFact = 0; + UpdateDownsamplingState(kUpResolution); + return true; + } else if (selectedUpS && selectedUpT) { + // TODO(marpan): which one to pick? + // pickSpatialOrTemporal() + // For now take spatial over temporal. 
+ _qm->spatialHeightFact = 0; + _qm->spatialWidthFact = 0; + UpdateDownsamplingState(kUpResolution); + return true; + } + } + return false; +} + +bool VCMQmResolution::ConditionForGoingUp(uint8_t facWidth, + uint8_t facHeight, + uint8_t facTemp, + float scaleFac) { + float estimatedTransitionRateUp = GetTransitionRate(facWidth, facHeight, + facTemp, scaleFac); + // Go back up if: + // 1) target rate is above threshold and current encoder state is stable, or + // 2) encoder state is easy (encoder is significantly under-shooting target). + if (((_avgTargetRate > estimatedTransitionRateUp) && + (_encoderState == kStableEncoding)) || + (_encoderState == kEasyEncoding)) { + return true; + } else { + return false; + } +} + +bool VCMQmResolution::GoingDownResolution() { + float estimatedTransitionRateDown = GetTransitionRate(1, 1, 1, 1.0); + float maxRate = kFrameRateFac[_frameRateLevel] * kMaxRateQm[_imageType]; + + // TODO(marpan): Bias down-sampling based on packet loss conditions. + + // Resolution reduction if: + // (1) target rate is below transition rate, or + // (2) encoder is in stressed state and target rate below a max threshold. + if ((_avgTargetRate < estimatedTransitionRateDown ) || + (_encoderState == kStressedEncoding && _avgTargetRate < maxRate)) { + // Get the down-sampling action. 
+ uint8_t spatialFact = kSpatialAction[_contentClass]; + uint8_t tempFact = kTemporalAction[_contentClass]; + + switch (spatialFact) { + case 4: { _qm->spatialWidthFact = 2; _qm->spatialHeightFact = 2; - return VCM_OK; - } - - // Otherwise check prediction errors, aspect ratio, horizontalness - - float spatialErr = _contentMetrics->spatialPredErr; - float spatialErrH = _contentMetrics->spatialPredErrH; - float spatialErrV = _contentMetrics->spatialPredErrV; - - // Favor 1x2 if aspect_ratio is 16:9 - if (_aspectRatio >= 16.0f / 9.0f ) - { - //check if 1x2 has lowest prediction error - if (spatialErrH < spatialErr && spatialErrH < spatialErrV) - { - return VCM_OK; - } - } - - // Check for 2x2 selection: favor 2x2 over 1x2 and 2x1 - if (spatialErr < spatialErrH * (1.0f + SPATIAL_ERR_2X2_VS_H) && - spatialErr < spatialErrV * (1.0f + SPATIAL_ERR_2X2_VS_V)) - { - _qm->spatialWidthFact = 2; - _qm->spatialHeightFact = 2; - return VCM_OK; - } - - // Check for 2x1 selection: - if (spatialErrV < spatialErrH * (1.0f - SPATIAL_ERR_V_VS_H) && - spatialErrV < spatialErr * (1.0f - SPATIAL_ERR_2X2_VS_V)) - { + break; + } + case 2: { + assert(false); // Currently not used. + // Select 1x2,2x1, or 4/3x4/3. + // SelectSpatialDirectionMode((float) estimatedTransitionRateDown); + break; + } + case 1: { _qm->spatialWidthFact = 1; - _qm->spatialHeightFact = 2; - return VCM_OK; + _qm->spatialHeightFact = 1; + break; + } + default: { + assert(false); + } + } + switch (tempFact) { + case 2: { + _qm->temporalFact = 2; + break; + } + case 1: { + _qm->temporalFact = 1; + break; + } + default: { + assert(false); + } + } + // Adjust some cases based on frame rate. + // TODO(marpan): will be modified when we add 1/2 spatial and 2/3 temporal. + AdjustAction(); + + // Sanity checks on down-sampling selection: + // override the settings for too small image size and/or frame rate. + // Also check the limit on current down-sampling states. 
+ + // No spatial sampling if current frame size is too small (QCIF), + // or if amount of spatial down-sampling is already too much. + if ((_width * _height) <= kMinImageSize || + _stateDecFactorSpatial >= kMaxSpatialDown) { + _qm->spatialWidthFact = 1; + _qm->spatialHeightFact = 1; + } + // No frame rate reduction if average frame rate is below some point, + // or if the amount of temporal down-sampling is already too much. + if (_avgIncomingFrameRate <= kMinFrameRate || + _stateDecFactorTemp >= kMaxTempDown) { + _qm->temporalFact = 1; } - return VCM_OK; + // Update down-sampling state. + if (_qm->spatialWidthFact != 1 || _qm->spatialHeightFact != 1 || + _qm->temporalFact != 1) { + UpdateDownsamplingState(kDownResolution); + return true; + } + } + return false; } -// DONE WITH RESOLUTION CLASS +float VCMQmResolution::GetTransitionRate(uint8_t facWidth, + uint8_t facHeight, + uint8_t facTemp, + float scaleFac) { + uint8_t imageType = GetImageType(facWidth * _width, + facHeight * _height); + LevelClass frameRateLevel = FrameRateLevel(facTemp * _avgIncomingFrameRate); + // The maximum allowed rate below which down-sampling is allowed: + // Nominal values based on image format (frame size and frame rate). + float maxRate = kFrameRateFac[frameRateLevel] * kMaxRateQm[imageType]; + + uint8_t imageClass = imageType > 3 ? 1: 0; + uint8_t tableIndex = imageClass * 9 + _contentClass; + // Scale factor for down-sampling transition threshold: + // factor based on the content class and the image size. + float scaleTransRate = kScaleTransRateQm[tableIndex]; + + // Threshold bitrate for resolution action. + return static_cast (scaleFac * facTemp * _incomingFrameRate * + scaleTransRate * maxRate / 30); +} + +void VCMQmResolution::UpdateDownsamplingState(ResolutionAction action) { + // Assumes for now only actions are 1/2 frame rate of 2x2 spatial. 
+ if (action == kUpResolution) { + if (_qm->spatialHeightFact == 0 && _qm->spatialWidthFact == 0) { + _stateDecFactorSpatial = _stateDecFactorSpatial / 4; + assert(_stateDecFactorSpatial >= 1); + } + if (_qm->temporalFact == 0) { + _stateDecFactorTemp = _stateDecFactorTemp / 2; + assert(_stateDecFactorTemp >= 1); + } + } else if (action == kDownResolution) { + _stateDecFactorSpatial = _stateDecFactorSpatial * _qm->spatialWidthFact + * _qm->spatialHeightFact; + _stateDecFactorTemp = _stateDecFactorTemp * _qm->temporalFact; + assert(_stateDecFactorSpatial >= 1); + assert(_stateDecFactorTemp >= 1); + } else { + assert(false); + } +} + +void VCMQmResolution::AdjustAction() { + if (_spatial.level == kDefault && _motion.level != kHigh && + _frameRateLevel == kHigh) { + _qm->temporalFact = 2; + _qm->spatialWidthFact = 1; + _qm->spatialHeightFact = 1; + } +} + +// TODO(marpan): Update this when we allow for 1/2 spatial down-sampling. +void VCMQmResolution::SelectSpatialDirectionMode(float transRate) { + // Default is 1x2 (H) + // For bit rates well below transitional rate, we select 2x2. + if (_targetBitRate < transRate * kRateRedSpatial2X2) { + _qm->spatialWidthFact = 2; + _qm->spatialHeightFact = 2; + } + // Otherwise check prediction errors and aspect ratio. + float spatialErr = 0.0; + float spatialErrH = 0.0; + float spatialErrV = 0.0; + if (_contentMetrics) { + spatialErr = _contentMetrics->spatial_pred_err; + spatialErrH = _contentMetrics->spatial_pred_err_h; + spatialErrV = _contentMetrics->spatial_pred_err_v; + } + + // Favor 1x2 if aspect_ratio is 16:9. + if (_aspectRatio >= 16.0f / 9.0f) { + // Check if 1x2 has lowest prediction error. + if (spatialErrH < spatialErr && spatialErrH < spatialErrV) { + _qm->spatialWidthFact = 2; + _qm->spatialHeightFact = 1; + } + } + // Check for 2x2 selection: favor 2x2 over 1x2 and 2x1. 
+ if (spatialErr < spatialErrH * (1.0f + kSpatialErr2x2VsHoriz) && + spatialErr < spatialErrV * (1.0f + kSpatialErr2X2VsVert)) { + _qm->spatialWidthFact = 2; + _qm->spatialHeightFact = 2; + } + // Check for 2x1 selection. + if (spatialErrV < spatialErrH * (1.0f - kSpatialErrVertVsHoriz) && + spatialErrV < spatialErr * (1.0f - kSpatialErr2X2VsVert)) { + _qm->spatialWidthFact = 1; + _qm->spatialHeightFact = 2; + } +} // ROBUSTNESS CLASS -VCMQmRobustness::VCMQmRobustness() -{ - Reset(); +VCMQmRobustness::VCMQmRobustness() { + Reset(); } -VCMQmRobustness::~VCMQmRobustness() -{ - +VCMQmRobustness::~VCMQmRobustness() { } -void -VCMQmRobustness::Reset() -{ - _prevTotalRate = 0.0f; - _prevRttTime = 0; - _prevPacketLoss = 0; - _prevCodeRateDelta = 0; - ResetQM(); - return; +void VCMQmRobustness::Reset() { + _prevTotalRate = 0.0f; + _prevRttTime = 0; + _prevPacketLoss = 0; + _prevCodeRateDelta = 0; + ResetQM(); } // Adjust the FEC rate based on the content and the network state // (packet loss rate, total rate/bandwidth, round trip time). // Note that packetLoss here is the filtered loss value. -float -VCMQmRobustness::AdjustFecFactor(WebRtc_UWord8 codeRateDelta, float totalRate, - float frameRate,WebRtc_UWord32 rttTime, - WebRtc_UWord8 packetLoss) -{ - // Default: no adjustment - float adjustFec = 1.0f; - - if (_contentMetrics == NULL) - { - return adjustFec; - } - - // Compute class state of the content. 
- MotionNFD(); - Spatial(); - - // TODO (marpan): Set FEC adjustment factor - - // Keep track of previous values of network state: - // adjustment may be also based on pattern of changes in network state - _prevTotalRate = totalRate; - _prevRttTime = rttTime; - _prevPacketLoss = packetLoss; - - _prevCodeRateDelta = codeRateDelta; - +float VCMQmRobustness::AdjustFecFactor(uint8_t codeRateDelta, + float totalRate, + float frameRate, + uint32_t rttTime, + uint8_t packetLoss) { + // Default: no adjustment + float adjustFec = 1.0f; + if (_contentMetrics == NULL) { return adjustFec; + } + // Compute class state of the content. + ComputeMotionNFD(); + ComputeSpatial(); + // TODO(marpan): Set FEC adjustment factor. + + // Keep track of previous values of network state: + // adjustment may be also based on pattern of changes in network state. + _prevTotalRate = totalRate; + _prevRttTime = rttTime; + _prevPacketLoss = packetLoss; + _prevCodeRateDelta = codeRateDelta; + return adjustFec; } -// Set the UEP (unequal-protection) on/off for the FEC -bool -VCMQmRobustness::SetUepProtection(WebRtc_UWord8 codeRateDelta, float totalRate, - WebRtc_UWord8 packetLoss, bool frameType) -{ - // Default: - bool uepProtection = false; - - if (_contentMetrics == NULL) - { - return uepProtection; - } - - - return uepProtection; +// Set the UEP (unequal-protection across packets) on/off for the FEC. +bool VCMQmRobustness::SetUepProtection(uint8_t codeRateDelta, + float totalRate, + uint8_t packetLoss, + bool frameType) { + // Default. + return false; } - -} // end of namespace +} // end of namespace diff --git a/src/modules/video_coding/main/source/qm_select.h b/src/modules/video_coding/main/source/qm_select.h index 3fb9040361..1859530adb 100644 --- a/src/modules/video_coding/main/source/qm_select.h +++ b/src/modules/video_coding/main/source/qm_select.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. 
All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -11,194 +11,249 @@ #ifndef WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_ #define WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_ -#include "typedefs.h" #include "common_types.h" +#include "typedefs.h" + /******************************************************/ /* Quality Modes: Resolution and Robustness settings */ /******************************************************/ -namespace webrtc -{ - +namespace webrtc { struct VideoContentMetrics; -struct VCMResolutionScale -{ - VCMResolutionScale(): spatialWidthFact(1), spatialHeightFact(1), - temporalFact(1){} - - WebRtc_UWord16 spatialWidthFact; - WebRtc_UWord16 spatialHeightFact; - WebRtc_UWord16 temporalFact; +struct VCMResolutionScale { + VCMResolutionScale() + : spatialWidthFact(1), + spatialHeightFact(1), + temporalFact(1) { + } + uint8_t spatialWidthFact; + uint8_t spatialHeightFact; + uint8_t temporalFact; }; -enum VCMMagValues -{ - kLow, - kHigh, - kDefault //default do nothing mode +enum LevelClass { + kLow, + kHigh, + kDefault }; -struct VCMContFeature -{ - VCMContFeature(): value(0.0f), level(kDefault){} +struct VCMContFeature { + VCMContFeature() + : value(0.0f), + level(kDefault) { + } + void Reset() { + value = 0.0f; + level = kDefault; + } + float value; + LevelClass level; +}; - void Reset() - { - value = 0.0f; - level = kDefault; - } +enum ResolutionAction { + kDownResolution, + kUpResolution, + kNoChangeResolution +}; - float value; - VCMMagValues level; +enum EncoderState { + kStableEncoding, // Low rate mis-match, stable buffer levels. + kStressedEncoding, // Significant over-shooting of target rate, + // Buffer under-flow, etc. + kEasyEncoding // Significant under-shooting of target rate. 
}; // QmMethod class: main class for resolution and robustness settings -class VCMQmMethod -{ -public: - VCMQmMethod(); - virtual ~VCMQmMethod(); +class VCMQmMethod { + public: + VCMQmMethod(); + virtual ~VCMQmMethod(); - // Reset values - void ResetQM(); - virtual void Reset() = 0; + // Reset values + void ResetQM(); + virtual void Reset() = 0; - // Update with the content metrics - void UpdateContent(const VideoContentMetrics* contentMetrics); + // Compute content class. + uint8_t ComputeContentClass(); - // Compute spatial texture magnitude and level - void Spatial(); + // Update with the content metrics. + void UpdateContent(const VideoContentMetrics* contentMetrics); - // Compute motion magnitude and level - void Motion(); + // Compute spatial texture magnitude and level. + // Spatial texture is a spatial prediction error measure. + void ComputeSpatial(); - // Compute motion magnitude and level for NFD metric - void MotionNFD(); + // Compute motion magnitude and level for NFD metric. + // NFD is normalized frame difference (normalized by spatial variance). + void ComputeMotionNFD(); - // Compute coherence magnitude and level - void Coherence(); + // Get the imageType (CIF, VGA, HD, etc) for the system width/height. + uint8_t GetImageType(uint16_t width, uint16_t height); - // Get the imageType (CIF, VGA, HD, etc) for the system width/height - WebRtc_Word8 GetImageType(WebRtc_UWord32 width, WebRtc_UWord32 height); + // Get the frame rate level. + LevelClass FrameRateLevel(float frame_rate); - // Content Data - const VideoContentMetrics* _contentMetrics; - - // Encoder and native frame sizes, frame rate, aspect ratio, imageType - WebRtc_UWord32 _width; - WebRtc_UWord32 _height; - WebRtc_UWord32 _nativeWidth; - WebRtc_UWord32 _nativeHeight; - WebRtc_UWord32 _nativeFrameRate; - float _aspectRatio; - // Image type for the current encoder system size. - WebRtc_UWord8 _imageType; - - // Content L/M/H values. 
stationary flag - VCMContFeature _motion; - VCMContFeature _spatial; - VCMContFeature _coherence; - bool _stationaryMotion; - bool _init; + protected: + // Content Data. + const VideoContentMetrics* _contentMetrics; + // Encoder frame sizes and native frame sizes. + uint16_t _width; + uint16_t _height; + uint16_t _nativeWidth; + uint16_t _nativeHeight; + float _aspectRatio; + // Image type and frame rate leve, for the current encoder resolution. + uint8_t _imageType; + LevelClass _frameRateLevel; + // Content class data. + VCMContFeature _motion; + VCMContFeature _spatial; + uint8_t _contentClass; + bool _init; }; // Resolution settings class -class VCMQmResolution : public VCMQmMethod -{ -public: - VCMQmResolution(); - ~VCMQmResolution(); +class VCMQmResolution : public VCMQmMethod { + public: + VCMQmResolution(); + virtual ~VCMQmResolution(); - // Reset all quantities - virtual void Reset(); + // Reset all quantities. + virtual void Reset(); - // Reset rate quantities and counter values after every Select Quality call - void ResetRates(); + // Reset rate quantities and counters after every SelectResolution() call. + void ResetRates(); - // Initialize rate control quantities after re-init of encoder. - WebRtc_Word32 Initialize(float bitRate, float userFrameRate, - WebRtc_UWord32 width, WebRtc_UWord32 height); + // Reset down-sampling state. + void ResetDownSamplingState(); - // Update QM with actual bit rate (size of the latest encoded frame) - // and frame type, after every encoded frame. - void UpdateEncodedSize(WebRtc_Word64 encodedSize, - FrameType encodedFrameType); + // Get the encoder state. + EncoderState GetEncoderState(); - // Update QM with new bit/frame/loss rates every ~1 sec from SetTargetRates - void UpdateRates(float targetBitRate, float avgSentRate, - float incomingFrameRate, WebRtc_UWord8 packetLoss); + // Initialize after SetEncodingData in media_opt. 
+ int Initialize(float bitRate, float userFrameRate, + uint16_t width, uint16_t height); - // Extract ST (spatio-temporal) QM behavior and make decision - // Inputs: qm: Reference to the quality modes pointer - // Output: the spatial and/or temporal scale change - WebRtc_Word32 SelectResolution(VCMResolutionScale** qm); + // Update the encoder frame size. + void UpdateCodecFrameSize(uint16_t width, uint16_t height); - // Select 1x2,2x2,2x2 spatial sampling mode - WebRtc_Word32 SelectSpatialDirectionMode(float transRate); + // Update with actual bit rate (size of the latest encoded frame) + // and frame type, after every encoded frame. + void UpdateEncodedSize(int encodedSize, + FrameType encodedFrameType); -private: - // Encoder rate control parameter - float _targetBitRate; - float _userFrameRate; - float _incomingFrameRate; - float _perFrameBandwidth; - float _bufferLevel; + // Update with new target bitrate, actual encoder sent rate, frame_rate, + // loss rate: every ~1 sec from SetTargetRates in media_opt. + void UpdateRates(float targetBitRate, float encoderSentRate, + float incomingFrameRate, uint8_t packetLoss); - // Data accumulated every ~1sec from MediaOpt - float _sumTargetRate; - float _sumIncomingFrameRate; - float _sumSeqRateMM; - float _sumFrameRateMM; - float _sumPacketLoss; - WebRtc_Word64 _sumEncodedBytes; + // Extract ST (spatio-temporal) resolution action. + // Inputs: qm: Reference to the quality modes pointer. + // Output: the spatial and/or temporal scale change. + int SelectResolution(VCMResolutionScale** qm); - // Resolution state parameters - WebRtc_UWord8 _stateDecFactorSpatial; - WebRtc_UWord8 _stateDecFactorTemp; + // Compute rates for the selection of down-sampling action. + void ComputeRatesForSelection(); - // Counters - WebRtc_UWord32 _frameCnt; - WebRtc_UWord32 _frameCntDelta; - WebRtc_UWord32 _updateRateCnt; - WebRtc_UWord32 _lowBufferCnt; + // Compute the encoder state. 
+ void ComputeEncoderState(); - VCMResolutionScale* _qm; + // Return true if the action is to go back up in resolution. + bool GoingUpResolution(); + + // Return true if the action is to go down in resolution. + bool GoingDownResolution(); + + // Check the condition for going up in resolution by the scale factors: + // |facWidth|, |facHeight|, |facTemp|. + // |scaleFac| is a scale factor for the transition rate. + bool ConditionForGoingUp(uint8_t facWidth, uint8_t facHeight, + uint8_t facTemp, + float scaleFac); + + // Get the bitrate threshold for the resolution action. + // The case |facWidth|=|facHeight|=|facTemp|==1 is for down-sampling action. + // |scaleFac| is a scale factor for the transition rate. + float GetTransitionRate(uint8_t facWidth, uint8_t facHeight, + uint8_t facTemp, float scaleFac); + + // Update the downsampling state. + void UpdateDownsamplingState(ResolutionAction action); + + void AdjustAction(); + + // Select the directional (1x2 or 2x1) spatial down-sampling action. + void SelectSpatialDirectionMode(float transRate); + + private: + VCMResolutionScale* _qm; + // Encoder rate control parameters. + float _targetBitRate; + float _userFrameRate; + float _incomingFrameRate; + float _perFrameBandwidth; + float _bufferLevel; + + // Data accumulated every ~1sec from MediaOpt. + float _sumTargetRate; + float _sumIncomingFrameRate; + float _sumRateMM; + float _sumRateMMSgn; + float _sumPacketLoss; + // Counters. + uint32_t _frameCnt; + uint32_t _frameCntDelta; + uint32_t _updateRateCnt; + uint32_t _lowBufferCnt; + + // Resolution state parameters. + uint8_t _stateDecFactorSpatial; + uint8_t _stateDecFactorTemp; + + // Quantities used for selection. + float _avgTargetRate; + float _avgIncomingFrameRate; + float _avgRatioBufferLow; + float _avgRateMisMatch; + float _avgRateMisMatchSgn; + float _avgPacketLoss; + EncoderState _encoderState; }; -// Robustness settings class +// Robustness settings class. 
-class VCMQmRobustness : public VCMQmMethod -{ -public: - VCMQmRobustness(); - ~VCMQmRobustness(); +class VCMQmRobustness : public VCMQmMethod { + public: + VCMQmRobustness(); + ~VCMQmRobustness(); - virtual void Reset(); + virtual void Reset(); - // Adjust FEC rate based on content: every ~1 sec from SetTargetRates. - // Returns an adjustment factor. - float AdjustFecFactor(WebRtc_UWord8 codeRateDelta, float totalRate, - float frameRate, WebRtc_UWord32 rttTime, - WebRtc_UWord8 packetLoss); + // Adjust FEC rate based on content: every ~1 sec from SetTargetRates. + // Returns an adjustment factor. + float AdjustFecFactor(uint8_t codeRateDelta, + float totalRate, + float frameRate, + uint32_t rttTime, + uint8_t packetLoss); - // Set the UEP protection on/off - bool SetUepProtection(WebRtc_UWord8 codeRateDelta, float totalRate, - WebRtc_UWord8 packetLoss, bool frameType); + // Set the UEP protection on/off. + bool SetUepProtection(uint8_t codeRateDelta, + float totalRate, + uint8_t packetLoss, + bool frameType); -private: - // Previous state of network parameters - float _prevTotalRate; - WebRtc_UWord32 _prevRttTime; - WebRtc_UWord8 _prevPacketLoss; - - // Previous FEC rate - WebRtc_UWord8 _prevCodeRateDelta; + private: + // Previous state of network parameters. + float _prevTotalRate; + uint32_t _prevRttTime; + uint8_t _prevPacketLoss; + uint8_t _prevCodeRateDelta; }; +} // namespace webrtc +#endif // WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_ -} // namespace webrtc - -#endif // WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_ diff --git a/src/modules/video_coding/main/source/qm_select_data.h b/src/modules/video_coding/main/source/qm_select_data.h index 64870eabed..d4af642291 100644 --- a/src/modules/video_coding/main/source/qm_select_data.h +++ b/src/modules/video_coding/main/source/qm_select_data.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -18,171 +18,167 @@ #include "typedefs.h" -namespace webrtc -{ - +namespace webrtc { // // PARAMETERS FOR RESOLUTION ADAPTATION // -// Initial level of buffer in secs: should corresponds to wrapper settings -#define INIT_BUFFER_LEVEL 0.5 +// Initial level of buffer in secs: should correspond to wrapper settings. +const float kInitBufferLevel = 0.5f; -// Optimal level of buffer in secs: should corresponds to wrapper settings -#define OPT_BUFFER_LEVEL 0.6 +// Optimal level of buffer in secs: should correspond to wrapper settings. +const float kOptBufferLevel = 0.6f; -// Threshold of (max) buffer size below which we consider too low (underflow) -#define PERC_BUFFER_THR 0.10 +// Threshold of (max) buffer size below which we consider too low (underflow). +const float kPercBufferThr = 0.10f; + +// Threshold on the occurrences of low buffer levels. +const float kMaxBufferLow = 0.5f; // Threshold on rate mismatch -#define MAX_RATE_MM 0.5 +const float kMaxRateMisMatch = 0.5f; -// Avoid outliers in seq-rate MM -#define THRESH_SUM_MM 1000 +// Threshold on amount of under/over encoder shooting. +const float kRateOverShoot = 0.75f; +const float kRateUnderShoot = 0.75f; -// Threshold on the occurrences of low buffer levels -#define MAX_BUFFER_LOW 0.5 +// Factor for transitional rate for going back up in resolution. +const float kTransRateScaleUpSpatial = 1.25f; +const float kTransRateScaleUpTemp = 1.25f; +const float kTransRateScaleUpSpatialTemp = 1.25f; -// Factor for transitional rate for going back up in resolution -#define TRANS_RATE_SCALE_UP_SPATIAL 1.25 -#define TRANS_RATE_SCALE_UP_TEMP 1.25 +// Threshold on packet loss rate, above which favor resolution reduction.
+const float kPacketLossThr = 0.1f; -// Threshold on packet loss rate, above which favor resolution reduction -#define LOSS_THR 0.1 - -// Factor for reducing transitonal bitrate under packet loss -#define LOSS_RATE_FAC 1.0 +// Factor for reducing transitonal bitrate under packet loss. +const float kPacketLossRateFac = 1.0f; // Maximum possible transitional rate for down-sampling: -// (units in kbps), for 30fps -const WebRtc_UWord16 kMaxRateQm[7] = { - 100, //QCIF - 500, //CIF - 800, //VGA - 1500, //4CIF - 2000, //720 HD 4:3, - 2500, //720 HD 16:9 - 3000 //1080HD +// (units in kbps), for 30fps. +const uint16_t kMaxRateQm[7] = { + 100, // QCIF + 250, // CIF + 500, // VGA + 800, // 4CIF + 1000, // 720 HD 4:3, + 1500, // 720 HD 16:9 + 2000 // 1080HD +}; + +// Frame rate scale for maximum transition rate. +const float kFrameRateFac[3] = { + 0.7f, // L + 1.0f, // H + 0.8f // D }; // Scale for transitional rate: based on content class // motion=L/H/D,spatial==L/H/D: for low, high, middle levels const float kScaleTransRateQm[18] = { - //4CIF and lower - 0.25f, // L, L - 0.75f, // L, H - 0.75f, // L, D - 0.75f, // H ,L - 0.50f, // H, H - 0.50f, // H, D + // 4CIF and lower + 0.50f, // L, L + 0.50f, // L, H + 0.50f, // L, D + 0.50f, // H ,L + 0.25f, // H, H + 0.25f, // H, D 0.50f, // D, L - 0.63f, // D, D + 0.50f, // D, D 0.25f, // D, H - //over 4CIF: WHD, HD - 0.25f, // L, L - 0.75f, // L, H - 0.75f, // L, D - 0.75f, // H ,L - 0.50f, // H, H - 0.50f, // H, D + // over 4CIF: WHD, HD + 0.50f, // L, L + 0.50f, // L, H + 0.50f, // L, D + 0.50f, // H ,L + 0.25f, // H, H + 0.25f, // H, D 0.50f, // D, L - 0.63f, // D, D - 0.25f // D, H + 0.50f, // D, D + 0.25f, // D, H }; // Action for down-sampling: // motion=L/H/D,spatial==L/H/D: for low, high, middle levels -const WebRtc_UWord8 kSpatialAction[9] = { - 1, // L, L - 1, // L, H - 1, // L, D - 4, // H ,L - 1, // H, H - 4, // H, D - 4, // D, L - 1, // D, D - 1, // D, H +const uint8_t kSpatialAction[9] = { + 1, // L, L + 1, // L, H + 
1, // L, D + 4, // H ,L + 1, // H, H + 4, // H, D + 4, // D, L + 1, // D, H + 1, // D, D }; -const WebRtc_UWord8 kTemporalAction[9] = { - 1, // L, L - 2, // L, H - 2, // L, D - 1, // H ,L - 2, // H, H - 1, // H, D - 1, // D, L - 2, // D, D - 1, // D, H +const uint8_t kTemporalAction[9] = { + 1, // L, L + 2, // L, H + 2, // L, D + 1, // H ,L + 2, // H, H + 1, // H, D + 1, // D, L + 2, // D, H + 1, // D, D }; -// Control the total amount of down-sampling allowed -#define MAX_SPATIAL_DOWN_FACT 4 -#define MAX_TEMP_DOWN_FACT 4 -#define MAX_SPATIAL_TEMP_DOWN_FACT 8 +// Control the total amount of down-sampling allowed. +const int kMaxSpatialDown = 16; +const int kMaxTempDown = 4; +const int kMaxDownSample = 16; -// Minimum image size for a spatial down-sampling: -// no spatial down-sampling if input size <= MIN_IMAGE_SIZE -#define MIN_IMAGE_SIZE 25344 //176*144 +// Minimum image size for a spatial down-sampling. +const int kMinImageSize= 176 * 144; // Minimum frame rate for temporal down-sampling: // no frame rate reduction if incomingFrameRate <= MIN_FRAME_RATE -#define MIN_FRAME_RATE_QM 8 +const int kMinFrameRate = 8; // Boundaries for the closest standard frame size -const WebRtc_UWord32 kFrameSizeTh[6] = { - 63360, //between 176*144 and 352*288 - 204288, //between 352*288 and 640*480 - 356352, //between 640*480 and 704*576 - 548352, //between 704*576 and 960*720 - 806400, //between 960*720 and 1280*720 +const uint32_t kFrameSizeTh[6] = { + 63360, // between 176*144 and 352*288 + 204288, // between 352*288 and 640*480 + 356352, // between 640*480 and 704*576 + 548352, // between 704*576 and 960*720 + 806400, // between 960*720 and 1280*720 1497600, // between 1280*720 and 1920*1080 }; - // // PARAMETERS FOR FEC ADJUSTMENT: TODO (marpan) // - // // PARAMETETS FOR SETTING LOW/HIGH STATES OF CONTENT METRICS: // -// Threshold to determine if high amount of zero_motion -#define HIGH_ZERO_MOTION_SIZE 0.95 - -// Thresholds for motion: -// motion level is derived from motion 
vectors: motion = size_nz*magn_nz -#define HIGH_MOTION 0.7 -#define LOW_MOTION 0.4 +// Thresholds for frame rate: +const int kLowFrameRate = 10; +const int kHighFrameRate = 25; // Thresholds for motion: motion level is from NFD -#define HIGH_MOTION_NFD 0.075 -#define LOW_MOTION_NFD 0.04 +const float kHighMotionNfd = 0.075f; +const float kLowMotionNfd = 0.04f; // Thresholds for spatial prediction error: -// this is appLied on the min(2x2,1x2,2x1) -#define HIGH_TEXTURE 0.035 -#define LOW_TEXTURE 0.025 +// this is applied on the min(2x2,1x2,2x1) +const float kHighTexture = 0.035f; +const float kLowTexture = 0.025f; -// Used to reduce thresholds for HD scenes: correction factor since higher -// correlation in HD scenes means lower spatial prediction error -#define SCALE_TEXTURE_HD 0.9; - -// Thresholds for distortion and horizontalness: -// applied on product: horiz_nz/dist_nz -#define COHERENCE_THR 1.0 -#define COH_MAX 10 +// Used to reduce thresholds for larger/HD scenes: correction factor since +// higher correlation in HD scenes means lower spatial prediction error. 
+const float kScaleTexture = 0.9f; // percentage reduction in transitional bitrate for 2x2 selected over 1x2/2x1 -#define RATE_RED_SPATIAL_2X2 0.6 +const float kRateRedSpatial2X2 = 0.6f; -#define SPATIAL_ERR_2X2_VS_H 0.1 //percentage to favor 2x2 -#define SPATIAL_ERR_2X2_VS_V 0.1 //percentage to favor 2x2 over V -#define SPATIAL_ERR_V_VS_H 0.1 //percentage to favor H over V +const float kSpatialErr2x2VsHoriz = 0.1f; // percentage to favor 2x2 over H +const float kSpatialErr2X2VsVert = 0.1f; // percentage to favor 2x2 over V +const float kSpatialErrVertVsHoriz = 0.1f; // percentage to favor H over V -} // namespace webrtc +} // namespace webrtc + +#endif // WEBRTC_MODULES_VIDEO_CODING_SOURCE_QM_SELECT_DATA_H_ -#endif // WEBRTC_MODULES_VIDEO_CODING_SOURCE_QM_SELECT_DATA_H_ diff --git a/src/modules/video_coding/main/source/qm_select_unittest.cc b/src/modules/video_coding/main/source/qm_select_unittest.cc new file mode 100644 index 0000000000..3e0cef4d6d --- /dev/null +++ b/src/modules/video_coding/main/source/qm_select_unittest.cc @@ -0,0 +1,834 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This file includes unit tests for the QmResolution class + * In particular, for the selection of spatial and/or temporal down-sampling.
+ */ + +#include + +#include "modules/video_coding/main/source/qm_select.h" +#include "modules/interface/module_common_types.h" + +namespace webrtc { + +class QmSelectTest : public ::testing::Test { + protected: + QmSelectTest() + : qm_resolution_(new VCMQmResolution()), + content_metrics_(new VideoContentMetrics()), + qm_scale_(NULL) { + } + VCMQmResolution* qm_resolution_; + VideoContentMetrics* content_metrics_; + VCMResolutionScale* qm_scale_; + + void InitQmNativeData(float initial_bit_rate, int user_frame_rate, + int native_width, int native_height); + + void UpdateQmEncodedFrame(int* encoded_size, int num_updates); + + void UpdateQmRateData(int* target_rate, + int* encoder_sent_rate, + int* incoming_frame_rate, + uint8_t* fraction_lost, + int num_updates); + + void UpdateQmContentData(float motion_metric, + float spatial_metric, + float spatial_metric_horiz, + float spatial_metric_vert); + + bool IsSelectedActionCorrect(VCMResolutionScale* qm_scale, + uint8_t fac_width, + uint8_t fac_height, + uint8_t fac_temp); + + void TearDown() { + delete qm_resolution_; + delete content_metrics_; + } +}; + +TEST_F(QmSelectTest, HandleInputs) { + // Expect parameter error. Initialize with invalid inputs. + EXPECT_EQ(-4, qm_resolution_->Initialize(1000, 0, 640, 480)); + EXPECT_EQ(-4, qm_resolution_->Initialize(1000, 30, 640, 0)); + EXPECT_EQ(-4, qm_resolution_->Initialize(1000, 30, 0, 480)); + + // Expect uninitialized error.: No valid initialization before selection. + EXPECT_EQ(-7, qm_resolution_->SelectResolution(&qm_scale_)); + + VideoContentMetrics* content_metrics = NULL; + EXPECT_EQ(0, qm_resolution_->Initialize(1000, 30, 640, 480)); + qm_resolution_->UpdateContent(content_metrics); + // Content metrics are NULL: Expect success and no down-sampling action. + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 1)); +} + +// No down-sampling action at high rates. 
+TEST_F(QmSelectTest, NoActionHighRate) { + // Initialize with bitrate, frame rate, and native system width/height. + InitQmNativeData(800, 30, 640, 480); + + // Update with encoder frame size. + uint16_t codec_width = 640; + uint16_t codec_height = 480; + qm_resolution_->UpdateCodecFrameSize(codec_width, codec_height); + EXPECT_EQ(2, qm_resolution_->GetImageType(codec_width, codec_height)); + + // Update rates for a sequence of intervals. + int target_rate[] = {800, 800, 800}; + int encoder_sent_rate[] = {800, 800, 800}; + int incoming_frame_rate[] = {30, 30, 30}; + uint8_t fraction_lost[] = {10, 10, 10}; + UpdateQmRateData(target_rate, encoder_sent_rate, incoming_frame_rate, + fraction_lost, 3); + + // Update content: motion level, and 3 spatial prediction errors. + UpdateQmContentData(0.01, 0.01, 0.01, 0.01); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(0, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kStableEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 1)); +} + +// Rate is well below transition, down-sampling action is taken, +// depending on the content state. +TEST_F(QmSelectTest, DownActionLowRate) { + // Initialize with bitrate, frame rate, and native system width/height. + InitQmNativeData(100, 30, 640, 480); + + // Update with encoder frame size. + uint16_t codec_width = 640; + uint16_t codec_height = 480; + qm_resolution_->UpdateCodecFrameSize(codec_width, codec_height); + EXPECT_EQ(2, qm_resolution_->GetImageType(codec_width, codec_height)); + + // Update rates for a sequence of intervals. + int target_rate[] = {100, 100, 100}; + int encoder_sent_rate[] = {100, 100, 100}; + int incoming_frame_rate[] = {30, 30, 30}; + uint8_t fraction_lost[] = {10, 10, 10}; + UpdateQmRateData(target_rate, encoder_sent_rate, incoming_frame_rate, + fraction_lost, 3); + + // Update content: motion level, and 3 spatial prediction errors. 
+ // High motion, low spatial: 2x2 spatial expected. + UpdateQmContentData(0.1, 0.01, 0.01, 0.01); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(3, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kStableEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 2, 2, 1)); + + qm_resolution_->ResetDownSamplingState(); + // Low motion, low spatial: no action expected: content is too low. + UpdateQmContentData(0.01, 0.01, 0.01, 0.01); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(0, qm_resolution_->ComputeContentClass()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 1)); + + qm_resolution_->ResetDownSamplingState(); + // Medium motion, low spatial: 2x2 spatial expected. + UpdateQmContentData(0.06, 0.01, 0.01, 0.01); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(6, qm_resolution_->ComputeContentClass()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 2, 2, 1)); + + qm_resolution_->ResetDownSamplingState(); + // High motion, high spatial: 1/2 temporal expected. + UpdateQmContentData(0.1, 0.1, 0.1, 0.1); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(4, qm_resolution_->ComputeContentClass()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 2)); + + qm_resolution_->ResetDownSamplingState(); + // Low motion, high spatial: 1/2 temporal expected. + UpdateQmContentData(0.01, 0.1, 0.1, 0.1); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(1, qm_resolution_->ComputeContentClass()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 2)); + + qm_resolution_->ResetDownSamplingState(); + // Medium motion, high spatial: 1/2 temporal expected. 
+ UpdateQmContentData(0.06, 0.1, 0.1, 0.1); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(7, qm_resolution_->ComputeContentClass()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 2)); + + qm_resolution_->ResetDownSamplingState(); + // High motion, medium spatial: 2x2 spatial expected. + UpdateQmContentData(0.1, 0.03, 0.03, 0.03); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(5, qm_resolution_->ComputeContentClass()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 2, 2, 1)); + + qm_resolution_->ResetDownSamplingState(); + // Low motion, medium spatial: high frame rate, so 1/2 temporal expected. + UpdateQmContentData(0.01, 0.03, 0.03, 0.03); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(2, qm_resolution_->ComputeContentClass()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 2)); + + qm_resolution_->ResetDownSamplingState(); + // Medium motion, medium spatial: high frame rate, so 1/2 temporal expected. + UpdateQmContentData(0.06, 0.03, 0.03, 0.03); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(8, qm_resolution_->ComputeContentClass()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 2)); +} + +// Rate mis-match is high, and we have over-shooting. +// since target rate is below max for down-sampling, down-sampling is selected. +TEST_F(QmSelectTest, DownActionHighRateMMOvershoot) { + // Initialize with bitrate, frame rate, and native system width/height. + InitQmNativeData(450, 30, 640, 480); + + // Update with encoder frame size. + uint16_t codec_width = 640; + uint16_t codec_height = 480; + qm_resolution_->UpdateCodecFrameSize(codec_width, codec_height); + EXPECT_EQ(2, qm_resolution_->GetImageType(codec_width, codec_height)); + + // Update rates for a sequence of intervals. 
+ int target_rate[] = {450, 450, 450}; + int encoder_sent_rate[] = {900, 900, 900}; + int incoming_frame_rate[] = {30, 30, 30}; + uint8_t fraction_lost[] = {10, 10, 10}; + UpdateQmRateData(target_rate, encoder_sent_rate, incoming_frame_rate, + fraction_lost, 3); + + // Update content: motion level, and 3 spatial prediction errors. + // High motion, low spatial. + UpdateQmContentData(0.1, 0.01, 0.01, 0.01); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(3, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kStressedEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 2, 2, 1)); + + qm_resolution_->ResetDownSamplingState(); + // Low motion, high spatial + UpdateQmContentData(0.01, 0.1, 0.1, 0.1); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(1, qm_resolution_->ComputeContentClass()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 2)); +} + +// Rate mis-match is high, target rate is below max for down-sampling, +// but since we have consistent under-shooting, no down-sampling action. +TEST_F(QmSelectTest, NoActionHighRateMMUndershoot) { + // Initialize with bitrate, frame rate, and native system width/height. + InitQmNativeData(450, 30, 640, 480); + + // Update with encoder frame size. + uint16_t codec_width = 640; + uint16_t codec_height = 480; + qm_resolution_->UpdateCodecFrameSize(codec_width, codec_height); + EXPECT_EQ(2, qm_resolution_->GetImageType(codec_width, codec_height)); + + // Update rates for a sequence of intervals. + int target_rate[] = {450, 450, 450}; + int encoder_sent_rate[] = {100, 100, 100}; + int incoming_frame_rate[] = {30, 30, 30}; + uint8_t fraction_lost[] = {10, 10, 10}; + UpdateQmRateData(target_rate, encoder_sent_rate, incoming_frame_rate, + fraction_lost, 3); + + // Update content: motion level, and 3 spatial prediction errors. + // High motion, low spatial. 
+ UpdateQmContentData(0.1, 0.01, 0.01, 0.01); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(3, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kEasyEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 1)); + + qm_resolution_->ResetDownSamplingState(); + // Low motion, high spatial + UpdateQmContentData(0.01, 0.1, 0.1, 0.1); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(1, qm_resolution_->ComputeContentClass()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 1)); +} + +// Buffer is underflowing, and target rate is below max for down-sampling, +// so action is taken. +TEST_F(QmSelectTest, DownActionBufferUnderflow) { + // Initialize with bitrate, frame rate, and native system width/height. + InitQmNativeData(450, 30, 640, 480); + + // Update with encoder frame size. + uint16_t codec_width = 640; + uint16_t codec_height = 480; + qm_resolution_->UpdateCodecFrameSize(codec_width, codec_height); + EXPECT_EQ(2, qm_resolution_->GetImageType(codec_width, codec_height)); + + // Update with encoded size over a number of frames. + // per-frame bandwidth = 15 = 450/30: simulate (decoder) buffer underflow: + int encoded_size[] = {200, 100, 50, 30, 60, 40, 20, 30, 20, 40}; + UpdateQmEncodedFrame(encoded_size, 10); + + // Update rates for a sequence of intervals. + int target_rate[] = {450, 450, 450}; + int encoder_sent_rate[] = {450, 450, 450}; + int incoming_frame_rate[] = {30, 30, 30}; + uint8_t fraction_lost[] = {10, 10, 10}; + UpdateQmRateData(target_rate, encoder_sent_rate, incoming_frame_rate, + fraction_lost, 3); + + // Update content: motion level, and 3 spatial prediction errors. + // High motion, low spatial. 
+ UpdateQmContentData(0.1, 0.01, 0.01, 0.01); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(3, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kStressedEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 2, 2, 1)); + + qm_resolution_->ResetDownSamplingState(); + // Low motion, high spatial + UpdateQmContentData(0.01, 0.1, 0.1, 0.1); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(1, qm_resolution_->ComputeContentClass()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 2)); +} + +// Target rate is below max for down-sampling, but buffer level is stable, +// so no action is taken. +TEST_F(QmSelectTest, NoActionBufferStable) { + // Initialize with bitrate, frame rate, and native system width/height. + InitQmNativeData(450, 30, 640, 480); + + // Update with encoder frame size. + uint16_t codec_width = 640; + uint16_t codec_height = 480; + qm_resolution_->UpdateCodecFrameSize(codec_width, codec_height); + EXPECT_EQ(2, qm_resolution_->GetImageType(codec_width, codec_height)); + + // Update with encoded size over a number of frames. + // per-frame bandwidth = 15 = 450/30: simulate stable (decoder) buffer levels. + int32_t encoded_size[] = {40, 10, 10, 16, 18, 20, 17, 20, 16, 15}; + UpdateQmEncodedFrame(encoded_size, 10); + + // Update rates for a sequence of intervals. + int target_rate[] = {450, 450, 450}; + int encoder_sent_rate[] = {450, 450, 450}; + int incoming_frame_rate[] = {30, 30, 30}; + uint8_t fraction_lost[] = {10, 10, 10}; + UpdateQmRateData(target_rate, encoder_sent_rate, incoming_frame_rate, + fraction_lost, 3); + + // Update content: motion level, and 3 spatial prediction errors. + // High motion, low spatial. 
+ UpdateQmContentData(0.1, 0.01, 0.01, 0.01); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(3, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kStableEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 1)); + + qm_resolution_->ResetDownSamplingState(); + // Low motion, high spatial + UpdateQmContentData(0.01, 0.1, 0.1, 0.1); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(1, qm_resolution_->ComputeContentClass()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 1)); +} + +// Very low rate, but no spatial down-sampling below some size (QCIF). +TEST_F(QmSelectTest, LimitDownSpatialAction) { + // Initialize with bitrate, frame rate, and native system width/height. + InitQmNativeData(10, 30, 176, 144); + + // Update with encoder frame size. + uint16_t codec_width = 176; + uint16_t codec_height = 144; + qm_resolution_->UpdateCodecFrameSize(codec_width, codec_height); + EXPECT_EQ(0, qm_resolution_->GetImageType(codec_width, codec_height)); + + // Update rates for a sequence of intervals. + int target_rate[] = {10, 10, 10}; + int encoder_sent_rate[] = {10, 10, 10}; + int incoming_frame_rate[] = {30, 30, 30}; + uint8_t fraction_lost[] = {10, 10, 10}; + UpdateQmRateData(target_rate, encoder_sent_rate, incoming_frame_rate, + fraction_lost, 3); + + // Update content: motion level, and 3 spatial prediction errors. + // High motion, low spatial. + UpdateQmContentData(0.1, 0.01, 0.01, 0.01); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(3, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kStableEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 1)); +} + +// Very low rate, but no frame reduction below some frame_rate (8fps). +TEST_F(QmSelectTest, LimitDownTemporalAction) { + // Initialize with bitrate, frame rate, and native system width/height. 
+ InitQmNativeData(10, 8, 640, 480); + + // Update with encoder frame size. + uint16_t codec_width = 640; + uint16_t codec_height = 480; + qm_resolution_->UpdateCodecFrameSize(codec_width, codec_height); + EXPECT_EQ(2, qm_resolution_->GetImageType(codec_width, codec_height)); + + // Update rates for a sequence of intervals. + int target_rate[] = {10, 10, 10}; + int encoder_sent_rate[] = {10, 10, 10}; + int incoming_frame_rate[] = {8, 8, 8}; + uint8_t fraction_lost[] = {10, 10, 10}; + UpdateQmRateData(target_rate, encoder_sent_rate, incoming_frame_rate, + fraction_lost, 3); + + // Update content: motion level, and 3 spatial prediction errors. + // Low motion, medium spatial. + UpdateQmContentData(0.01, 0.03, 0.03, 0.03); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(2, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kStableEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 1)); +} + +// Two stages: spatial down-sample and then back up spatially, +// as rate has increased. +TEST_F(QmSelectTest, 2StageDownSpatialUpSpatial) { + // Initialize with bitrate, frame rate, and native system width/height. + InitQmNativeData(100, 30, 640, 480); + + // Update with encoder frame size. + uint16_t codec_width = 640; + uint16_t codec_height = 480; + qm_resolution_->UpdateCodecFrameSize(codec_width, codec_height); + EXPECT_EQ(2, qm_resolution_->GetImageType(codec_width, codec_height)); + + // Update rates for a sequence of intervals. + int target_rate[] = {100, 100, 100}; + int encoder_sent_rate[] = {100, 100, 100}; + int incoming_frame_rate[] = {30, 30, 30}; + uint8_t fraction_lost[] = {10, 10, 10}; + UpdateQmRateData(target_rate, encoder_sent_rate, incoming_frame_rate, + fraction_lost, 3); + + // Update content: motion level, and 3 spatial prediction errors. + // High motion, low spatial.
+ UpdateQmContentData(0.1, 0.01, 0.01, 0.01); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(3, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kStableEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 2, 2, 1)); + + // Reset and go up in rate: expected to go back up. + qm_resolution_->ResetRates(); + qm_resolution_->UpdateCodecFrameSize(320, 240); + EXPECT_EQ(1, qm_resolution_->GetImageType(320, 240)); + // Update rates for a sequence of intervals. + int target_rate2[] = {400, 400, 400, 400, 400}; + int encoder_sent_rate2[] = {400, 400, 400, 400, 400}; + int incoming_frame_rate2[] = {30, 30, 30, 30, 30}; + uint8_t fraction_lost2[] = {10, 10, 10, 10, 10}; + UpdateQmRateData(target_rate2, encoder_sent_rate2, incoming_frame_rate2, + fraction_lost2, 5); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(kStableEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 0, 0, 1)); +} + +// Two stages: spatial down-sample and then back up spatially, since encoder +// is under-shooting target even though rate has not increased much. +TEST_F(QmSelectTest, 2StageDownSpatialUpSpatialUndershoot) { + // Initialize with bitrate, frame rate, and native system width/height. + InitQmNativeData(100, 30, 640, 480); + + // Update with encoder frame size. + uint16_t codec_width = 640; + uint16_t codec_height = 480; + qm_resolution_->UpdateCodecFrameSize(codec_width, codec_height); + EXPECT_EQ(2, qm_resolution_->GetImageType(codec_width, codec_height)); + + // Update rates for a sequence of intervals. + int target_rate[] = {100, 100, 100}; + int encoder_sent_rate[] = {100, 100, 100}; + int incoming_frame_rate[] = {30, 30, 30}; + uint8_t fraction_lost[] = {10, 10, 10}; + UpdateQmRateData(target_rate, encoder_sent_rate, incoming_frame_rate, + fraction_lost, 3); + + // Update content: motion level, and 3 spatial prediction errors. 
+ // High motion, low spatial. + UpdateQmContentData(0.1, 0.01, 0.01, 0.01); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(3, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kStableEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 2, 2, 1)); + + // Reset rates and simulate under-shooting scenario: expect to go back up. + qm_resolution_->ResetRates(); + qm_resolution_->UpdateCodecFrameSize(320, 240); + EXPECT_EQ(1, qm_resolution_->GetImageType(320, 240)); + // Update rates for a sequence of intervals. + int target_rate2[] = {200, 200, 200, 200, 200}; + int encoder_sent_rate2[] = {50, 50, 50, 50, 50}; + int incoming_frame_rate2[] = {30, 30, 30, 30, 30}; + uint8_t fraction_lost2[] = {10, 10, 10, 10, 10}; + UpdateQmRateData(target_rate2, encoder_sent_rate2, incoming_frame_rate2, + fraction_lost2, 5); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(kEasyEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 0, 0, 1)); +} + +// Two stages: spatial down-sample and then no action to go up, +// as encoding rate mis-match is too high. +TEST_F(QmSelectTest, 2StageDownSpatialNoActionUp) { + // Initialize with bitrate, frame rate, and native system width/height. + InitQmNativeData(100, 30, 640, 480); + + // Update with encoder frame size. + uint16_t codec_width = 640; + uint16_t codec_height = 480; + qm_resolution_->UpdateCodecFrameSize(codec_width, codec_height); + EXPECT_EQ(2, qm_resolution_->GetImageType(codec_width, codec_height)); + + // Update rates for a sequence of intervals. + int target_rate[] = {100, 100, 100}; + int encoder_sent_rate[] = {100, 100, 100}; + int incoming_frame_rate[] = {30, 30, 30}; + uint8_t fraction_lost[] = {10, 10, 10}; + UpdateQmRateData(target_rate, encoder_sent_rate, incoming_frame_rate, + fraction_lost, 3); + + // Update content: motion level, and 3 spatial prediction errors.
+ // High motion, low spatial. + UpdateQmContentData(0.1, 0.01, 0.01, 0.01); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(3, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kStableEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 2, 2, 1)); + + // Reset and simulate large rate mis-match: expect no action to go back up. + qm_resolution_->ResetRates(); + qm_resolution_->UpdateCodecFrameSize(320, 240); + EXPECT_EQ(1, qm_resolution_->GetImageType(320, 240)); + // Update rates for a sequence of intervals. + int target_rate2[] = {400, 400, 400, 400, 400}; + int encoder_sent_rate2[] = {1000, 1000, 1000, 1000, 1000}; + int incoming_frame_rate2[] = {30, 30, 30, 30, 30}; + uint8_t fraction_lost2[] = {10, 10, 10, 10, 10}; + UpdateQmRateData(target_rate2, encoder_sent_rate2, incoming_frame_rate2, + fraction_lost2, 5); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(kStressedEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 1)); +} +// Two stages: temporally down-sample and then back up temporally, +// as rate has increased. +TEST_F(QmSelectTest, 2StatgeDownTemporalUpTemporal) { + // Initialize with bitrate, frame rate, and native system width/height. + InitQmNativeData(100, 30, 640, 480); + + // Update with encoder frame size. + uint16_t codec_width = 640; + uint16_t codec_height = 480; + qm_resolution_->UpdateCodecFrameSize(codec_width, codec_height); + EXPECT_EQ(2, qm_resolution_->GetImageType(codec_width, codec_height)); + + // Update rates for a sequence of intervals. + int target_rate[] = {100, 100, 100}; + int encoder_sent_rate[] = {100, 100, 100}; + int incoming_frame_rate[] = {30, 30, 30}; + uint8_t fraction_lost[] = {10, 10, 10}; + UpdateQmRateData(target_rate, encoder_sent_rate, incoming_frame_rate, + fraction_lost, 3); + + // Update content: motion level, and 3 spatial prediction errors.
+ // Low motion, high spatial. + UpdateQmContentData(0.01, 0.1, 0.1, 0.1); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(1, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kStableEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 2)); + + // Reset rates and go up in rate: expect to go back up. + qm_resolution_->ResetRates(); + // Update rates for a sequence of intervals. + int target_rate2[] = {400, 400, 400, 400, 400}; + int encoder_sent_rate2[] = {400, 400, 400, 400, 400}; + int incoming_frame_rate2[] = {15, 15, 15, 15, 15}; + uint8_t fraction_lost2[] = {10, 10, 10, 10, 10}; + UpdateQmRateData(target_rate2, encoder_sent_rate2, incoming_frame_rate2, + fraction_lost2, 5); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(kStableEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 0)); +} + +// Two stages: temporal down-sample and then back up temporally, since encoder +// is under-shooting target even though rate has not increased much. +TEST_F(QmSelectTest, 2StatgeDownTemporalUpTemporalUndershoot) { + // Initialize with bitrate, frame rate, and native system width/height. + InitQmNativeData(100, 30, 640, 480); + + // Update with encoder frame size. + uint16_t codec_width = 640; + uint16_t codec_height = 480; + qm_resolution_->UpdateCodecFrameSize(codec_width, codec_height); + EXPECT_EQ(2, qm_resolution_->GetImageType(codec_width, codec_height)); + + // Update rates for a sequence of intervals. + int target_rate[] = {100, 100, 100}; + int encoder_sent_rate[] = {100, 100, 100}; + int incoming_frame_rate[] = {30, 30, 30}; + uint8_t fraction_lost[] = {10, 10, 10}; + UpdateQmRateData(target_rate, encoder_sent_rate, incoming_frame_rate, + fraction_lost, 3); + + // Update content: motion level, and 3 spatial prediction errors. + // Low motion, high spatial. 
+ UpdateQmContentData(0.01, 0.1, 0.1, 0.1); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(1, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kStableEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 2)); + + // Reset rates and simulate under-shooting scenario: expect to go back up. + qm_resolution_->ResetRates(); + // Update rates for a sequence of intervals. + int target_rate2[] = {200, 200, 200, 200, 200}; + int encoder_sent_rate2[] = {50, 50, 50, 50, 50}; + int incoming_frame_rate2[] = {15, 15, 15, 15, 15}; + uint8_t fraction_lost2[] = {10, 10, 10, 10, 10}; + UpdateQmRateData(target_rate2, encoder_sent_rate2, incoming_frame_rate2, + fraction_lost2, 5); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(kEasyEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 0)); +} + +// Two stages: temporal down-sample and then no action to go up, +// as encoding rate mis-match is too high. +TEST_F(QmSelectTest, 2StageDownTemporalNoActionUp) { + // Initialize with bitrate, frame rate, and native system width/height. + InitQmNativeData(100, 30, 640, 480); + + // Update with encoder frame size. + uint16_t codec_width = 640; + uint16_t codec_height = 480; + qm_resolution_->UpdateCodecFrameSize(codec_width, codec_height); + EXPECT_EQ(2, qm_resolution_->GetImageType(codec_width, codec_height)); + + // Update rates for a sequence of intervals. + int target_rate[] = {100, 100, 100}; + int encoder_sent_rate[] = {100, 100, 100}; + int incoming_frame_rate[] = {30, 30, 30}; + uint8_t fraction_lost[] = {10, 10, 10}; + UpdateQmRateData(target_rate, encoder_sent_rate, incoming_frame_rate, + fraction_lost, 3); + + // Update content: motion level, and 3 spatial prediction errors. + // Low motion, high spatial.
+ UpdateQmContentData(0.01, 0.1, 0.1, 0.1); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(1, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kStableEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 2)); + + // Reset and simulate large rate mis-match: expect no action to go back up. + qm_resolution_->ResetRates(); + // Update rates for a sequence of intervals. + int target_rate2[] = {600, 600, 600, 600, 600}; + int encoder_sent_rate2[] = {1000, 1000, 1000, 1000, 1000}; + int incoming_frame_rate2[] = {15, 15, 15, 15, 15}; + uint8_t fraction_lost2[] = {10, 10, 10, 10, 10}; + UpdateQmRateData(target_rate2, encoder_sent_rate2, incoming_frame_rate2, + fraction_lost2, 5); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(kStressedEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 1)); +} +// 3 stages: spatial down-sample, followed by temporal down-sample, +// and then go up to full state, as encoding rate has increased. +TEST_F(QmSelectTest, 3StageDownSpatialTemporlaUpSpatialTemporal) { + // Initialize with bitrate, frame rate, and native system width/height. + InitQmNativeData(100, 30, 640, 480); + + // Update with encoder frame size. + uint16_t codec_width = 640; + uint16_t codec_height = 480; + qm_resolution_->UpdateCodecFrameSize(codec_width, codec_height); + EXPECT_EQ(2, qm_resolution_->GetImageType(codec_width, codec_height)); + + // Update rates for a sequence of intervals. + int target_rate[] = {100, 100, 100}; + int encoder_sent_rate[] = {100, 100, 100}; + int incoming_frame_rate[] = {30, 30, 30}; + uint8_t fraction_lost[] = {10, 10, 10}; + UpdateQmRateData(target_rate, encoder_sent_rate, incoming_frame_rate, + fraction_lost, 3); + + // Update content: motion level, and 3 spatial prediction errors. + // High motion, low spatial. 
+ UpdateQmContentData(0.1, 0.01, 0.01, 0.01); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(3, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kStableEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 2, 2, 1)); + + // Reset rate and change content data: expect temporal down-sample. + qm_resolution_->ResetRates(); + qm_resolution_->UpdateCodecFrameSize(320, 240); + EXPECT_EQ(1, qm_resolution_->GetImageType(320, 240)); + + // Update content: motion level, and 3 spatial prediction errors. + // Low motion, high spatial. + UpdateQmContentData(0.01, 0.1, 0.1, 0.1); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(1, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kStableEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 2)); + + // Reset rates and go high up in rate: expect to go back up both spatially + // and temporally. + qm_resolution_->ResetRates(); + // Update rates for a sequence of intervals. + int target_rate2[] = {1000, 1000, 1000, 1000, 1000}; + int encoder_sent_rate2[] = {1000, 1000, 1000, 1000, 1000}; + int incoming_frame_rate2[] = {15, 15, 15, 15, 15}; + uint8_t fraction_lost2[] = {10, 10, 10, 10, 10}; + UpdateQmRateData(target_rate2, encoder_sent_rate2, incoming_frame_rate2, + fraction_lost2, 5); + + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(1, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kStableEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 0, 0, 0)); +} + +// No down-sampling below some total amount (factor of 16) +TEST_F(QmSelectTest, NoActionTooMuchDownSampling) { + // Initialize with bitrate, frame rate, and native system width/height. + InitQmNativeData(400, 30, 1280, 720); + + // Update with encoder frame size.
+ uint16_t codec_width = 1280; + uint16_t codec_height = 720; + qm_resolution_->UpdateCodecFrameSize(codec_width, codec_height); + EXPECT_EQ(5, qm_resolution_->GetImageType(codec_width, codec_height)); + + // Update rates for a sequence of intervals. + int target_rate[] = {400, 400, 400}; + int encoder_sent_rate[] = {400, 400, 400}; + int incoming_frame_rate[] = {30, 30, 30}; + uint8_t fraction_lost[] = {10, 10, 10}; + UpdateQmRateData(target_rate, encoder_sent_rate, incoming_frame_rate, + fraction_lost, 3); + + // Update content: motion level, and 3 spatial prediction errors. + // High motion, low spatial: 2x2 spatial expected. + UpdateQmContentData(0.1, 0.01, 0.01, 0.01); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(3, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kStableEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 2, 2, 1)); + + // Reset and lower rates to get another spatial action. + qm_resolution_->ResetRates(); + qm_resolution_->UpdateCodecFrameSize(640, 360); + EXPECT_EQ(2, qm_resolution_->GetImageType(640, 360)); + // Update rates for a sequence of intervals. + int target_rate2[] = {100, 100, 100, 100, 100}; + int encoder_sent_rate2[] = {100, 100, 100, 100, 100}; + int incoming_frame_rate2[] = {30, 30, 30, 30, 30}; + uint8_t fraction_lost2[] = {10, 10, 10, 10, 10}; + UpdateQmRateData(target_rate2, encoder_sent_rate2, incoming_frame_rate2, + fraction_lost2, 5); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(3, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kStableEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 2, 2, 1)); + + // Reset and go to low rate: no action should be taken, + // we went down too much already. + qm_resolution_->ResetRates(); + qm_resolution_->UpdateCodecFrameSize(320, 180); + EXPECT_EQ(0, qm_resolution_->GetImageType(320, 180)); + // Update rates for a sequence of intervals. 
+ int target_rate3[] = {10, 10, 10, 10, 10}; + int encoder_sent_rate3[] = {10, 10, 10, 10, 10}; + int incoming_frame_rate3[] = {30, 30, 30, 30, 30}; + uint8_t fraction_lost3[] = {10, 10, 10, 10, 10}; + UpdateQmRateData(target_rate3, encoder_sent_rate3, incoming_frame_rate3, + fraction_lost3, 5); + EXPECT_EQ(0, qm_resolution_->SelectResolution(&qm_scale_)); + EXPECT_EQ(3, qm_resolution_->ComputeContentClass()); + EXPECT_EQ(kStableEncoding, qm_resolution_->GetEncoderState()); + EXPECT_TRUE(IsSelectedActionCorrect(qm_scale_, 1, 1, 1)); +} + +void QmSelectTest::InitQmNativeData(float initial_bit_rate, + int user_frame_rate, + int native_width, + int native_height) { + EXPECT_EQ(0, qm_resolution_->Initialize(initial_bit_rate, user_frame_rate, + native_width, native_height)); +} + +void QmSelectTest::UpdateQmContentData(float motion_metric, + float spatial_metric, + float spatial_metric_horiz, + float spatial_metric_vert) { + content_metrics_->motion_magnitude = motion_metric; + content_metrics_->spatial_pred_err = spatial_metric; + content_metrics_->spatial_pred_err_h = spatial_metric_horiz; + content_metrics_->spatial_pred_err_v = spatial_metric_vert; + qm_resolution_->UpdateContent(content_metrics_); +} + +void QmSelectTest::UpdateQmEncodedFrame(int* encoded_size, int num_updates) { + FrameType frame_type = kVideoFrameDelta; + for (int i = 0; i < num_updates; i++) { + // Convert to bytes. 
+ int32_t encoded_size_update = 1000 * encoded_size[i] / 8; + qm_resolution_->UpdateEncodedSize(encoded_size_update, frame_type); + } +} + +void QmSelectTest::UpdateQmRateData(int* target_rate, + int* encoder_sent_rate, + int* incoming_frame_rate, + uint8_t* fraction_lost, + int num_updates) { + for (int i = 0; i < num_updates; i++) { + float target_rate_update = target_rate[i]; + float encoder_sent_rate_update = encoder_sent_rate[i]; + float incoming_frame_rate_update = incoming_frame_rate[i]; + uint8_t fraction_lost_update = fraction_lost[i]; + qm_resolution_->UpdateRates(target_rate_update, + encoder_sent_rate_update, + incoming_frame_rate_update, + fraction_lost_update); + } +} + +// Check if the selected action from the QmResolution class is the same +// as the expected scales from |fac_width|, |fac_height|, |fac_temp|. +bool QmSelectTest::IsSelectedActionCorrect(VCMResolutionScale* qm_scale, + uint8_t fac_width, + uint8_t fac_height, + uint8_t fac_temp) { + if (qm_scale->spatialWidthFact == fac_width && + qm_scale->spatialHeightFact == fac_height && + qm_scale->temporalFact == fac_temp) { + return true; + } else { + return false; + } +} +} // namespace webrtc diff --git a/src/modules/video_coding/main/source/video_coding_test.gypi b/src/modules/video_coding/main/source/video_coding_test.gypi index a0ff037c52..7ab265c59f 100644 --- a/src/modules/video_coding/main/source/video_coding_test.gypi +++ b/src/modules/video_coding/main/source/video_coding_test.gypi @@ -83,6 +83,7 @@ 'jitter_buffer_unittest.cc', 'session_info_unittest.cc', 'video_coding_robustness_unittest.cc', + 'qm_select_unittest.cc', ], }, ], diff --git a/src/modules/video_processing/main/source/content_analysis.cc b/src/modules/video_processing/main/source/content_analysis.cc index 32ee09a18f..8ea319c24d 100644 --- a/src/modules/video_processing/main/source/content_analysis.cc +++ b/src/modules/video_processing/main/source/content_analysis.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC
project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -23,14 +23,10 @@ _width(0), _height(0), _skipNum(1), _border(8), -_motionMagnitudeNZ(0.0f), +_motionMagnitude(0.0f), _spatialPredErr(0.0f), _spatialPredErrH(0.0f), _spatialPredErrV(0.0f), -_sizeZeroMotion(0.0f), -_motionPredErr(0.0f), -_motionHorizontalness(0.0f), -_motionClusterDistortion(0.0f), _firstFrame(true), _CAInit(false), _cMetrics(NULL) @@ -224,7 +220,7 @@ VPMContentAnalysis::TemporalDiffMetric_C() } // default - _motionMagnitudeNZ = 0.0f; + _motionMagnitude = 0.0f; if (tempDiffSum == 0) { @@ -240,7 +236,7 @@ VPMContentAnalysis::TemporalDiffMetric_C() if (contrast > 0.0) { contrast = sqrt(contrast); - _motionMagnitudeNZ = tempDiffAvg/contrast; + _motionMagnitude = tempDiffAvg/contrast; } return VPM_OK; @@ -329,18 +325,11 @@ VPMContentAnalysis::ContentMetrics() return NULL; } - - _cMetrics->spatialPredErr = _spatialPredErr; - _cMetrics->spatialPredErrH = _spatialPredErrH; - _cMetrics->spatialPredErrV = _spatialPredErrV; - // normalized temporal difference (MAD) - _cMetrics->motionMagnitudeNZ = _motionMagnitudeNZ; - - // Set to zero: not computed - _cMetrics->motionPredErr = _motionPredErr; - _cMetrics->sizeZeroMotion = _sizeZeroMotion; - _cMetrics->motionHorizontalness = _motionHorizontalness; - _cMetrics->motionClusterDistortion = _motionClusterDistortion; + _cMetrics->spatial_pred_err = _spatialPredErr; + _cMetrics->spatial_pred_err_h = _spatialPredErrH; + _cMetrics->spatial_pred_err_v = _spatialPredErrV; + // Motion metric: normalized temporal difference (MAD) + _cMetrics->motion_magnitude = _motionMagnitude; return _cMetrics; diff --git a/src/modules/video_processing/main/source/content_analysis.h b/src/modules/video_processing/main/source/content_analysis.h index 5051650291..588712a94c 100644 --- 
a/src/modules/video_processing/main/source/content_analysis.h +++ b/src/modules/video_processing/main/source/content_analysis.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -74,15 +74,10 @@ private: // Content Metrics: // stores the local average of the metrics - float _motionMagnitudeNZ; // motion class + float _motionMagnitude; // motion class float _spatialPredErr; // spatial class float _spatialPredErrH; // spatial class float _spatialPredErrV; // spatial class - float _sizeZeroMotion; // motion class - float _motionPredErr; // complexity class: - float _motionHorizontalness; // coherence class - float _motionClusterDistortion; // coherence class - bool _firstFrame; bool _CAInit; diff --git a/src/modules/video_processing/main/source/content_analysis_sse2.cc b/src/modules/video_processing/main/source/content_analysis_sse2.cc index 810c3cc0fd..f505850d05 100644 --- a/src/modules/video_processing/main/source/content_analysis_sse2.cc +++ b/src/modules/video_processing/main/source/content_analysis_sse2.cc @@ -102,7 +102,7 @@ VPMContentAnalysis::TemporalDiffMetric_SSE2() const WebRtc_UWord32 tempDiffSum = sad_final_64[0] + sad_final_64[1]; // default - _motionMagnitudeNZ = 0.0f; + _motionMagnitude = 0.0f; if (tempDiffSum == 0) { @@ -118,7 +118,7 @@ VPMContentAnalysis::TemporalDiffMetric_SSE2() if (contrast > 0.0) { contrast = sqrt(contrast); - _motionMagnitudeNZ = tempDiffAvg/contrast; + _motionMagnitude = tempDiffAvg/contrast; } return VPM_OK; diff --git a/src/modules/video_processing/main/source/frame_preprocessor.cc b/src/modules/video_processing/main/source/frame_preprocessor.cc index fa13b2df7e..76fdac8384 100644 --- a/src/modules/video_processing/main/source/frame_preprocessor.cc +++ 
b/src/modules/video_processing/main/source/frame_preprocessor.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -16,8 +16,6 @@ namespace webrtc { VPMFramePreprocessor::VPMFramePreprocessor(): _id(0), _contentMetrics(NULL), -_nativeHeight(0), -_nativeWidth(0), _maxFrameRate(0), _resampledFrame(), _enableCA(false) @@ -46,8 +44,6 @@ VPMFramePreprocessor::ChangeUniqueId(const WebRtc_Word32 id) void VPMFramePreprocessor::Reset() { - _nativeWidth = 0; - _nativeHeight = 0; _ca->Release(); _vd->Reset(); _contentMetrics = NULL; @@ -172,11 +168,6 @@ VPMFramePreprocessor::PreprocessFrame(const VideoFrame* frame, VideoFrame** proc } else { _contentMetrics = _ca->ComputeContentMetrics(&_resampledFrame); } - // Update native values: - _contentMetrics->nativeHeight = frame->Height(); - _contentMetrics->nativeWidth = frame->Width(); - // Max value as set by user - _contentMetrics->nativeFrameRate = _maxFrameRate; } return VPM_OK; } diff --git a/src/modules/video_processing/main/source/frame_preprocessor.h b/src/modules/video_processing/main/source/frame_preprocessor.h index cb5f97d4bb..3c07a47e54 100644 --- a/src/modules/video_processing/main/source/frame_preprocessor.h +++ b/src/modules/video_processing/main/source/frame_preprocessor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -66,8 +66,6 @@ private: WebRtc_Word32 _id; VideoContentMetrics* _contentMetrics; - WebRtc_UWord32 _nativeHeight; - WebRtc_UWord32 _nativeWidth; WebRtc_UWord32 _maxFrameRate; VideoFrame _resampledFrame; VPMSpatialResampler* _spatialResampler; diff --git a/src/modules/video_processing/main/test/unit_test/content_metrics_test.cc b/src/modules/video_processing/main/test/unit_test/content_metrics_test.cc index b25c45fc6d..54a13900df 100644 --- a/src/modules/video_processing/main/test/unit_test/content_metrics_test.cc +++ b/src/modules/video_processing/main/test/unit_test/content_metrics_test.cc @@ -29,10 +29,10 @@ TEST_F(VideoProcessingModuleTest, ContentAnalysis) _cM_c = _ca_c.ComputeContentMetrics(&_videoFrame); _cM_SSE = _ca_sse.ComputeContentMetrics(&_videoFrame); - ASSERT_EQ(_cM_c->spatialPredErr, _cM_SSE->spatialPredErr); - ASSERT_EQ(_cM_c->spatialPredErrV, _cM_SSE->spatialPredErrV); - ASSERT_EQ(_cM_c->spatialPredErrH, _cM_SSE->spatialPredErrH); - ASSERT_EQ(_cM_c->motionMagnitudeNZ, _cM_SSE->motionMagnitudeNZ); + ASSERT_EQ(_cM_c->spatial_pred_err, _cM_SSE->spatial_pred_err); + ASSERT_EQ(_cM_c->spatial_pred_err_v, _cM_SSE->spatial_pred_err_v); + ASSERT_EQ(_cM_c->spatial_pred_err_h, _cM_SSE->spatial_pred_err_h); + ASSERT_EQ(_cM_c->motion_magnitude, _cM_SSE->motion_magnitude); } ASSERT_NE(0, feof(_sourceFile)) << "Error reading source file"; }