From e02b57e397bc2848ea45812c9537c4ce7f716af8 Mon Sep 17 00:00:00 2001 From: "marpan@google.com" Date: Tue, 28 Jun 2011 00:02:51 +0000 Subject: [PATCH] Updates to qm_select: Function to update content state, and function for FEC rate adjustment. Added packetLoss parameter to qm_select, and some code clean-up. Review URL: http://webrtc-codereview.appspot.com/44009 git-svn-id: http://webrtc.googlecode.com/svn/trunk@128 4adac7df-926f-26a2-2b94-8c16560cd09d --- .../main/source/media_optimization.cc | 8 +- modules/video_coding/main/source/qm_select.cc | 404 ++++++++++-------- modules/video_coding/main/source/qm_select.h | 52 ++- .../video_coding/main/source/qm_select_data.h | 159 ++++--- 4 files changed, 367 insertions(+), 256 deletions(-) diff --git a/modules/video_coding/main/source/media_optimization.cc b/modules/video_coding/main/source/media_optimization.cc index e879720bae..4cb1480565 100644 --- a/modules/video_coding/main/source/media_optimization.cc +++ b/modules/video_coding/main/source/media_optimization.cc @@ -197,7 +197,8 @@ VCMMediaOptimization::SetTargetRates(WebRtc_UWord32 bitRate, if (_enableQm) { //Update QM with rates - _qms->UpdateRates((float)_targetBitRate, _avgSentBitRateBps, _incomingFrameRate); + _qms->UpdateRates((float)_targetBitRate, _avgSentBitRateBps, + _incomingFrameRate, _fractionLost); //Check for QM selection bool selectQM = checkStatusForQMchange(); if (selectQM) @@ -537,9 +538,12 @@ VCMMediaOptimization::SelectQuality() // Reset quantities for QM select _qms->ResetQM(); + // Update QM with long-term averaged content metrics. 
+ _qms->UpdateContent(_content->LongTermAvgData()); + // Select quality mode VCMQualityMode* qm = NULL; - WebRtc_Word32 ret = _qms->SelectQuality(_content->LongTermAvgData(), &qm); + WebRtc_Word32 ret = _qms->SelectQuality(&qm); if (ret < 0) { return ret; diff --git a/modules/video_coding/main/source/qm_select.cc b/modules/video_coding/main/source/qm_select.cc index 9e165f253e..3220db08a2 100644 --- a/modules/video_coding/main/source/qm_select.cc +++ b/modules/video_coding/main/source/qm_select.cc @@ -23,12 +23,14 @@ namespace webrtc { VCMQmSelect::VCMQmSelect() { _qm = new VCMQualityMode(); + _contentMetrics = new VideoContentMetrics(); Reset(); } VCMQmSelect::~VCMQmSelect() { delete _qm; + delete _contentMetrics; } void @@ -50,10 +52,11 @@ void VCMQmSelect::ResetRates() { _sumEncodedBytes = 0; - _sumTargetRate = 0; - _sumIncomingFrameRate = 0; - _sumFrameRateMM = 0; - _sumSeqRateMM = 0; + _sumTargetRate = 0.0f; + _sumIncomingFrameRate = 0.0f; + _sumFrameRateMM = 0.0f; + _sumSeqRateMM = 0.0f; + _sumPacketLoss = 0.0f; _frameCnt = 0; _frameCntDelta = 0; _lowBufferCnt = 0; @@ -64,21 +67,25 @@ VCMQmSelect::ResetRates() void VCMQmSelect::Reset() { - _stateDecFactorSpatial = 1; - _stateDecFactorTemp = 1; - _bufferLevel = 0; - _targetBitRate = 0; - _incomingFrameRate = 0; - _userFrameRate = 0; - _perFrameBandwidth =0; - ResetQM(); - ResetRates(); - return; + _stateDecFactorSpatial = 1; + _stateDecFactorTemp = 1; + _bufferLevel = 0.0f; + _targetBitRate = 0.0f; + _incomingFrameRate = 0.0f; + _userFrameRate = 0.0f; + _perFrameBandwidth =0.0f; + _prevTotalRate = 0.0f; + _prevRttTime = 0; + _prevPacketLoss = 0; + ResetQM(); + ResetRates(); + return; } //Initialize after reset of encoder WebRtc_Word32 -VCMQmSelect::Initialize(float bitRate, float userFrameRate, WebRtc_UWord32 width, WebRtc_UWord32 height) +VCMQmSelect::Initialize(float bitRate, float userFrameRate, + WebRtc_UWord32 width, WebRtc_UWord32 height) { if (userFrameRate == 0.0f || width == 0 || height == 0) { @@ 
-86,11 +93,15 @@ VCMQmSelect::Initialize(float bitRate, float userFrameRate, WebRtc_UWord32 width } _targetBitRate = bitRate; _userFrameRate = userFrameRate; - //Encoder width and height + + // Encoder width and height _width = width; _height = height; - //Initial buffer level + + // Initial buffer level _bufferLevel = INIT_BUFFER_LEVEL * _targetBitRate; + + // Per-frame bandwidth if ( _incomingFrameRate == 0 ) { _perFrameBandwidth = _targetBitRate / _userFrameRate; @@ -98,9 +109,11 @@ VCMQmSelect::Initialize(float bitRate, float userFrameRate, WebRtc_UWord32 width } else { - //Take average: this is due to delay in update of new frame rate in encoder: - //userFrameRate is the new one, incomingFrameRate is the old one (based on previous ~ 1sec) - _perFrameBandwidth = 0.5 *( _targetBitRate / _userFrameRate + _targetBitRate / _incomingFrameRate ); + // Take average: this is due to delay in update of new encoder frame rate: + // userFrameRate is the new one, + // incomingFrameRate is the old one (based on previous ~ 1sec/RTCP report) + _perFrameBandwidth = 0.5 *( _targetBitRate / _userFrameRate + + _targetBitRate / _incomingFrameRate ); } _init = true; @@ -112,7 +125,7 @@ WebRtc_Word32 VCMQmSelect::SetPreferences(WebRtc_Word8 resolPref) { // Preference setting for temporal over spatial resolution - // 100 means temporal, 0 means spatial, 50 is neutral (we decide) + // 100 means temporal, 0 means spatial, 50 is neutral _userResolutionPref = resolPref; return VCM_OK; @@ -120,69 +133,84 @@ VCMQmSelect::SetPreferences(WebRtc_Word8 resolPref) //Update after every encoded frame void -VCMQmSelect::UpdateEncodedSize(WebRtc_Word64 encodedSize, FrameType encodedFrameType) +VCMQmSelect::UpdateEncodedSize(WebRtc_Word64 encodedSize, + FrameType encodedFrameType) { - //Update encoded size; + // Update encoded size; _sumEncodedBytes += encodedSize; _frameCnt++; - //Convert to Kbps + // Convert to Kbps float encodedSizeKbits = (float)((encodedSize * 8.0) / 1000.0); - //Update the 
buffer level: per_frame_BW is updated when encoder is updated, every ~1sec + // Update the buffer level: + // per_frame_BW is updated when encoder is updated, every RTCP reports _bufferLevel += _perFrameBandwidth - encodedSizeKbits; + // Mismatch here is based on difference of actual encoded frame size and + // per-frame bandwidth, for delta frames + // This is a much stronger condition on rate mismatch than sumSeqRateMM + // Note: not used in this version + /* const bool deltaFrame = (encodedFrameType != kVideoFrameKey && encodedFrameType != kVideoFrameGolden); - //Sum the frame mismatch: - //Mismatch here is based on difference of actual encoded frame size and per-frame bandwidth, for delta frames - //This is a much stronger condition on rate mismatch than sumSeqRateMM - // Note: not used in this version - /* + // Sum the frame mismatch: if (deltaFrame) { _frameCntDelta++; if (encodedSizeKbits > 0) - _sumFrameRateMM += (float) (fabs(encodedSizeKbits - _perFrameBandwidth) / encodedSizeKbits); + _sumFrameRateMM += + (float) (fabs(encodedSizeKbits - _perFrameBandwidth) / + encodedSizeKbits); } */ - //Counter for occurrences of low buffer level + // Counter for occurrences of low buffer level if (_bufferLevel <= PERC_BUFFER_THR * INIT_BUFFER_LEVEL * _targetBitRate) { _lowBufferCnt++; } - - } -//Update after SetTargetRates in MediaOpt (every ~1sec) +//Update various quantities after SetTargetRates in MediaOpt void -VCMQmSelect::UpdateRates(float targetBitRate, float avgSentBitRate, float incomingFrameRate) +VCMQmSelect::UpdateRates(float targetBitRate, float avgSentBitRate, + float incomingFrameRate, WebRtc_UWord8 packetLoss) { - //Sum the target bitrate and incoming frame rate: these values are the encoder rates (from previous ~1sec), - //i.e, before the update for next ~1sec + // Sum the target bitrate and incoming frame rate: + // these values are the encoder rates (from previous update ~1sec), + // i.e, before the update for next ~1sec _sumTargetRate += 
_targetBitRate; _sumIncomingFrameRate += _incomingFrameRate; _updateRateCnt++; - //Convert to kbps + // Sum the received (from RTCP reports) packet loss rates + _sumPacketLoss += (float) packetLoss / 255.0f; + + // Convert average sent bitrate to kbps float avgSentBitRatekbps = avgSentBitRate / 1000.0f; - //Sum the sequence rate mismatch: - //Mismatch here is based on difference between target rate the encoder used (in previous ~1sec) and the average actual - //encoding rate at current time - if (fabs(_targetBitRate - avgSentBitRatekbps) < THRESH_SUM_MM && _targetBitRate > 0.0 ) - _sumSeqRateMM += (float) (fabs(_targetBitRate - avgSentBitRatekbps) / _targetBitRate ); + // Sum the sequence rate mismatch: + // Mismatch here is based on difference between target rate the encoder + // used (in previous ~1sec) and the average actual + // encoding rate measured at current time + if (fabs(_targetBitRate - avgSentBitRatekbps) < THRESH_SUM_MM && + _targetBitRate > 0.0 ) + { + _sumSeqRateMM += (float) + (fabs(_targetBitRate - avgSentBitRatekbps) / _targetBitRate ); + } - //Update QM with the current new target and frame rate: these values are ones the encoder will use for the current/next ~1sec + // Update QM with the current new target and frame rate: + // these values are ones the encoder will use for the current/next ~1sec _targetBitRate = targetBitRate; _incomingFrameRate = incomingFrameRate; - //Update QM with an (average) encoder per_frame_bandwidth: this is the per_frame_bw for the next ~1sec + // Update QM with an (average) encoder per_frame_bandwidth: + // this is the per_frame_bw for the current/next ~1sec _perFrameBandwidth = 0.0f; if (_incomingFrameRate > 0.0f) { @@ -191,119 +219,164 @@ VCMQmSelect::UpdateRates(float targetBitRate, float avgSentBitRate, float incomi } +// Adjust the FEC rate based on the content and the network state +// (packet loss rate, total rate/bandwidth, round trip time). +// Note that packetLoss here is the filtered loss value. 
+WebRtc_UWord8 +VCMQmSelect::AdjustFecFactor(WebRtc_UWord8 codeRateDelta, float totalRate, + float frameRate,WebRtc_UWord16 rttTime, + WebRtc_UWord8 packetLoss) +{ + // Default: no adjustment + WebRtc_UWord8 codeRateDeltaAdjust = codeRateDelta; + float adjustFec = 1.0f; + + // TODO (marpan): + // Set FEC adjustment factor + + codeRateDeltaAdjust = static_cast<WebRtc_UWord8>(codeRateDelta * adjustFec); + + // Keep track of previous values of network state: + // adjustment may be also based on pattern of changes in network state + _prevTotalRate = totalRate; + _prevRttTime = rttTime; + _prevPacketLoss = packetLoss; + + return codeRateDeltaAdjust; +} + +void +VCMQmSelect::UpdateContent(const VideoContentMetrics* contentMetrics) +{ + _contentMetrics = contentMetrics; +} + +// Select the resolution factors: frame size and frame rate change: (QM modes) +// Selection is for going back up in resolution, or going down in resolution. WebRtc_Word32 -VCMQmSelect::SelectQuality(const VideoContentMetrics* contentMetrics, VCMQualityMode** qm) +VCMQmSelect::SelectQuality(VCMQualityMode** qm) { if (!_init) { return VCM_UNINITIALIZED; } - if (contentMetrics == NULL) + if (_contentMetrics == NULL) { Reset(); //default values *qm = _qm; return VCM_OK; } - //Default settings + // Default settings _qm->spatialWidthFact = 1; _qm->spatialHeightFact = 1; _qm->temporalFact = 1; - _contentMetrics = contentMetrics; - //Update native values + // Update native values _nativeWidth = _contentMetrics->nativeWidth; _nativeHeight = _contentMetrics->nativeHeight; _nativeFrameRate = _contentMetrics->nativeFrameRate; - //Aspect ratio: used for selection of 1x2,2x1,2x2 + // Aspect ratio: used for selection of 1x2,2x1,2x2 _aspectRatio = (float)_width / (float)_height; float avgTargetRate = 0.0f; float avgIncomingFrameRate = 0.0f; float ratioBufferLow = 0.0f; float rateMisMatch = 0.0f; + float avgPacketLoss = 0.0f; if (_frameCnt > 0) { ratioBufferLow = (float)_lowBufferCnt / (float)_frameCnt; } if (_updateRateCnt > 0) { - //use 
seq-rate mismatch for now + // Use seq-rate mismatch for now rateMisMatch = (float)_sumSeqRateMM / (float)_updateRateCnt; //rateMisMatch = (float)_sumFrameRateMM / (float)_frameCntDelta; - //average target and incoming frame rates + // Average target and incoming frame rates avgTargetRate = (float)_sumTargetRate / (float)_updateRateCnt; - avgIncomingFrameRate = (float)_sumIncomingFrameRate / (float)_updateRateCnt; + avgIncomingFrameRate = (float)_sumIncomingFrameRate / + (float)_updateRateCnt; + + // Average received packet loss rate + avgPacketLoss = (float)_sumPacketLoss / (float)_updateRateCnt; } - //For qm selection below, may want to weight the average encoder rates with the current (for next ~1sec) rate values - //uniform average for now: + // For QM selection below, may want to weight the average encoder rates + // with the current (for next ~1sec) rate values. + // Uniform average for now: float w1 = 0.5f; float w2 = 0.5f; avgTargetRate = w1 * avgTargetRate + w2 * _targetBitRate; avgIncomingFrameRate = w1 * avgIncomingFrameRate + w2 * _incomingFrameRate; - //Set the maximum transitional rate and image type: for up-sampled spatial dimensions - //Needed to get the transRate for going back up in spatial resolution (only 2x2 allowed in this version) + // Set the maximum transitional rate and image type: + // for up-sampled spatial dimensions. + // This is needed to get the transRate for going back up in + // spatial resolution (only 2x2 allowed in this version). SetMaxRateForQM(2 * _width, 2 * _height); WebRtc_UWord8 imageType2 = _imageType; WebRtc_UWord32 maxRateQM2 = _maxRateQM; - //Set the maximum transitional rate and image type: for the input/encoder spatial dimensions + // Set the maximum transitional rate and image type: + // for the encoder spatial dimensions. SetMaxRateForQM(_width, _height); - //Compute metric features + // Compute class state of the content. 
MotionNFD(); Spatial(); // - //Get transitional rate from table, based on image type and content class + // Get transitional rate from table, based on image type and content class. // - //Get image size class: map _imageType to 2 classes + // Get image class and content class: for going down spatially WebRtc_UWord8 imageClass = 1; if (_imageType <= 3) imageClass = 0; - WebRtc_UWord8 contentClass = 3 * _motion.level + _spatial.level; WebRtc_UWord8 tableIndex = imageClass * 9 + contentClass; float scaleTransRate = kScaleTransRateQm[tableIndex]; - // for transRate for going back up spatially + // Get image class and content class: for going up spatially WebRtc_UWord8 imageClass2 = 1; if (imageType2 <= 3) imageClass2 = 0; WebRtc_UWord8 tableIndex2 = imageClass2 * 9 + contentClass; float scaleTransRate2 = kScaleTransRateQm[tableIndex2]; - // - WebRtc_UWord32 estimatedTransRateDown = (WebRtc_UWord32) (_incomingFrameRate * scaleTransRate * _maxRateQM / 30); - WebRtc_UWord32 estimatedTransRateUpT = (WebRtc_UWord32) (TRANS_RATE_SCALE_UP_TEMP * 2 * _incomingFrameRate * scaleTransRate * _maxRateQM / 30); - WebRtc_UWord32 estimatedTransRateUpS = (WebRtc_UWord32) (TRANS_RATE_SCALE_UP_SPATIAL * _incomingFrameRate * scaleTransRate2 * maxRateQM2 / 30); + // Transitional rate for going down + WebRtc_UWord32 estimatedTransRateDown = static_cast<WebRtc_UWord32> + (_incomingFrameRate * scaleTransRate * _maxRateQM / 30); + + // Transitional rate for going up temporally + WebRtc_UWord32 estimatedTransRateUpT = static_cast<WebRtc_UWord32> + (TRANS_RATE_SCALE_UP_TEMP * 2 * _incomingFrameRate * + scaleTransRate * _maxRateQM / 30); + + // Transitional rate for going up spatially + WebRtc_UWord32 estimatedTransRateUpS = static_cast<WebRtc_UWord32> + (TRANS_RATE_SCALE_UP_SPATIAL * _incomingFrameRate * + scaleTransRate2 * maxRateQM2 / 30); // - //done with transitional rate + // Done with transitional rates // - WEBRTC_TRACE(webrtc::kTraceDebug, webrtc::kTraceVideo, -1, - "Content Metrics: Motion = %d , Spatial = %d, Est. Trans. 
BR = %d", - _motion.level, _spatial.level, estimatedTransRateDown); - - - // //CHECK FOR GOING BACK UP IN RESOLUTION // bool selectedUp = false; - //Check if native has been spatially down-sampled + // Check if native has been spatially down-sampled if (_stateDecFactorSpatial > 1) { - //check conditions on frame_skip and rate_mismatch + // Check conditions on buffer level and rate_mismatch if ( (avgTargetRate > estimatedTransRateUpS) && - (ratioBufferLow < MAX_BUFFER_LOW) && (rateMisMatch < MAX_RATE_MM) ) + (ratioBufferLow < MAX_BUFFER_LOW) && (rateMisMatch < MAX_RATE_MM)) { - //width/height scaled back up: setting 0 indicates scaling back to native + // width/height scaled back up: + // setting 0 indicates scaling back to native _qm->spatialHeightFact = 0; _qm->spatialWidthFact = 0; selectedUp = true; @@ -313,25 +386,26 @@ VCMQmSelect::SelectQuality(const VideoContentMetrics* contentMetrics, VCMQuality if (_stateDecFactorTemp > 1) { if ( (avgTargetRate > estimatedTransRateUpT) && - (ratioBufferLow < MAX_BUFFER_LOW) && (rateMisMatch < MAX_RATE_MM) ) + (ratioBufferLow < MAX_BUFFER_LOW) && (rateMisMatch < MAX_RATE_MM)) { - //temporal scale back up: setting 0 indicates scaling back to native + // temporal scale back up: + // setting 0 indicates scaling back to native _qm->temporalFact = 0; selectedUp = true; } } - //leave QM if we selected to go back up in either spatial or temporal resolution + // Leave QM if we selected to go back up in either spatial or temporal if (selectedUp == true) { - //Update down-sampling state - //Note: only temp reduction by 2 is allowed + // Update down-sampling state + // Note: only temp reduction by 2 is allowed if (_qm->temporalFact == 0) { _stateDecFactorTemp = _stateDecFactorTemp / 2; } - //Update down-sampling state - //Note: only spatial reduction by 2x2 is allowed + // Update down-sampling state + // Note: only spatial reduction by 2x2 is allowed if (_qm->spatialHeightFact == 0 && _qm->spatialWidthFact == 0 ) { 
_stateDecFactorSpatial = _stateDecFactorSpatial / 4; @@ -341,73 +415,36 @@ VCMQmSelect::SelectQuality(const VideoContentMetrics* contentMetrics, VCMQuality } // - //done with checking for going back up + // Done with checking for going back up in resolution // // //CHECK FOR RESOLUTION REDUCTION // - //ST QM extraction if: - // (1) target rate is lower than transitional rate (with safety margin), or - // (2) frame skip is larger than threshold, or + // Resolution reduction if: + // (1) target rate is lower than transitional rate, or + // (2) buffer level is not stable, or // (3) rate mismatch is larger than threshold - if ( (avgTargetRate < estimatedTransRateDown ) || (ratioBufferLow > MAX_BUFFER_LOW) - || (rateMisMatch > MAX_RATE_MM) ) + // Bias down-sampling based on packet loss conditions + if (avgPacketLoss > LOSS_THR) + { + estimatedTransRateDown = LOSS_RATE_FAC * estimatedTransRateDown; + } + + if ((avgTargetRate < estimatedTransRateDown ) || + (ratioBufferLow > MAX_BUFFER_LOW) + || (rateMisMatch > MAX_RATE_MM)) { WebRtc_UWord8 spatialFact = 1; WebRtc_UWord8 tempFact = 1; - //Get the Action: - //Note: only consider spatial by 2x2 OR temporal reduction by 2 in this version - if (_motion.level == kLow && _spatial.level == kLow) - { - spatialFact = 1; - tempFact = 1; - } - else if (_motion.level == kLow && _spatial.level == kHigh) - { - spatialFact = 1; - tempFact = 2; - } - else if (_motion.level == kLow && _spatial.level == kDefault) - { - spatialFact = 1; - tempFact = 2; - } - else if (_motion.level == kHigh && _spatial.level == kLow) - { - spatialFact = 4; - tempFact = 1; - } - else if (_motion.level == kHigh && _spatial.level == kHigh) - { - spatialFact = 1; - tempFact = 2; - } - else if (_motion.level == kHigh && _spatial.level == kDefault) - { - spatialFact = 4; - tempFact = 1; - } - else if (_motion.level == kDefault && _spatial.level == kLow) - { - spatialFact = 4; - tempFact = 1; - } - else if (_motion.level == kDefault && _spatial.level == kHigh) - { 
- spatialFact = 1; - tempFact = 2; - } - else if (_motion.level == kDefault && _spatial.level == kDefault) - { - spatialFact = 1; - tempFact = 1; - } - // + // Get the action + spatialFact = kSpatialAction[contentClass]; + tempFact = kTemporalAction[contentClass]; + switch(spatialFact) { case 4: @@ -418,9 +455,9 @@ VCMQmSelect::SelectQuality(const VideoContentMetrics* contentMetrics, VCMQuality //default is 1x2 (H) _qm->spatialWidthFact = 2; _qm->spatialHeightFact = 1; - //Select 1x2,2x1, or back to 2x2: depends on prediction errors, aspect ratio, and horizontalness of motion - //Note: directional selection not used in this version - //SelectSpatialDirectionMode((float) estimatedTransRateDown); + // Select 1x2,2x1, or back to 2x2 + // Note: directional selection not used in this version + // SelectSpatialDirectionMode((float) estimatedTransRateDown); break; default: _qm->spatialWidthFact = 1; @@ -429,48 +466,62 @@ VCMQmSelect::SelectQuality(const VideoContentMetrics* contentMetrics, VCMQuality } _qm->temporalFact = tempFact; - //Sanity check on ST QM selection: override the settings for too small image size and frame rate - //Also check limit the current down-sampling state + // Sanity check on ST QM selection: + // override the settings for too small image size and frame rate + // Also check the limit on current down-sampling state - //No spatial sampling if image size is too small (QCIF) - if ( (_width * _height) <= MIN_IMAGE_SIZE || _stateDecFactorSpatial >= MAX_SPATIAL_DOWN_FACT) + // No spatial sampling if image size is too small (QCIF) + if ( (_width * _height) <= MIN_IMAGE_SIZE || + _stateDecFactorSpatial >= MAX_SPATIAL_DOWN_FACT) { _qm->spatialWidthFact = 1; _qm->spatialHeightFact = 1; } - //No frame rate reduction below some point: use the (average) incoming frame rate - if ( avgIncomingFrameRate <= MIN_FRAME_RATE_QM || _stateDecFactorTemp >= MAX_TEMP_DOWN_FACT) + // No frame rate reduction below some point: + // use the (average) incoming frame rate + if 
( avgIncomingFrameRate <= MIN_FRAME_RATE_QM || + _stateDecFactorTemp >= MAX_TEMP_DOWN_FACT) { _qm->temporalFact = 1; } - //No down-sampling if current spatial-temporal downsampling state is above threshold - if (_stateDecFactorTemp * _stateDecFactorSpatial >= MAX_SPATIAL_TEMP_DOWN_FACT) + // No down-sampling if current downsampling state is above threshold + if (_stateDecFactorTemp * _stateDecFactorSpatial >= + MAX_SPATIAL_TEMP_DOWN_FACT) { _qm->spatialWidthFact = 1; _qm->spatialHeightFact = 1; _qm->temporalFact = 1; } // - //done with sanity checks on ST QM selection + // Done with sanity checks on ST QM selection // - //Note: to disable spatial down-sampling - // _qm->spatialWidthFact = 1; - // _qm->spatialHeightFact = 1; - - //Update down-sampling states - _stateDecFactorSpatial = _stateDecFactorSpatial * _qm->spatialWidthFact * _qm->spatialHeightFact; + // Update down-sampling states + _stateDecFactorSpatial = _stateDecFactorSpatial * _qm->spatialWidthFact + * _qm->spatialHeightFact; _stateDecFactorTemp = _stateDecFactorTemp * _qm->temporalFact; + if (_qm->spatialWidthFact != 1 || _qm->spatialHeightFact != 1 || + _qm->temporalFact != 1) + { + + WEBRTC_TRACE(webrtc::kTraceDebug, webrtc::kTraceVideo, -1, + "Resolution reduction occurred" + "Content Metrics are: Motion = %d , Spatial = %d, " + "Rates are: Est. Trans. 
BR = %d, Avg.Target BR = %f", + _motion.level, _spatial.level, + estimatedTransRateDown, avgTargetRate); + } + } else { *qm = _qm; return VCM_OK; } - // done with checking for resolution reduction + // Done with checking for resolution reduction *qm = _qm; return VCM_OK; @@ -481,9 +532,9 @@ VCMQmSelect::SelectQuality(const VideoContentMetrics* contentMetrics, VCMQuality WebRtc_Word32 VCMQmSelect::SelectSpatialDirectionMode(float transRate) { - //Default is 1x2 (H) + // Default is 1x2 (H) - //For bit rates well below transitional rate, we select 2x2 + // For bit rates well below transitional rate, we select 2x2 if ( _targetBitRate < transRate * RATE_RED_SPATIAL_2X2 ) { _qm->spatialWidthFact = 2; @@ -491,13 +542,13 @@ VCMQmSelect::SelectSpatialDirectionMode(float transRate) return VCM_OK; } - //Otherwise check prediction errors, aspect ratio, horizonalness of motion + // Otherwise check prediction errors, aspect ratio, horizontalness float spatialErr = _contentMetrics->spatialPredErr; float spatialErrH = _contentMetrics->spatialPredErrH; float spatialErrV = _contentMetrics->spatialPredErrV; - //favor 1x2 if aspect_ratio is 16:9 + // Favor 1x2 if aspect_ratio is 16:9 if (_aspectRatio >= 16.0f / 9.0f ) { //check if 1x2 has lowest prediction error @@ -507,7 +558,7 @@ VCMQmSelect::SelectSpatialDirectionMode(float transRate) } } - //check for 2x2 selection: favor 2x2 over 1x2 and 2x1 + // Check for 2x2 selection: favor 2x2 over 1x2 and 2x1 if (spatialErr < spatialErrH * (1.0f + SPATIAL_ERR_2X2_VS_H) && spatialErr < spatialErrV * (1.0f + SPATIAL_ERR_2X2_VS_V)) { @@ -516,7 +567,7 @@ VCMQmSelect::SelectSpatialDirectionMode(float transRate) return VCM_OK; } - //check for 2x1 selection: + // Check for 2x1 selection: if (spatialErrV < spatialErrH * (1.0f - SPATIAL_ERR_V_VS_H) && spatialErrV < spatialErr * (1.0f - SPATIAL_ERR_2X2_VS_V)) { @@ -534,7 +585,7 @@ VCMQmSelect::Coherence() float horizNZ = _contentMetrics->motionHorizontalness; float distortionNZ = 
_contentMetrics->motionClusterDistortion; - //Coherence measure: combine horizontalness with cluster distortion + // Coherence measure: combine horizontalness with cluster distortion _coherence.value = COH_MAX; if (distortionNZ > 0.) { @@ -558,7 +609,7 @@ VCMQmSelect::MotionNFD() { _motion.value = _contentMetrics->motionMagnitudeNZ; - // determine motion level + // Determine motion level if (_motion.value < LOW_MOTION_NFD) { _motion.level = kLow; @@ -581,17 +632,18 @@ VCMQmSelect::Motion() float sizeZeroMotion = _contentMetrics->sizeZeroMotion; float motionMagNZ = _contentMetrics->motionMagnitudeNZ; - //take product of size and magnitude with equal weight for now + // Take product of size and magnitude with equal weight _motion.value = (1.0f - sizeZeroMotion) * motionMagNZ; - //stabilize: motionMagNZ could be large when only few motion blocks are non-zero + // Stabilize: motionMagNZ could be large when only a + // few motion blocks are non-zero _stationaryMotion = false; if (sizeZeroMotion > HIGH_ZERO_MOTION_SIZE) { _motion.value = 0.0f; _stationaryMotion = true; } - // determine motion level + // Determine motion level if (_motion.value < LOW_MOTION) { _motion.level = kLow; @@ -613,11 +665,11 @@ VCMQmSelect::Spatial() float spatialErr = _contentMetrics->spatialPredErr; float spatialErrH = _contentMetrics->spatialPredErrH; float spatialErrV = _contentMetrics->spatialPredErrV; - //Spatial measure: take average of 3 prediction errors + // Spatial measure: take average of 3 prediction errors _spatial.value = (spatialErr + spatialErrH + spatialErrV) / 3.0f; float scale = 1.0f; - //Reduce thresholds for HD scenes + // Reduce thresholds for HD scenes if (_imageType > 3) { scale = (float)SCALE_TEXTURE_HD; @@ -635,8 +687,6 @@ VCMQmSelect::Spatial() { _spatial.level = kDefault; } - - } @@ -675,10 +725,10 @@ VCMQmSelect::SetMaxRateForQM(WebRtc_UWord32 width, WebRtc_UWord32 height) _imageType = 6; } - // set max rate based on image size + // Set max rate based on image size 
_maxRateQM = kMaxRateQm[_imageType]; return VCM_OK; } -} +} // end of namespace diff --git a/modules/video_coding/main/source/qm_select.h b/modules/video_coding/main/source/qm_select.h index a3d49f1323..3bca4bce90 100644 --- a/modules/video_coding/main/source/qm_select.h +++ b/modules/video_coding/main/source/qm_select.h @@ -24,7 +24,8 @@ struct VideoContentMetrics; struct VCMQualityMode { - VCMQualityMode():spatialWidthFact(1), spatialHeightFact(1), temporalFact(1){} + VCMQualityMode():spatialWidthFact(1), spatialHeightFact(1), + temporalFact(1){} void Reset() { spatialWidthFact = 1; @@ -65,22 +66,33 @@ public: ~VCMQmSelect(); // Initialize: - WebRtc_Word32 Initialize(float bitRate, float userFrameRate, WebRtc_UWord32 width, WebRtc_UWord32 height); + WebRtc_Word32 Initialize(float bitRate, float userFrameRate, + WebRtc_UWord32 width, WebRtc_UWord32 height); // Allow the user to set preferences: favor frame rate/resolution WebRtc_Word32 SetPreferences(WebRtc_Word8 resolPref); - // Extract ST QM behavior and make decision - // Inputs: Content Metrics per frame (averaged over time) - // qm: Reference to the quality modes pointer - WebRtc_Word32 SelectQuality(const VideoContentMetrics* contentMetrics, VCMQualityMode** qm); + // Extract ST (spatio-temporal) QM behavior and make decision + // Inputs: qm: Reference to the quality modes pointer + WebRtc_Word32 SelectQuality(VCMQualityMode** qm); - // Update QMselect with actual bit rate (size of the latest encoded frame) and frame type - // -> update buffer level and frame-mismatch - void UpdateEncodedSize(WebRtc_Word64 encodedSize, FrameType encodedFrameType); + // Update QM with actual bit rate + // (size of the latest encoded frame) and frame type. 
+ void UpdateEncodedSize(WebRtc_Word64 encodedSize, + FrameType encodedFrameType); + + // Update QM with new bit/frame/loss rates from SetTargetRates + void UpdateRates(float targetBitRate, float avgSentRate, + float incomingFrameRate, WebRtc_UWord8 packetLoss); + + // Update QM with the content metrics + void UpdateContent(const VideoContentMetrics* contentMetrics); + + // Adjust FEC rate based on content + WebRtc_UWord8 AdjustFecFactor(WebRtc_UWord8 codeRateDelta, float totalRate, + float frameRate, WebRtc_UWord16 rttTime, + WebRtc_UWord8 packetLoss); - // Update QM with new rates from SetTargetRates - void UpdateRates(float targetBitRate, float avgSentRate, float incomingFrameRate); // Select 1x2,2x2,2x2 spatial sampling mode WebRtc_Word32 SelectSpatialDirectionMode(float transRate); @@ -113,7 +125,7 @@ private: // Content Data const VideoContentMetrics* _contentMetrics; - // Encoder stats/rate-control metrics + // Encoder rate control parameters, network parameters float _targetBitRate; float _userFrameRate; float _incomingFrameRate; @@ -123,9 +135,13 @@ private: float _sumIncomingFrameRate; float _sumSeqRateMM; float _sumFrameRateMM; + float _sumPacketLoss; + float _prevTotalRate; + WebRtc_UWord16 _prevRttTime; + WebRtc_UWord8 _prevPacketLoss; WebRtc_Word64 _sumEncodedBytes; - //Encoder and native frame sizes + // Encoder and native frame sizes WebRtc_UWord32 _width; WebRtc_UWord32 _height; WebRtc_UWord32 _nativeWidth; @@ -135,26 +151,26 @@ private: WebRtc_UWord32 _nativeFrameRate; WebRtc_UWord8 _stateDecFactorTemp; - //Counters + // Counters WebRtc_UWord32 _frameCnt; WebRtc_UWord32 _frameCntDelta; WebRtc_UWord32 _updateRateCnt; WebRtc_UWord32 _lowBufferCnt; - //Content L/M/H values + // Content L/M/H values VCMContFeature _motion; VCMContFeature _spatial; VCMContFeature _coherence; bool _stationaryMotion; - //aspect ratio + // Aspect ratio float _aspectRatio; - //Max rate to saturate the transitionalRate + // Max rate to saturate the transitionalRate 
WebRtc_UWord32 _maxRateQM; WebRtc_UWord8 _imageType; - //User preference for resolution or qmax change + // User preference for resolution or qmax change WebRtc_UWord8 _userResolutionPref; bool _init; VCMQualityMode* _qm; diff --git a/modules/video_coding/main/source/qm_select_data.h b/modules/video_coding/main/source/qm_select_data.h index 7f056adf5c..813c1109f8 100644 --- a/modules/video_coding/main/source/qm_select_data.h +++ b/modules/video_coding/main/source/qm_select_data.h @@ -13,7 +13,7 @@ /*************************************************************** *QMSelectData.h -* This file includes parameters used by the Quality Modes selection process +* This file includes parameters for content-aware media optimization ****************************************************************/ #include "typedefs.h" @@ -21,38 +21,48 @@ namespace webrtc { -//Initial level of buffer in secs: should corresponds to wrapper settings +// +// PARAMETERS FOR RESOLUTION ADAPTATION +// + +// Initial level of buffer in secs: should correspond to wrapper settings #define INIT_BUFFER_LEVEL 0.5 -// -//PARAMETERS FOR QM SELECTION -// - -//Threshold of (max) buffer size below which we consider too low (underflow) +// Threshold of (max) buffer size below which we consider too low (underflow) #define PERC_BUFFER_THR 0.10 -//Threshold on rate mismatch +// Threshold on rate mismatch #define MAX_RATE_MM 0.5 -//Threshold on the occurrences of low buffer levels +// Avoid outliers in seq-rate MM +#define THRESH_SUM_MM 1000 + +// Threshold on the occurrences of low buffer levels #define MAX_BUFFER_LOW 0.5 -//Factor for transitional rate for going back up in resolution +// Factor for transitional rate for going back up in resolution #define TRANS_RATE_SCALE_UP_SPATIAL 1.25 #define TRANS_RATE_SCALE_UP_TEMP 1.25 -//Maximum possible transitional rate: (units in kbps), for 30fps +// Threshold on packet loss rate, above which resolution reduction is favored +#define LOSS_THR 0.1 + +// Factor for reducing 
transitional bitrate under packet loss +#define LOSS_RATE_FAC 1.0 + +// Maximum possible transitional rate for down-sampling: +// (units in kbps), for 30fps const WebRtc_UWord16 kMaxRateQm[7] = { 100, //QCIF - 500, //CIF - 800, //VGA + 500, //CIF + 800, //VGA 1500, //4CIF 2000, //720 HD 4:3, 2500, //720 HD 16:9 3000 //1080HD }; -//Scale for transitional rate: based on content class +// Scale for transitional rate: based on content class // motion=L/H/D,spatial==L/H/D: for low, high, middle levels const float kScaleTransRateQm[18] = { //4CIF and lower @@ -63,7 +73,7 @@ const float kScaleTransRateQm[18] = { 0.50f, // H, H 0.50f, // H, D 0.50f, // D, L - 0.625f, // D, D + 0.63f, // D, D 0.25f, // D, H //over 4CIF: WHD, HD @@ -74,62 +84,51 @@ const float kScaleTransRateQm[18] = { 0.50f, // H, H 0.50f, // H, D 0.50f, // D, L - 0.625f, // D, D + 0.63f, // D, D 0.25f // D, H }; -//Control the total amount of down-sampling allowed +// Action for down-sampling: +// motion=L/H/D,spatial==L/H/D: for low, high, middle levels +const WebRtc_UWord8 kSpatialAction[9] = { + 1, // L, L + 1, // L, H + 1, // L, D + 4, // H ,L + 1, // H, H + 4, // H, D + 4, // D, L + 1, // D, D + 1, // D, H +}; + +const WebRtc_UWord8 kTemporalAction[9] = { + 1, // L, L + 2, // L, H + 2, // L, D + 1, // H ,L + 2, // H, H + 1, // H, D + 1, // D, L + 2, // D, D + 1, // D, H +}; + +// Control the total amount of down-sampling allowed #define MAX_SPATIAL_DOWN_FACT 4 #define MAX_TEMP_DOWN_FACT 4 #define MAX_SPATIAL_TEMP_DOWN_FACT 8 -// -// -// - -//PARAMETETS FOR SETTING LOW/HIGH VALUES OF METRICS: -// -//Threshold to determine if high amount of zero_motion -#define HIGH_ZERO_MOTION_SIZE 0.95 - -//Thresholds for motion: motion level is derived from motion vectors: motion = size_nz*magn_nz -#define HIGH_MOTION 0.7 -#define LOW_MOTION 0.4 - -//Thresholds for motion: motion level is from NFD -#define HIGH_MOTION_NFD 0.075 -#define LOW_MOTION_NFD 0.04 - -//Thresholds for spatial prediction error: this is appLied
on the min(2x2,1x2,2x1) -#define HIGH_TEXTURE 0.035 -#define LOW_TEXTURE 0.025 - -//Used to reduce thresholds for HD scenes: correction factor since higher -//correlation in HD scenes means lower spatial prediction error -#define SCALE_TEXTURE_HD 0.9; - -//Thresholds for distortion and horizontalness: applied on product: horiz_nz/dist_nz -#define COHERENCE_THR 1.0 -#define COH_MAX 10 -// -// -#define RATE_RED_SPATIAL_2X2 0.6 //percentage reduction in transitional bitrate where 2x2 is selected over 1x2/2x1 -#define SPATIAL_ERR_2X2_VS_H 0.1 //percentage to favor 2x2 -#define SPATIAL_ERR_2X2_VS_V 0.1 //percentage to favor 2x2 over V -#define SPATIAL_ERR_V_VS_H 0.1 //percentage to favor H over V - - -//Minimum image size for a spatial mode selection: no spatial down-sampling if input size <= MIN_IMAGE_SIZE +// Minimum image size for a spatial down-sampling: +// no spatial down-sampling if input size <= MIN_IMAGE_SIZE #define MIN_IMAGE_SIZE 25344 //176*144 -//Minimum frame rate for temporal mode: no frame rate reduction if incomingFrameRate <= MIN_FRAME_RATE +// Minimum frame rate for temporal down-sampling: +// no frame rate reduction if incomingFrameRate <= MIN_FRAME_RATE #define MIN_FRAME_RATE_QM 8 -//Avoid outliers in seq-rate MM -#define THRESH_SUM_MM 1000 - +// Boundaries for the closest standard frame size const WebRtc_UWord32 kFrameSizeTh[6] = { - // boundaries for the closest standard frame size 63360, //between 176*144 and 352*288 204288, //between 352*288 and 640*480 356352, //between 640*480 and 704*576 @@ -139,6 +138,48 @@ const WebRtc_UWord32 kFrameSizeTh[6] = { }; +// +// PARAMETERS FOR FEC ADJUSTMENT: TODO (marpan) +// + + +// +// PARAMETERS FOR SETTING LOW/HIGH STATES OF CONTENT METRICS: +// + +// Threshold to determine if high amount of zero_motion +#define HIGH_ZERO_MOTION_SIZE 0.95 + +// Thresholds for motion: +// motion level is derived from motion vectors: motion = size_nz*magn_nz +#define HIGH_MOTION 0.7 +#define LOW_MOTION 0.4 + +// Thresholds
for motion: motion level is from NFD +#define HIGH_MOTION_NFD 0.075 +#define LOW_MOTION_NFD 0.04 + +// Thresholds for spatial prediction error: +// this is applied on the min(2x2,1x2,2x1) +#define HIGH_TEXTURE 0.035 +#define LOW_TEXTURE 0.025 + +// Used to reduce thresholds for HD scenes: correction factor since higher +// correlation in HD scenes means lower spatial prediction error +#define SCALE_TEXTURE_HD 0.9; + +// Thresholds for distortion and horizontalness: +// applied on product: horiz_nz/dist_nz +#define COHERENCE_THR 1.0 +#define COH_MAX 10 + +// percentage reduction in transitional bitrate for 2x2 selected over 1x2/2x1 +#define RATE_RED_SPATIAL_2X2 0.6 + +#define SPATIAL_ERR_2X2_VS_H 0.1 //percentage to favor 2x2 +#define SPATIAL_ERR_2X2_VS_V 0.1 //percentage to favor 2x2 over V +#define SPATIAL_ERR_V_VS_H 0.1 //percentage to favor H over V + } // namespace webrtc #endif // WEBRTC_MODULES_VIDEO_CODING_SOURCE_QM_SELECT_DATA_H_