/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "webrtc/modules/video_processing/main/source/content_analysis.h"

#include <math.h>
#include <stdlib.h>
#include <string.h>

#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
#include "webrtc/system_wrappers/interface/tick_util.h"

namespace webrtc {

// Builds an analyzer with no buffers allocated. The metric routines default
// to the plain C implementations; when |runtime_cpu_detection| is true and the
// CPU reports SSE2 support (x86 builds only), the SSE2 variants are selected
// instead.
VPMContentAnalysis::VPMContentAnalysis(bool runtime_cpu_detection)
    : orig_frame_(NULL),
      prev_frame_(NULL),
      width_(0),
      height_(0),
      skip_num_(1),
      border_(8),
      motion_magnitude_(0.0f),
      spatial_pred_err_(0.0f),
      spatial_pred_err_h_(0.0f),
      spatial_pred_err_v_(0.0f),
      first_frame_(true),
      ca_Init_(false),
      content_metrics_(NULL) {
  ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_C;
  TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_C;

  if (runtime_cpu_detection) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
    if (WebRtc_GetCPUInfo(kSSE2)) {
      ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_SSE2;
      TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_SSE2;
    }
#endif
  }
  Release();
}

// Frees the metrics object and the previous-frame buffer.
VPMContentAnalysis::~VPMContentAnalysis() {
  Release();
}

// Computes the spatial metrics for |inputFrame| and, from the second frame
// onwards, the motion metric against the previously stored frame.
//
// Returns the internally owned metrics object, or NULL when the frame is
// zero-sized or (re)initialization for the frame's dimensions fails.
VideoContentMetrics* VPMContentAnalysis::ComputeContentMetrics(
    const I420VideoFrame& inputFrame) {
  if (inputFrame.IsZeroSize())
    return NULL;

  // Init if needed (native dimension change).
  if (width_ != inputFrame.width() || height_ != inputFrame.height()) {
    if (VPM_OK != Initialize(inputFrame.width(), inputFrame.height()))
      return NULL;
  }

  // Only interested in the Y plane.
  // NOTE(review): the plane is indexed as row * width_ + column and copied as
  // width_ * height_ bytes below, i.e. it is treated as tightly packed.
  // Confirm that frames whose Y stride exceeds the width never reach here.
  orig_frame_ = inputFrame.buffer(kYPlane);

  // Compute spatial metrics: 3 spatial prediction errors.
  (this->*ComputeSpatialMetrics)();

  // Compute motion metrics; needs a stored previous frame.
  if (!first_frame_)
    ComputeMotionMetrics();

  // Saving current frame as previous one: Y only.
  memcpy(prev_frame_, orig_frame_, width_ * height_);

  first_frame_ = false;
  ca_Init_ = true;

  return ContentMetrics();
}

// Frees all owned buffers and returns the analyzer to its "no frame seen"
// state. Always returns VPM_OK.
int32_t VPMContentAnalysis::Release() {
  // Deleting a null pointer is a no-op, so no checks are needed.
  delete content_metrics_;
  content_metrics_ = NULL;

  delete [] prev_frame_;
  prev_frame_ = NULL;

  width_ = 0;
  height_ = 0;
  first_frame_ = true;
  // The metrics object is gone, so ContentMetrics() must not hand it out until
  // a frame has been processed again.
  ca_Init_ = false;

  return VPM_OK;
}

// (Re)allocates the per-resolution state for frames of |width| x |height|.
//
// Returns VPM_OK on success, VPM_PARAMETER_ERROR when the frame is too small
// to analyze, or VPM_MEMORY when an allocation yields NULL. On any failure the
// stored dimensions are reset to 0 and no buffers are kept, so the next frame
// goes through Initialize() again instead of running on stale state.
int32_t VPMContentAnalysis::Initialize(int width, int height) {
  // Drop buffers sized for the previous resolution. The pointers are nulled
  // right away so that an early return below cannot leave dangling pointers
  // for Release() / the destructor to delete a second time.
  delete content_metrics_;
  content_metrics_ = NULL;
  delete [] prev_frame_;
  prev_frame_ = NULL;

  first_frame_ = true;

  // Spatial Metrics don't work on a border of 8. Minimum processing
  // block size is 16 pixels. So make sure the width and height support this.
  if (width <= 32 || height <= 32) {
    width_ = 0;
    height_ = 0;
    ca_Init_ = false;
    return VPM_PARAMETER_ERROR;
  }

  width_ = width;
  height_ = height;

  // skip parameter: # of skipped rows: for complexity reduction
  // temporal also currently uses it for column reduction.
  skip_num_ = 1;

  // Use skip_num_ = 2 for 4CIF (704x576) and larger.
  if ((height_ >= 576) && (width_ >= 704)) {
    skip_num_ = 2;
  }
  // Use skip_num_ = 4 for full HD (1920x1080) and larger.
  if ((height_ >= 1080) && (width_ >= 1920)) {
    skip_num_ = 4;
  }

  content_metrics_ = new VideoContentMetrics();
  if (content_metrics_ == NULL) {
    width_ = 0;
    height_ = 0;
    return VPM_MEMORY;
  }

  prev_frame_ = new uint8_t[width_ * height_];  // Y only.
  if (prev_frame_ == NULL) {
    delete content_metrics_;
    content_metrics_ = NULL;
    width_ = 0;
    height_ = 0;
    return VPM_MEMORY;
  }

  return VPM_OK;
}

// Compute motion metrics: magnitude over non-zero motion vectors,
// and size of zero cluster.
// Dispatches to the selected temporal-difference routine; always returns
// VPM_OK.
int32_t VPMContentAnalysis::ComputeMotionMetrics() {
  // Motion metrics: only one is derived from normalized
  // (MAD) temporal difference
  (this->*TemporalDiffMetric)();
  return VPM_OK;
}

// Normalized temporal difference (MAD): used as a motion level metric.
// Normalize MAD by spatial contrast: images with more contrast
// (pixel variance) likely have larger temporal difference.
// To reduce complexity, we compute the metric for a reduced set of points.
//
// Writes the result to motion_magnitude_ (0 when the frames are identical over
// the sampled area or the contrast is not positive). Always returns VPM_OK.
int32_t VPMContentAnalysis::TemporalDiffMetric_C() {
  // Size of original frame.
  const int sizei = height_;
  const int sizej = width_;

  uint32_t tempDiffSum = 0;
  uint32_t pixelSum = 0;
  uint64_t pixelSqSum = 0;
  uint32_t num_pixels = 0;  // Counter for # of pixels.

  // Make sure the work section is a multiple of 16 pixels wide.
  const int width_end = ((width_ - 2 * border_) & -16) + border_;

  for (int i = border_; i < sizei - border_; i += skip_num_) {
    for (int j = border_; j < width_end; j++) {
      num_pixels += 1;
      const int ssn = i * sizej + j;

      const uint8_t currPixel = orig_frame_[ssn];
      const uint8_t prevPixel = prev_frame_[ssn];

      tempDiffSum += (uint32_t)abs((int16_t)(currPixel - prevPixel));
      pixelSum += (uint32_t) currPixel;
      pixelSqSum += (uint64_t) (currPixel * currPixel);
    }
  }

  // Default.
  motion_magnitude_ = 0.0f;

  if (tempDiffSum == 0)
    return VPM_OK;

  // Normalize over all pixels.
  const float tempDiffAvg = (float)tempDiffSum / (float)(num_pixels);
  const float pixelSumAvg = (float)pixelSum / (float)(num_pixels);
  const float pixelSqSumAvg = (float)pixelSqSum / (float)(num_pixels);
  // Variance of the sampled pixels: E[x^2] - E[x]^2.
  float contrast = pixelSqSumAvg - (pixelSumAvg * pixelSumAvg);

  if (contrast > 0.0) {
    contrast = sqrt(contrast);
    motion_magnitude_ = tempDiffAvg / contrast;
  }
  return VPM_OK;
}

// Compute spatial metrics:
// To reduce complexity, we compute the metric for a reduced set of points.
// The spatial metrics are rough estimates of the prediction error cost for // each QM spatial mode: 2x2,1x2,2x1 // The metrics are a simple estimate of the up-sampling prediction error, // estimated assuming sub-sampling for decimation (no filtering), // and up-sampling back up with simple bilinear interpolation. int32_t VPMContentAnalysis::ComputeSpatialMetrics_C() { const int sizei = height_; const int sizej = width_; // Pixel mean square average: used to normalize the spatial metrics. uint32_t pixelMSA = 0; uint32_t spatialErrSum = 0; uint32_t spatialErrVSum = 0; uint32_t spatialErrHSum = 0; // make sure work section is a multiple of 16 const int width_end = ((sizej - 2*border_) & -16) + border_; for (int i = border_; i < sizei - border_; i += skip_num_) { for (int j = border_; j < width_end; j++) { int ssn1= i * sizej + j; int ssn2 = (i + 1) * sizej + j; // bottom int ssn3 = (i - 1) * sizej + j; // top int ssn4 = i * sizej + j + 1; // right int ssn5 = i * sizej + j - 1; // left uint16_t refPixel1 = orig_frame_[ssn1] << 1; uint16_t refPixel2 = orig_frame_[ssn1] << 2; uint8_t bottPixel = orig_frame_[ssn2]; uint8_t topPixel = orig_frame_[ssn3]; uint8_t rightPixel = orig_frame_[ssn4]; uint8_t leftPixel = orig_frame_[ssn5]; spatialErrSum += (uint32_t) abs((int16_t)(refPixel2 - (uint16_t)(bottPixel + topPixel + leftPixel + rightPixel))); spatialErrVSum += (uint32_t) abs((int16_t)(refPixel1 - (uint16_t)(bottPixel + topPixel))); spatialErrHSum += (uint32_t) abs((int16_t)(refPixel1 - (uint16_t)(leftPixel + rightPixel))); pixelMSA += orig_frame_[ssn1]; } } // Normalize over all pixels. 
const float spatialErr = (float)(spatialErrSum >> 2); const float spatialErrH = (float)(spatialErrHSum >> 1); const float spatialErrV = (float)(spatialErrVSum >> 1); const float norm = (float)pixelMSA; // 2X2: spatial_pred_err_ = spatialErr / norm; // 1X2: spatial_pred_err_h_ = spatialErrH / norm; // 2X1: spatial_pred_err_v_ = spatialErrV / norm; return VPM_OK; } VideoContentMetrics* VPMContentAnalysis::ContentMetrics() { if (ca_Init_ == false) return NULL; content_metrics_->spatial_pred_err = spatial_pred_err_; content_metrics_->spatial_pred_err_h = spatial_pred_err_h_; content_metrics_->spatial_pred_err_v = spatial_pred_err_v_; // Motion metric: normalized temporal difference (MAD). content_metrics_->motion_magnitude = motion_magnitude_; return content_metrics_; } } // namespace webrtc