diff --git a/webrtc/modules/video_processing/BUILD.gn b/webrtc/modules/video_processing/BUILD.gn index 43df0802ad..edeac947c7 100644 --- a/webrtc/modules/video_processing/BUILD.gn +++ b/webrtc/modules/video_processing/BUILD.gn @@ -6,6 +6,7 @@ # in the file PATENTS. All contributing project authors may # be found in the AUTHORS file in the root of the source tree. +import("//build/config/arm.gni") import("../../build/webrtc.gni") build_video_processing_sse2 = current_cpu == "x86" || current_cpu == "x64" @@ -26,8 +27,16 @@ source_set("video_processing") { "include/video_processing_defines.h", "spatial_resampler.cc", "spatial_resampler.h", + "util/denoiser_filter.cc", + "util/denoiser_filter.h", + "util/denoiser_filter_c.cc", + "util/denoiser_filter_c.h", + "util/skin_detection.cc", + "util/skin_detection.h", "video_decimator.cc", "video_decimator.h", + "video_denoiser.cc", + "video_denoiser.h", "video_processing_impl.cc", "video_processing_impl.h", ] @@ -41,6 +50,9 @@ source_set("video_processing") { if (build_video_processing_sse2) { deps += [ ":video_processing_sse2" ] } + if (rtc_build_with_neon) { + deps += [ ":video_processing_neon" ] + } configs += [ "../..:common_config" ] public_configs = [ "../..:common_inherited_config" ] @@ -56,6 +68,8 @@ if (build_video_processing_sse2) { source_set("video_processing_sse2") { sources = [ "content_analysis_sse2.cc", + "util/denoiser_filter_sse2.cc", + "util/denoiser_filter_sse2.h", ] configs += [ "../..:common_config" ] @@ -72,3 +86,16 @@ if (build_video_processing_sse2) { } } } + +if (rtc_build_with_neon) { + source_set("video_processing_neon") { + sources = [ + "util/denoiser_filter_neon.cc", + "util/denoiser_filter_neon.h", + ] + if (current_cpu != "arm64") { + configs -= [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + } +} diff --git a/webrtc/modules/video_processing/frame_preprocessor.cc b/webrtc/modules/video_processing/frame_preprocessor.cc index a3ec3c8c1c..36e1b9bbfd 100644 --- 
a/webrtc/modules/video_processing/frame_preprocessor.cc +++ b/webrtc/modules/video_processing/frame_preprocessor.cc @@ -13,26 +13,34 @@ namespace webrtc { VPMFramePreprocessor::VPMFramePreprocessor() - : content_metrics_(NULL), + : content_metrics_(nullptr), resampled_frame_(), enable_ca_(false), + enable_denoising_(false), frame_cnt_(0) { spatial_resampler_ = new VPMSimpleSpatialResampler(); ca_ = new VPMContentAnalysis(true); vd_ = new VPMVideoDecimator(); + if (enable_denoising_) { + denoiser_ = new VideoDenoiser(); + } else { + denoiser_ = nullptr; // Denoising is off, so no denoiser is allocated. + } } VPMFramePreprocessor::~VPMFramePreprocessor() { Reset(); - delete spatial_resampler_; delete ca_; delete vd_; + // |denoiser_| is either owned or null, and deleting null is a no-op. + delete denoiser_; + delete spatial_resampler_; } void VPMFramePreprocessor::Reset() { ca_->Release(); vd_->Reset(); - content_metrics_ = NULL; + content_metrics_ = nullptr; spatial_resampler_->Reset(); enable_ca_ = false; frame_cnt_ = 0; @@ -104,11 +112,22 @@ int32_t VPMFramePreprocessor::PreprocessFrame(const VideoFrame& frame, return 1; // drop 1 frame } - // Resizing incoming frame if needed. Otherwise, remains NULL. + // Resizing incoming frame if needed. Otherwise, remains nullptr. // We are not allowed to resample the input frame (must make a copy of it).
- *processed_frame = NULL; + *processed_frame = nullptr; + if (denoiser_ != nullptr) { + denoiser_->DenoiseFrame(frame, &denoised_frame_); + *processed_frame = &denoised_frame_; + } + if (spatial_resampler_->ApplyResample(frame.width(), frame.height())) { - int32_t ret = spatial_resampler_->ResampleFrame(frame, &resampled_frame_); + int32_t ret; + if (denoiser_ != nullptr) { // Same gate as the denoise step, so |denoised_frame_| was filled. + ret = spatial_resampler_->ResampleFrame(denoised_frame_, + &resampled_frame_); + } else { + ret = spatial_resampler_->ResampleFrame(frame, &resampled_frame_); + } if (ret != VPM_OK) return ret; *processed_frame = &resampled_frame_; } @@ -118,14 +137,14 @@ int32_t VPMFramePreprocessor::PreprocessFrame(const VideoFrame& frame, // Compute new metrics every |kSkipFramesCA| frames, starting with // the first frame. if (frame_cnt_ % kSkipFrameCA == 0) { - if (*processed_frame == NULL) { + if (*processed_frame == nullptr) { content_metrics_ = ca_->ComputeContentMetrics(frame); } else { - content_metrics_ = ca_->ComputeContentMetrics(resampled_frame_); + content_metrics_ = ca_->ComputeContentMetrics(**processed_frame); } } - ++frame_cnt_; } + ++frame_cnt_; return VPM_OK; } diff --git a/webrtc/modules/video_processing/frame_preprocessor.h b/webrtc/modules/video_processing/frame_preprocessor.h index c5313b4066..27592603c7 100644 --- a/webrtc/modules/video_processing/frame_preprocessor.h +++ b/webrtc/modules/video_processing/frame_preprocessor.h @@ -18,7 +18,9 @@ #include "webrtc/modules/video_processing/content_analysis.h" #include "webrtc/modules/video_processing/spatial_resampler.h" #include "webrtc/modules/video_processing/video_decimator.h" +#include "webrtc/modules/video_processing/video_denoiser.h" #include "webrtc/typedefs.h" +#include "webrtc/video_frame.h" namespace webrtc { @@ -65,11 +67,14 @@ class VPMFramePreprocessor { enum { kSkipFrameCA = 2 }; VideoContentMetrics* content_metrics_; + VideoFrame denoised_frame_; VideoFrame resampled_frame_; VPMSpatialResampler* spatial_resampler_;
VPMContentAnalysis* ca_; VPMVideoDecimator* vd_; + VideoDenoiser* denoiser_; bool enable_ca_; + bool enable_denoising_; int frame_cnt_; }; diff --git a/webrtc/modules/video_processing/util/denoiser_filter.cc b/webrtc/modules/video_processing/util/denoiser_filter.cc new file mode 100644 index 0000000000..a5819905b2 --- /dev/null +++ b/webrtc/modules/video_processing/util/denoiser_filter.cc @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/video_processing/util/denoiser_filter.h" +#include "webrtc/modules/video_processing/util/denoiser_filter_c.h" +#include "webrtc/modules/video_processing/util/denoiser_filter_neon.h" +#include "webrtc/modules/video_processing/util/denoiser_filter_sse2.h" +#include "webrtc/system_wrappers/include/cpu_features_wrapper.h" + +namespace webrtc { + +const int kMotionMagnitudeThreshold = 8 * 3; +const int kSumDiffThreshold = 16 * 16 * 2; +const int kSumDiffThresholdHigh = 600; + +DenoiserFilter* DenoiserFilter::Create() { // Returns a filter allocated with new. + DenoiserFilter* filter = nullptr; + + // If we know the minimum architecture at compile time, avoid CPU detection. +#if defined(WEBRTC_ARCH_X86_FAMILY) + // x86 CPU detection required.
+ if (WebRtc_GetCPUInfo(kSSE2)) { + filter = + new DenoiserFilterSSE2(); + } else { + filter = new DenoiserFilterC(); + } +#elif defined(WEBRTC_DETECT_NEON) + if (WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) { + filter = new DenoiserFilterNEON(); + } else { + filter = new DenoiserFilterC(); + } +#else + filter = new DenoiserFilterC(); +#endif + + return filter; +} + +} // namespace webrtc diff --git a/webrtc/modules/video_processing/util/denoiser_filter.h b/webrtc/modules/video_processing/util/denoiser_filter.h new file mode 100644 index 0000000000..19135b3b9e --- /dev/null +++ b/webrtc/modules/video_processing/util/denoiser_filter.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_DENOISER_FILTER_H_ +#define WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_DENOISER_FILTER_H_ + +#include "webrtc/modules/include/module_common_types.h" +#include "webrtc/modules/video_processing/include/video_processing_defines.h" + +namespace webrtc { + +extern const int kMotionMagnitudeThreshold; // These three are defined in denoiser_filter.cc. +extern const int kSumDiffThreshold; +extern const int kSumDiffThresholdHigh; + +enum DenoiserDecision { COPY_BLOCK, FILTER_BLOCK }; // Result of DenoiserFilter::MbDenoise(). +struct DenoiseMetrics { + uint32_t var; + uint32_t sad; + uint8_t denoise; + bool is_skin; +}; + +class DenoiserFilter { + public: + static DenoiserFilter* Create(); // Picks the SSE2, NEON or C filter and returns it allocated with new. + + virtual ~DenoiserFilter() {} + + virtual void CopyMem16x16(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride) = 0; // Copies 16 rows of 16 bytes from |src| to |dst|. + virtual void CopyMem8x8(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride) = 0; // Copies 8 rows of 8 bytes from |src| to |dst|. + virtual uint32_t Variance16x8(const uint8_t* a, + int a_stride, + const uint8_t* b, + int b_stride, + unsigned int* sse) = 0; // Stores the sum of squared differences in |*sse| and returns the variance. + virtual DenoiserDecision MbDenoise(uint8_t* mc_running_avg_y, + int mc_avg_y_stride, + uint8_t* running_avg_y, + int avg_y_stride, + const uint8_t* sig, + int sig_stride, + uint8_t motion_magnitude, + int increase_denoising) = 0; // Writes the filtered 16x16 block to |running_avg_y|. +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_DENOISER_FILTER_H_ diff --git a/webrtc/modules/video_processing/util/denoiser_filter_c.cc b/webrtc/modules/video_processing/util/denoiser_filter_c.cc new file mode 100644 index 0000000000..e32bf83889 --- /dev/null +++ b/webrtc/modules/video_processing/util/denoiser_filter_c.cc @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS.
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdlib.h> // abs() + +#include "webrtc/modules/video_processing/util/denoiser_filter_c.h" + +namespace webrtc { + +void DenoiserFilterC::CopyMem16x16(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride) { + for (int i = 0; i < 16; i++) { + memcpy(dst, src, 16); + src += src_stride; + dst += dst_stride; + } +} + +void DenoiserFilterC::CopyMem8x8(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride) { + for (int i = 0; i < 8; i++) { + memcpy(dst, src, 8); + src += src_stride; + dst += dst_stride; + } +} + +uint32_t DenoiserFilterC::Variance16x8(const uint8_t* a, + int a_stride, + const uint8_t* b, + int b_stride, + uint32_t* sse) { // Variance of |a| minus |b| over 8 sampled rows of 16 pixels. + int sum = 0; + *sse = 0; + a_stride <<= 1; // Doubling both strides visits every other row. + b_stride <<= 1; + + for (int i = 0; i < 8; i++) { + for (int j = 0; j < 16; j++) { + const int diff = a[j] - b[j]; + sum += diff; + *sse += diff * diff; + } + + a += a_stride; + b += b_stride; + } + return *sse - ((static_cast<int64_t>(sum) * sum) >> 7); // 16 * 8 = 128 = 2^7 samples. +} + +DenoiserDecision DenoiserFilterC::MbDenoise(uint8_t* mc_running_avg_y, + int mc_avg_y_stride, + uint8_t* running_avg_y, + int avg_y_stride, + const uint8_t* sig, + int sig_stride, + uint8_t motion_magnitude, + int increase_denoising) { + int sum_diff_thresh = 0; + int sum_diff = 0; + int adj_val[3] = {3, 4, 6}; + int shift_inc1 = 0; + int shift_inc2 = 1; + int col_sum[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + if (motion_magnitude <= kMotionMagnitudeThreshold) { + if (increase_denoising) { + shift_inc1 = 1; + shift_inc2 = 2; + } + adj_val[0] += shift_inc2; + adj_val[1] += shift_inc2; + adj_val[2] += shift_inc2; + } + + for (int r = 0; r < 16; ++r) { + for (int c = 0; c < 16; ++c) { + int diff = 0; + int adjustment = 0; + int absdiff = 0; + + diff = mc_running_avg_y[c] - sig[c]; + absdiff = abs(diff); + + // When |diff| <= |3 + shift_inc1|, use pixel value from + // last denoised raw.
+ if (absdiff <= 3 + shift_inc1) { + running_avg_y[c] = mc_running_avg_y[c]; + col_sum[c] += diff; + } else { + if (absdiff >= 4 + shift_inc1 && absdiff <= 7) + adjustment = adj_val[0]; + else if (absdiff >= 8 && absdiff <= 15) + adjustment = adj_val[1]; + else + adjustment = adj_val[2]; + + if (diff > 0) { + if ((sig[c] + adjustment) > 255) + running_avg_y[c] = 255; + else + running_avg_y[c] = sig[c] + adjustment; + + col_sum[c] += adjustment; + } else { + if ((sig[c] - adjustment) < 0) + running_avg_y[c] = 0; + else + running_avg_y[c] = sig[c] - adjustment; + + col_sum[c] -= adjustment; + } + } + } + + // Update pointers for next iteration. + sig += sig_stride; + mc_running_avg_y += mc_avg_y_stride; + running_avg_y += avg_y_stride; + } + + for (int c = 0; c < 16; ++c) { + if (col_sum[c] >= 128) { + col_sum[c] = 127; + } + sum_diff += col_sum[c]; + } + + sum_diff_thresh = kSumDiffThreshold; + if (increase_denoising) + sum_diff_thresh = kSumDiffThresholdHigh; + if (abs(sum_diff) > sum_diff_thresh) { + int delta = ((abs(sum_diff) - sum_diff_thresh) >> 8) + 1; + // Only apply the adjustment for max delta up to 3. + if (delta < 4) { + sig -= sig_stride * 16; + mc_running_avg_y -= mc_avg_y_stride * 16; + running_avg_y -= avg_y_stride * 16; + for (int r = 0; r < 16; ++r) { + for (int c = 0; c < 16; ++c) { + int diff = mc_running_avg_y[c] - sig[c]; + int adjustment = abs(diff); + if (adjustment > delta) + adjustment = delta; + if (diff > 0) { + // Bring denoised signal down. + if (running_avg_y[c] - adjustment < 0) + running_avg_y[c] = 0; + else + running_avg_y[c] = running_avg_y[c] - adjustment; + col_sum[c] -= adjustment; + } else if (diff < 0) { + // Bring denoised signal up. 
+ if (running_avg_y[c] + adjustment > 255) + running_avg_y[c] = 255; + else + running_avg_y[c] = running_avg_y[c] + adjustment; + col_sum[c] += adjustment; + } + } + sig += sig_stride; + mc_running_avg_y += mc_avg_y_stride; + running_avg_y += avg_y_stride; + } + + sum_diff = 0; + for (int c = 0; c < 16; ++c) { + if (col_sum[c] >= 128) { + col_sum[c] = 127; + } + sum_diff += col_sum[c]; + } + + if (abs(sum_diff) > sum_diff_thresh) + return COPY_BLOCK; + } else { + return COPY_BLOCK; + } + } + + return FILTER_BLOCK; +} + +} // namespace webrtc diff --git a/webrtc/modules/video_processing/util/denoiser_filter_c.h b/webrtc/modules/video_processing/util/denoiser_filter_c.h new file mode 100644 index 0000000000..830fcfcbbd --- /dev/null +++ b/webrtc/modules/video_processing/util/denoiser_filter_c.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_DENOISER_FILTER_C_H_ +#define WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_DENOISER_FILTER_C_H_ + +#include "webrtc/modules/video_processing/util/denoiser_filter.h" + +namespace webrtc { + +class DenoiserFilterC : public DenoiserFilter { + public: + DenoiserFilterC() {} + void CopyMem16x16(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride) override; + void CopyMem8x8(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride) override; + uint32_t Variance16x8(const uint8_t* a, + int a_stride, + const uint8_t* b, + int b_stride, + unsigned int* sse) override; + DenoiserDecision MbDenoise(uint8_t* mc_running_avg_y, + int mc_avg_y_stride, + uint8_t* running_avg_y, + int avg_y_stride, + const uint8_t* sig, + int sig_stride, + uint8_t motion_magnitude, + int increase_denoising) override; +}; + + + +} // namespace webrtc + +#endif // WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_DENOISER_FILTER_C_H_ diff --git a/webrtc/modules/video_processing/util/denoiser_filter_neon.cc b/webrtc/modules/video_processing/util/denoiser_filter_neon.cc new file mode 100644 index 0000000000..67d420cda7 --- /dev/null +++ b/webrtc/modules/video_processing/util/denoiser_filter_neon.cc @@ -0,0 +1,284 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <arm_neon.h> + +#include "webrtc/modules/video_processing/util/denoiser_filter_neon.h" + +namespace webrtc { + +static int HorizontalAddS16x8(const int16x8_t v_16x8) { // Sums the eight 16-bit lanes. + const int32x4_t a = vpaddlq_s16(v_16x8); + const int64x2_t b = vpaddlq_s32(a); + const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), + vreinterpret_s32_s64(vget_high_s64(b))); + return vget_lane_s32(c, 0); +} + +static int HorizontalAddS32x4(const int32x4_t v_32x4) { // Sums the four 32-bit lanes. + const int64x2_t b = vpaddlq_s32(v_32x4); + const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), + vreinterpret_s32_s64(vget_high_s64(b))); + return vget_lane_s32(c, 0); +} + +static void VarianceNeonW8(const uint8_t* a, + int a_stride, + const uint8_t* b, + int b_stride, + int w, + int h, + uint32_t* sse, + int64_t* sum) { // Outputs the sum and the sum of squares of |a| minus |b| over a w x h block. + int16x8_t v_sum = vdupq_n_s16(0); + int32x4_t v_sse_lo = vdupq_n_s32(0); + int32x4_t v_sse_hi = vdupq_n_s32(0); + + for (int i = 0; i < h; ++i) { + for (int j = 0; j < w; j += 8) { + const uint8x8_t v_a = vld1_u8(&a[j]); + const uint8x8_t v_b = vld1_u8(&b[j]); + const uint16x8_t v_diff = vsubl_u8(v_a, v_b); // Wraps modulo 2^16 and is read as signed below. + const int16x8_t sv_diff = vreinterpretq_s16_u16(v_diff); + v_sum = vaddq_s16(v_sum, sv_diff); + v_sse_lo = + vmlal_s16(v_sse_lo, vget_low_s16(sv_diff), vget_low_s16(sv_diff)); + v_sse_hi = + vmlal_s16(v_sse_hi, vget_high_s16(sv_diff), vget_high_s16(sv_diff)); + } + a += a_stride; + b += b_stride; + } + + *sum = HorizontalAddS16x8(v_sum); + *sse = static_cast<uint32_t>( + HorizontalAddS32x4(vaddq_s32(v_sse_lo, v_sse_hi))); +} + +void DenoiserFilterNEON::CopyMem16x16(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride) { + uint8x16_t qtmp; + for (int r = 0; r < 16; r++) { + qtmp = vld1q_u8(src); + vst1q_u8(dst, qtmp); + src += src_stride; + dst += dst_stride; + } +} + +void DenoiserFilterNEON::CopyMem8x8(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride) { + uint8x8_t vtmp; + + for (int r = 0; r < 8; r++) { + vtmp = vld1_u8(src); + vst1_u8(dst,
vtmp); + src += src_stride; + dst += dst_stride; + } +} + +uint32_t DenoiserFilterNEON::Variance16x8(const uint8_t* a, + int a_stride, + const uint8_t* b, + int b_stride, + uint32_t* sse) { + int64_t sum = 0; + VarianceNeonW8(a, a_stride << 1, b, b_stride << 1, 16, 8, sse, &sum); + return *sse - ((sum * sum) >> 7); +} + +DenoiserDecision DenoiserFilterNEON::MbDenoise(uint8_t* mc_running_avg_y, + int mc_running_avg_y_stride, + uint8_t* running_avg_y, + int running_avg_y_stride, + const uint8_t* sig, + int sig_stride, + uint8_t motion_magnitude, + int increase_denoising) { + // If motion_magnitude is small, making the denoiser more aggressive by + // increasing the adjustment for each level, level1 adjustment is + // increased, the deltas stay the same. + int shift_inc = + (increase_denoising && motion_magnitude <= kMotionMagnitudeThreshold) + ? 1 + : 0; + const uint8x16_t v_level1_adjustment = vmovq_n_u8( + (motion_magnitude <= kMotionMagnitudeThreshold) ? 4 + shift_inc : 3); + const uint8x16_t v_delta_level_1_and_2 = vdupq_n_u8(1); + const uint8x16_t v_delta_level_2_and_3 = vdupq_n_u8(2); + const uint8x16_t v_level1_threshold = vmovq_n_u8(4 + shift_inc); + const uint8x16_t v_level2_threshold = vdupq_n_u8(8); + const uint8x16_t v_level3_threshold = vdupq_n_u8(16); + int64x2_t v_sum_diff_total = vdupq_n_s64(0); + + // Go over lines. + for (int r = 0; r < 16; ++r) { + // Load inputs. + const uint8x16_t v_sig = vld1q_u8(sig); + const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y); + + // Calculate absolute difference and sign masks. + const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg_y); + const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, v_mc_running_avg_y); + const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, v_mc_running_avg_y); + + // Figure out which level that put us in. 
+ const uint8x16_t v_level1_mask = vcleq_u8(v_level1_threshold, v_abs_diff); + const uint8x16_t v_level2_mask = vcleq_u8(v_level2_threshold, v_abs_diff); + const uint8x16_t v_level3_mask = vcleq_u8(v_level3_threshold, v_abs_diff); + + // Calculate absolute adjustments for level 1, 2 and 3. + const uint8x16_t v_level2_adjustment = + vandq_u8(v_level2_mask, v_delta_level_1_and_2); + const uint8x16_t v_level3_adjustment = + vandq_u8(v_level3_mask, v_delta_level_2_and_3); + const uint8x16_t v_level1and2_adjustment = + vaddq_u8(v_level1_adjustment, v_level2_adjustment); + const uint8x16_t v_level1and2and3_adjustment = + vaddq_u8(v_level1and2_adjustment, v_level3_adjustment); + + // Figure adjustment absolute value by selecting between the absolute + // difference if in level0 or the value for level 1, 2 and 3. + const uint8x16_t v_abs_adjustment = + vbslq_u8(v_level1_mask, v_level1and2and3_adjustment, v_abs_diff); + + // Calculate positive and negative adjustments. Apply them to the signal + // and accumulate them. Adjustments are less than eight and the maximum + // sum of them (7 * 16) can fit in a signed char. + const uint8x16_t v_pos_adjustment = + vandq_u8(v_diff_pos_mask, v_abs_adjustment); + const uint8x16_t v_neg_adjustment = + vandq_u8(v_diff_neg_mask, v_abs_adjustment); + + uint8x16_t v_running_avg_y = vqaddq_u8(v_sig, v_pos_adjustment); + v_running_avg_y = vqsubq_u8(v_running_avg_y, v_neg_adjustment); + + // Store results. + vst1q_u8(running_avg_y, v_running_avg_y); + + // Sum all the accumulators to have the sum of all pixel differences + // for this macroblock. 
+ { + const int8x16_t v_sum_diff = + vqsubq_s8(vreinterpretq_s8_u8(v_pos_adjustment), + vreinterpretq_s8_u8(v_neg_adjustment)); + const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff); + const int32x4_t fedc_ba98_7654_3210 = + vpaddlq_s16(fe_dc_ba_98_76_54_32_10); + const int64x2_t fedcba98_76543210 = vpaddlq_s32(fedc_ba98_7654_3210); + + v_sum_diff_total = vqaddq_s64(v_sum_diff_total, fedcba98_76543210); + } + + // Update pointers for next iteration. + sig += sig_stride; + mc_running_avg_y += mc_running_avg_y_stride; + running_avg_y += running_avg_y_stride; + } + + // Too many adjustments => copy block. + { + int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total), + vget_low_s64(v_sum_diff_total)); + int sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); + int sum_diff_thresh = kSumDiffThreshold; + + if (increase_denoising) + sum_diff_thresh = kSumDiffThresholdHigh; + if (sum_diff > sum_diff_thresh) { + // Before returning to copy the block (i.e., apply no denoising), + // check if we can still apply some (weaker) temporal filtering to + // this block, that would otherwise not be denoised at all. Simplest + // is to apply an additional adjustment to running_avg_y to bring it + // closer to sig. The adjustment is capped by a maximum delta, and + // chosen such that in most cases the resulting sum_diff will be + // within the acceptable range given by sum_diff_thresh. + + // The delta is set by the excess of absolute pixel diff over the + // threshold. + int delta = ((sum_diff - sum_diff_thresh) >> 8) + 1; + // Only apply the adjustment for max delta up to 3.
+ if (delta < 4) { + const uint8x16_t k_delta = vmovq_n_u8(delta); + sig -= sig_stride * 16; + mc_running_avg_y -= mc_running_avg_y_stride * 16; + running_avg_y -= running_avg_y_stride * 16; + for (int r = 0; r < 16; ++r) { + uint8x16_t v_running_avg_y = vld1q_u8(running_avg_y); + const uint8x16_t v_sig = vld1q_u8(sig); + const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y); + + // Calculate absolute difference and sign masks. + const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg_y); + const uint8x16_t v_diff_pos_mask = + vcltq_u8(v_sig, v_mc_running_avg_y); + const uint8x16_t v_diff_neg_mask = + vcgtq_u8(v_sig, v_mc_running_avg_y); + // Clamp absolute difference to delta to get the adjustment. + const uint8x16_t v_abs_adjustment = vminq_u8(v_abs_diff, (k_delta)); + + const uint8x16_t v_pos_adjustment = + vandq_u8(v_diff_pos_mask, v_abs_adjustment); + const uint8x16_t v_neg_adjustment = + vandq_u8(v_diff_neg_mask, v_abs_adjustment); + + v_running_avg_y = vqsubq_u8(v_running_avg_y, v_pos_adjustment); + v_running_avg_y = vqaddq_u8(v_running_avg_y, v_neg_adjustment); + + // Store results. + vst1q_u8(running_avg_y, v_running_avg_y); + + { + const int8x16_t v_sum_diff = + vqsubq_s8(vreinterpretq_s8_u8(v_neg_adjustment), + vreinterpretq_s8_u8(v_pos_adjustment)); + + const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff); + const int32x4_t fedc_ba98_7654_3210 = + vpaddlq_s16(fe_dc_ba_98_76_54_32_10); + const int64x2_t fedcba98_76543210 = + vpaddlq_s32(fedc_ba98_7654_3210); + + v_sum_diff_total = vqaddq_s64(v_sum_diff_total, fedcba98_76543210); + } + // Update pointers for next iteration. + sig += sig_stride; + mc_running_avg_y += mc_running_avg_y_stride; + running_avg_y += running_avg_y_stride; + } + { + // Update the sum of all pixel differences of this MB. 
+ x = vqadd_s64(vget_high_s64(v_sum_diff_total), + vget_low_s64(v_sum_diff_total)); + sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); + + if (sum_diff > sum_diff_thresh) { + return COPY_BLOCK; + } + } + } else { + return COPY_BLOCK; + } + } + } + + // Tell above level that block was filtered. + running_avg_y -= running_avg_y_stride * 16; + sig -= sig_stride * 16; + + return FILTER_BLOCK; +} + +} // namespace webrtc diff --git a/webrtc/modules/video_processing/util/denoiser_filter_neon.h b/webrtc/modules/video_processing/util/denoiser_filter_neon.h new file mode 100644 index 0000000000..bc87ba788e --- /dev/null +++ b/webrtc/modules/video_processing/util/denoiser_filter_neon.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_DENOISER_FILTER_NEON_H_ +#define WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_DENOISER_FILTER_NEON_H_ + +#include "webrtc/modules/video_processing/util/denoiser_filter.h" + +namespace webrtc { + +class DenoiserFilterNEON : public DenoiserFilter { + public: + DenoiserFilterNEON() {} + void CopyMem16x16(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride) override; + void CopyMem8x8(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride) override; + uint32_t Variance16x8(const uint8_t* a, + int a_stride, + const uint8_t* b, + int b_stride, + unsigned int* sse) override; + DenoiserDecision MbDenoise(uint8_t* mc_running_avg_y, + int mc_avg_y_stride, + uint8_t* running_avg_y, + int avg_y_stride, + const uint8_t* sig, + int sig_stride, + uint8_t motion_magnitude, + int increase_denoising) override; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_DENOISER_FILTER_NEON_H_ diff --git a/webrtc/modules/video_processing/util/denoiser_filter_sse2.cc b/webrtc/modules/video_processing/util/denoiser_filter_sse2.cc new file mode 100644 index 0000000000..82f11344c0 --- /dev/null +++ b/webrtc/modules/video_processing/util/denoiser_filter_sse2.cc @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <emmintrin.h> + +#include "webrtc/modules/video_processing/util/denoiser_filter_sse2.h" + +namespace webrtc { + +static void Get8x8varSse2(const uint8_t* src, + int src_stride, + const uint8_t* ref, + int ref_stride, + unsigned int* sse, + int* sum) { // Outputs the sum and the sum of squares of |src| minus |ref| over an 8x8 block. + const __m128i zero = _mm_setzero_si128(); + __m128i vsum = _mm_setzero_si128(); + __m128i vsse = _mm_setzero_si128(); + + for (int i = 0; i < 8; i += 2) { + const __m128i src0 = _mm_unpacklo_epi8( + _mm_loadl_epi64((const __m128i*)(src + i * src_stride)), zero); + const __m128i ref0 = _mm_unpacklo_epi8( + _mm_loadl_epi64((const __m128i*)(ref + i * ref_stride)), zero); + const __m128i diff0 = _mm_sub_epi16(src0, ref0); + + const __m128i src1 = _mm_unpacklo_epi8( + _mm_loadl_epi64((const __m128i*)(src + (i + 1) * src_stride)), zero); + const __m128i ref1 = _mm_unpacklo_epi8( + _mm_loadl_epi64((const __m128i*)(ref + (i + 1) * ref_stride)), zero); + const __m128i diff1 = _mm_sub_epi16(src1, ref1); + + vsum = _mm_add_epi16(vsum, diff0); + vsum = _mm_add_epi16(vsum, diff1); + vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0)); + vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1)); + } + + // Fold the eight 16-bit partial sums into lane 0. + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); + *sum = static_cast<int16_t>(_mm_extract_epi16(vsum, 0)); // The extract zero-extends, so the cast restores the sign. + + // Fold the four 32-bit partial sums of squares into lane 0. + vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8)); + vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4)); + *sse = _mm_cvtsi128_si32(vsse); +} + +static void VarianceSSE2(const unsigned char* src, + int src_stride, + const unsigned char* ref, + int ref_stride, + int w, + int h, + uint32_t* sse, + uint32_t* sum, + int block_size) { + *sse = 0; + *sum = 0; + + for (int i = 0; i < h; i += block_size) { + for (int j = 0; j < w; j += block_size) { + uint32_t sse0 = 0; + int32_t sum0 = 0; + + Get8x8varSse2(src + src_stride * i + j, src_stride, + ref + ref_stride * i + j, ref_stride, &sse0, &sum0);
+ *sse += sse0; + *sum += sum0; + } + } +} + +// Compute the sum of all pixel differences of this MB. +static uint32_t AbsSumDiff16x1(__m128i acc_diff) { + const __m128i k_1 = _mm_set1_epi16(1); + const __m128i acc_diff_lo = + _mm_srai_epi16(_mm_unpacklo_epi8(acc_diff, acc_diff), 8); + const __m128i acc_diff_hi = + _mm_srai_epi16(_mm_unpackhi_epi8(acc_diff, acc_diff), 8); + const __m128i acc_diff_16 = _mm_add_epi16(acc_diff_lo, acc_diff_hi); + const __m128i hg_fe_dc_ba = _mm_madd_epi16(acc_diff_16, k_1); + const __m128i hgfe_dcba = + _mm_add_epi32(hg_fe_dc_ba, _mm_srli_si128(hg_fe_dc_ba, 8)); + const __m128i hgfedcba = + _mm_add_epi32(hgfe_dcba, _mm_srli_si128(hgfe_dcba, 4)); + unsigned int sum_diff = abs(_mm_cvtsi128_si32(hgfedcba)); + + return sum_diff; +} + +// TODO(jackychen): Optimize this function using SSE2. +void DenoiserFilterSSE2::CopyMem16x16(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride) { + for (int i = 0; i < 16; i++) { + memcpy(dst, src, 16); + src += src_stride; + dst += dst_stride; + } +} + +// TODO(jackychen): Optimize this function using SSE2. +void DenoiserFilterSSE2::CopyMem8x8(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride) { + for (int i = 0; i < 8; i++) { + memcpy(dst, src, 8); + src += src_stride; + dst += dst_stride; + } +} + +uint32_t DenoiserFilterSSE2::Variance16x8(const uint8_t* src, + int src_stride, + const uint8_t* ref, + int ref_stride, + unsigned int* sse) { // Samples every other row, like the C and NEON filters. + uint32_t sum = 0; // Holds the signed sum modulo 2^32; its square below is still exact. + VarianceSSE2(src, src_stride << 1, ref, ref_stride << 1, 16, 8, sse, &sum, 8); + return *sse - ((sum * sum) >> 7); // 16 * 8 = 128 = 2^7 samples. +} + +DenoiserDecision DenoiserFilterSSE2::MbDenoise(uint8_t* mc_running_avg_y, + int mc_avg_y_stride, + uint8_t* running_avg_y, + int avg_y_stride, + const uint8_t* sig, + int sig_stride, + uint8_t motion_magnitude, + int increase_denoising) { + int shift_inc = + (increase_denoising && motion_magnitude <= kMotionMagnitudeThreshold) + ?
1 + : 0; + __m128i acc_diff = _mm_setzero_si128(); + const __m128i k_0 = _mm_setzero_si128(); + const __m128i k_4 = _mm_set1_epi8(4 + shift_inc); + const __m128i k_8 = _mm_set1_epi8(8); + const __m128i k_16 = _mm_set1_epi8(16); + // Modify each level's adjustment according to motion_magnitude. + const __m128i l3 = _mm_set1_epi8( + (motion_magnitude <= kMotionMagnitudeThreshold) ? 7 + shift_inc : 6); + // Difference between level 3 and level 2 is 2. + const __m128i l32 = _mm_set1_epi8(2); + // Difference between level 2 and level 1 is 1. + const __m128i l21 = _mm_set1_epi8(1); + + for (int r = 0; r < 16; ++r) { + // Calculate differences. + const __m128i v_sig = + _mm_loadu_si128(reinterpret_cast(&sig[0])); + const __m128i v_mc_running_avg_y = + _mm_loadu_si128(reinterpret_cast<__m128i*>(&mc_running_avg_y[0])); + __m128i v_running_avg_y; + const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig); + const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y); + // Obtain the sign. FF if diff is negative. + const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0); + // Clamp absolute difference to 16 to be used to get mask. Doing this + // allows us to use _mm_cmpgt_epi8, which operates on signed byte. + const __m128i clamped_absdiff = + _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_16); + // Get masks for l2 l1 and l0 adjustments. + const __m128i mask2 = _mm_cmpgt_epi8(k_16, clamped_absdiff); + const __m128i mask1 = _mm_cmpgt_epi8(k_8, clamped_absdiff); + const __m128i mask0 = _mm_cmpgt_epi8(k_4, clamped_absdiff); + // Get adjustments for l2, l1, and l0. + __m128i adj2 = _mm_and_si128(mask2, l32); + const __m128i adj1 = _mm_and_si128(mask1, l21); + const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff); + __m128i adj, padj, nadj; + + // Combine the adjustments and get absolute adjustments. 
+ adj2 = _mm_add_epi8(adj2, adj1); + adj = _mm_sub_epi8(l3, adj2); + adj = _mm_andnot_si128(mask0, adj); + adj = _mm_or_si128(adj, adj0); + + // Restore the sign and get positive and negative adjustments. + padj = _mm_andnot_si128(diff_sign, adj); + nadj = _mm_and_si128(diff_sign, adj); + + // Calculate filtered value. + v_running_avg_y = _mm_adds_epu8(v_sig, padj); + v_running_avg_y = _mm_subs_epu8(v_running_avg_y, nadj); + _mm_storeu_si128(reinterpret_cast<__m128i*>(running_avg_y), + v_running_avg_y); + + // Adjustments <=7, and each element in acc_diff can fit in signed + // char. + acc_diff = _mm_adds_epi8(acc_diff, padj); + acc_diff = _mm_subs_epi8(acc_diff, nadj); + + // Update pointers for next iteration. + sig += sig_stride; + mc_running_avg_y += mc_avg_y_stride; + running_avg_y += avg_y_stride; + } + + { + // Compute the sum of all pixel differences of this MB. + unsigned int abs_sum_diff = AbsSumDiff16x1(acc_diff); + unsigned int sum_diff_thresh = kSumDiffThreshold; + if (increase_denoising) + sum_diff_thresh = kSumDiffThresholdHigh; + if (abs_sum_diff > sum_diff_thresh) { + // Before returning to copy the block (i.e., apply no denoising), + // check if we can still apply some (weaker) temporal filtering to + // this block, that would otherwise not be denoised at all. Simplest + // is to apply an additional adjustment to running_avg_y to bring it + // closer to sig. The adjustment is capped by a maximum delta, and + // chosen such that in most cases the resulting sum_diff will be + // within the acceptable range given by sum_diff_thresh. + + // The delta is set by the excess of absolute pixel diff over the + // threshold. + int delta = ((abs_sum_diff - sum_diff_thresh) >> 8) + 1; + // Only apply the adjustment for max delta up to 3. 
+ if (delta < 4) { + const __m128i k_delta = _mm_set1_epi8(delta); + sig -= sig_stride * 16; + mc_running_avg_y -= mc_avg_y_stride * 16; + running_avg_y -= avg_y_stride * 16; + for (int r = 0; r < 16; ++r) { + __m128i v_running_avg_y = + _mm_loadu_si128(reinterpret_cast<__m128i*>(&running_avg_y[0])); + // Calculate differences. + const __m128i v_sig = + _mm_loadu_si128(reinterpret_cast(&sig[0])); + const __m128i v_mc_running_avg_y = _mm_loadu_si128( + reinterpret_cast<__m128i*>(&mc_running_avg_y[0])); + const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig); + const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y); + // Obtain the sign. FF if diff is negative. + const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0); + // Clamp absolute difference to delta to get the adjustment. + const __m128i adj = _mm_min_epu8( + _mm_or_si128(pdiff, ndiff), k_delta); + // Restore the sign and get positive and negative adjustments. + __m128i padj, nadj; + padj = _mm_andnot_si128(diff_sign, adj); + nadj = _mm_and_si128(diff_sign, adj); + // Calculate filtered value. + v_running_avg_y = _mm_subs_epu8(v_running_avg_y, padj); + v_running_avg_y = _mm_adds_epu8(v_running_avg_y, nadj); + _mm_storeu_si128(reinterpret_cast<__m128i*>(running_avg_y), + v_running_avg_y); + + // Accumulate the adjustments. + acc_diff = _mm_subs_epi8(acc_diff, padj); + acc_diff = _mm_adds_epi8(acc_diff, nadj); + + // Update pointers for next iteration. 
+ sig += sig_stride; + mc_running_avg_y += mc_avg_y_stride; + running_avg_y += avg_y_stride; + } + abs_sum_diff = AbsSumDiff16x1(acc_diff); + if (abs_sum_diff > sum_diff_thresh) { + return COPY_BLOCK; + } + } else { + return COPY_BLOCK; + } + } + } + return FILTER_BLOCK; +} + +} // namespace webrtc diff --git a/webrtc/modules/video_processing/util/denoiser_filter_sse2.h b/webrtc/modules/video_processing/util/denoiser_filter_sse2.h new file mode 100644 index 0000000000..31d8510902 --- /dev/null +++ b/webrtc/modules/video_processing/util/denoiser_filter_sse2.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_DENOISER_FILTER_SSE2_H_ +#define WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_DENOISER_FILTER_SSE2_H_ + +#include "webrtc/modules/video_processing/util/denoiser_filter.h" + +namespace webrtc { + +class DenoiserFilterSSE2 : public DenoiserFilter { + public: + DenoiserFilterSSE2() {} + void CopyMem16x16(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride) override; + void CopyMem8x8(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride) override; + uint32_t Variance16x8(const uint8_t* a, + int a_stride, + const uint8_t* b, + int b_stride, + unsigned int* sse) override; + DenoiserDecision MbDenoise(uint8_t* mc_running_avg_y, + int mc_avg_y_stride, + uint8_t* running_avg_y, + int avg_y_stride, + const uint8_t* sig, + int sig_stride, + uint8_t motion_magnitude, + int increase_denoising) override; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_DENOISER_FILTER_SSE2_H_ diff --git 
a/webrtc/modules/video_processing/util/skin_detection.cc b/webrtc/modules/video_processing/util/skin_detection.cc new file mode 100755 index 0000000000..b3b2cd6aad --- /dev/null +++ b/webrtc/modules/video_processing/util/skin_detection.cc @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "webrtc/modules/video_processing/util/skin_detection.h" + +namespace webrtc { + +// Fixed-point skin color model parameters. +static const int skin_mean[2] = {7463, 9614}; // q6 +static const int skin_inv_cov[4] = {4107, 1663, 1663, 2157}; // q16 +static const int skin_threshold = 1570636; // q18 + +// Thresholds on luminance. +static const int y_low = 20; +static const int y_high = 220; + +// Evaluates the Mahalanobis distance measure for the input CbCr values. 
+static int EvaluateSkinColorDifference(int cb, int cr) { + const int cb_q6 = cb << 6; + const int cr_q6 = cr << 6; + const int cb_diff_q12 = (cb_q6 - skin_mean[0]) * (cb_q6 - skin_mean[0]); + const int cbcr_diff_q12 = (cb_q6 - skin_mean[0]) * (cr_q6 - skin_mean[1]); + const int cr_diff_q12 = (cr_q6 - skin_mean[1]) * (cr_q6 - skin_mean[1]); + const int cb_diff_q2 = (cb_diff_q12 + (1 << 9)) >> 10; + const int cbcr_diff_q2 = (cbcr_diff_q12 + (1 << 9)) >> 10; + const int cr_diff_q2 = (cr_diff_q12 + (1 << 9)) >> 10; + const int skin_diff = + skin_inv_cov[0] * cb_diff_q2 + skin_inv_cov[1] * cbcr_diff_q2 + + skin_inv_cov[2] * cbcr_diff_q2 + skin_inv_cov[3] * cr_diff_q2; + return skin_diff; +} + +bool MbHasSkinColor(const uint8_t* y_src, + const uint8_t* u_src, + const uint8_t* v_src, + const int stride_y, + const int stride_u, + const int stride_v, + const int mb_row, + const int mb_col) { + const uint8_t* y = + y_src + ((mb_row << 4) + 8) * stride_y + (mb_col << 4) + 8; + const uint8_t* u = + u_src + ((mb_row << 3) + 4) * stride_u + (mb_col << 3) + 4; + const uint8_t* v = + v_src + ((mb_row << 3) + 4) * stride_v + (mb_col << 3) + 4; + // Use 2x2 average of center pixel to compute skin area. + uint8_t y_avg = + (*y + *(y + 1) + *(y + stride_y) + *(y + stride_y + 1)) >> 2; + uint8_t u_avg = + (*u + *(u + 1) + *(u + stride_u) + *(u + stride_u + 1)) >> 2; + uint8_t v_avg = + (*v + *(v + 1) + *(v + stride_v) + *(v + stride_v + 1)) >> 2; + // Ignore MB with too high or low brightness. + if (y_avg < y_low || y_avg > y_high) + return false; + else + return (EvaluateSkinColorDifference(u_avg, v_avg) < skin_threshold); +} + +} // namespace webrtc diff --git a/webrtc/modules/video_processing/util/skin_detection.h b/webrtc/modules/video_processing/util/skin_detection.h new file mode 100755 index 0000000000..561c03c425 --- /dev/null +++ b/webrtc/modules/video_processing/util/skin_detection.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_SKIN_DETECTION_H_ +#define WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_SKIN_DETECTION_H_ + +namespace webrtc { + +typedef unsigned char uint8_t; +bool MbHasSkinColor(const uint8_t* y_src, + const uint8_t* u_src, + const uint8_t* v_src, + const int stride_y, + const int stride_u, + const int stride_v, + const int mb_row, + const int mb_col); + +} // namespace webrtc + +#endif // WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_SKIN_DETECTION_H_ diff --git a/webrtc/modules/video_processing/video_denoiser.cc b/webrtc/modules/video_processing/video_denoiser.cc new file mode 100644 index 0000000000..83bcf7f1cc --- /dev/null +++ b/webrtc/modules/video_processing/video_denoiser.cc @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "webrtc/common_video/libyuv/include/scaler.h" +#include "webrtc/common_video/libyuv/include/webrtc_libyuv.h" +#include "webrtc/modules/video_processing/video_denoiser.h" + +namespace webrtc { + +VideoDenoiser::VideoDenoiser() + : width_(0), + height_(0), + filter_(DenoiserFilter::Create()) {} + +void VideoDenoiser::TrailingReduction(int mb_rows, + int mb_cols, + const uint8_t* y_src, + int stride_y, + uint8_t* y_dst) { + for (int mb_row = 1; mb_row < mb_rows - 1; ++mb_row) { + for (int mb_col = 1; mb_col < mb_cols - 1; ++mb_col) { + int mb_index = mb_row * mb_cols + mb_col; + uint8_t* mb_dst = y_dst + (mb_row << 4) * stride_y + (mb_col << 4); + const uint8_t* mb_src = y_src + (mb_row << 4) * stride_y + (mb_col << 4); + // If the number of denoised neighbors is less than a threshold, + // do NOT denoise for the block. Set different threshold for skin MB. + // The change of denoising status will not propagate. + if (metrics_[mb_index].is_skin) { + // The threshold is high (more strict) for non-skin MB where the trailing + // usually happen. 
+ if (metrics_[mb_index].denoise && + metrics_[mb_index + 1].denoise + + metrics_[mb_index - 1].denoise + + metrics_[mb_index + mb_cols].denoise + + metrics_[mb_index - mb_cols].denoise <= 2) { + metrics_[mb_index].denoise = 0; + filter_->CopyMem16x16(mb_src, stride_y, mb_dst, stride_y); + } + } else if (metrics_[mb_index].denoise && + metrics_[mb_index + 1].denoise + + metrics_[mb_index - 1].denoise + + metrics_[mb_index + mb_cols + 1].denoise + + metrics_[mb_index + mb_cols - 1].denoise + + metrics_[mb_index - mb_cols + 1].denoise + + metrics_[mb_index - mb_cols - 1].denoise + + metrics_[mb_index + mb_cols].denoise + + metrics_[mb_index - mb_cols].denoise <= 7) { + filter_->CopyMem16x16(mb_src, stride_y, mb_dst, stride_y); + } + } + } +} + +void VideoDenoiser::DenoiseFrame(const VideoFrame& frame, + VideoFrame* denoised_frame) { + int stride_y = frame.stride(kYPlane); + int stride_u = frame.stride(kUPlane); + int stride_v = frame.stride(kVPlane); + // If previous width and height are different from current frame's, then no + // denoising for the current frame. + if (width_ != frame.width() || height_ != frame.height()) { + width_ = frame.width(); + height_ = frame.height(); + denoised_frame->CreateFrame(frame.buffer(kYPlane), frame.buffer(kUPlane), + frame.buffer(kVPlane), width_, height_, + stride_y, stride_u, stride_v); + // Setting time parameters to the output frame. + denoised_frame->set_timestamp(frame.timestamp()); + denoised_frame->set_render_time_ms(frame.render_time_ms()); + return; + } + // For 16x16 block. + int mb_cols = width_ >> 4; + int mb_rows = height_ >> 4; + if (metrics_.get() == nullptr) + metrics_.reset(new DenoiseMetrics[mb_cols * mb_rows]); + // Denoise on Y plane. 
+ uint8_t* y_dst = denoised_frame->buffer(kYPlane); + uint8_t* u_dst = denoised_frame->buffer(kUPlane); + uint8_t* v_dst = denoised_frame->buffer(kVPlane); + const uint8_t* y_src = frame.buffer(kYPlane); + const uint8_t* u_src = frame.buffer(kUPlane); + const uint8_t* v_src = frame.buffer(kVPlane); + // Temporary buffer to store denoising result. + uint8_t y_tmp[16 * 16] = {0}; + for (int mb_row = 0; mb_row < mb_rows; ++mb_row) { + for (int mb_col = 0; mb_col < mb_cols; ++mb_col) { + const uint8_t* mb_src = + y_src + (mb_row << 4) * stride_y + (mb_col << 4); + uint8_t* mb_dst = y_dst + (mb_row << 4) * stride_y + (mb_col << 4); + int mb_index = mb_row * mb_cols + mb_col; + // Denoise each MB at the very start and save the result to a temporary + // buffer. + if (filter_->MbDenoise( + mb_dst, stride_y, y_tmp, 16, mb_src, stride_y, 0, 1) == + FILTER_BLOCK) { + uint32_t thr_var = 0; + // Save var and sad to the buffer. + metrics_[mb_index].var = filter_->Variance16x8( + mb_dst, stride_y, y_tmp, 16, &metrics_[mb_index].sad); + // Get skin map. + metrics_[mb_index].is_skin = + MbHasSkinColor(y_src, u_src, v_src, stride_y, stride_u, stride_v, + mb_row, mb_col); + // Variance threshold for skin/non-skin MB is different. + // Skin MB use a small threshold to reduce blockiness. + thr_var = metrics_[mb_index].is_skin ? 128 : 12 * 128; + if (metrics_[mb_index].var > thr_var) { + metrics_[mb_index].denoise = 0; + // Use the source MB. + filter_->CopyMem16x16(mb_src, stride_y, mb_dst, stride_y); + } else { + metrics_[mb_index].denoise = 1; + // Use the denoised MB. + filter_->CopyMem16x16(y_tmp, 16, mb_dst, stride_y); + } + } else { + metrics_[mb_index].denoise = 0; + filter_->CopyMem16x16(mb_src, stride_y, mb_dst, stride_y); + } + // Copy source U/V plane. 
+ const uint8_t* mb_src_u = + u_src + (mb_row << 3) * stride_u + (mb_col << 3); + const uint8_t* mb_src_v = + v_src + (mb_row << 3) * stride_v + (mb_col << 3); + uint8_t* mb_dst_u = u_dst + (mb_row << 3) * stride_u + (mb_col << 3); + uint8_t* mb_dst_v = v_dst + (mb_row << 3) * stride_v + (mb_col << 3); + filter_->CopyMem8x8(mb_src_u, stride_u, mb_dst_u, stride_u); + filter_->CopyMem8x8(mb_src_v, stride_v, mb_dst_v, stride_v); + } + } + // Second round. + // This is to reduce the trailing artifact and blockiness by referring + // neighbors' denoising status. + TrailingReduction(mb_rows, mb_cols, y_src, stride_y, y_dst); + + // Setting time parameters to the output frame. + denoised_frame->set_timestamp(frame.timestamp()); + denoised_frame->set_render_time_ms(frame.render_time_ms()); + return; +} + +} // namespace webrtc diff --git a/webrtc/modules/video_processing/video_denoiser.h b/webrtc/modules/video_processing/video_denoiser.h new file mode 100644 index 0000000000..69edfaf956 --- /dev/null +++ b/webrtc/modules/video_processing/video_denoiser.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_VIDEO_PROCESSING_VIDEO_DENOISER_H_ +#define WEBRTC_MODULES_VIDEO_PROCESSING_VIDEO_DENOISER_H_ + +#include "webrtc/modules/video_processing/util/denoiser_filter.h" +#include "webrtc/modules/video_processing/util/skin_detection.h" + +namespace webrtc { + +class VideoDenoiser { + public: + VideoDenoiser(); + void DenoiseFrame(const VideoFrame& frame, VideoFrame* denoised_frame); + + private: + void TrailingReduction(int mb_rows, int mb_cols, const uint8_t* y_src, + int stride_y, uint8_t* y_dst); + int width_; + int height_; + rtc::scoped_ptr metrics_; + rtc::scoped_ptr filter_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_VIDEO_PROCESSING_VIDEO_DENOISER_H_ diff --git a/webrtc/modules/video_processing/video_processing.gypi b/webrtc/modules/video_processing/video_processing.gypi index 25e2097007..e054f895be 100644 --- a/webrtc/modules/video_processing/video_processing.gypi +++ b/webrtc/modules/video_processing/video_processing.gypi @@ -36,11 +36,22 @@ 'video_decimator.h', 'video_processing_impl.cc', 'video_processing_impl.h', + 'video_denoiser.cc', + 'video_denoiser.h', + 'util/denoiser_filter.cc', + 'util/denoiser_filter.h', + 'util/denoiser_filter_c.cc', + 'util/denoiser_filter_c.h', + 'util/skin_detection.cc', + 'util/skin_detection.h', ], 'conditions': [ ['target_arch=="ia32" or target_arch=="x64"', { 'dependencies': [ 'video_processing_sse2', ], }], + ['target_arch=="arm" or target_arch == "arm64"', { + 'dependencies': [ 'video_processing_neon', ], + }], ], }, ], @@ -52,6 +63,8 @@ 'type': 'static_library', 'sources': [ 'content_analysis_sse2.cc', + 'util/denoiser_filter_sse2.cc', + 'util/denoiser_filter_sse2.h', ], 'conditions': [ ['os_posix==1 and OS!="mac"', { @@ -66,6 +79,19 @@ }, ], }], + ['target_arch=="arm" or target_arch == "arm64"', { + 'targets': [ + { + 'target_name': 'video_processing_neon', + 'type': 'static_library', + 'includes': [ '../../build/arm_neon.gypi', ], + 'sources': [ + 
'util/denoiser_filter_neon.cc', + 'util/denoiser_filter_neon.h', + ], + }, + ], + }], ], }