From afaae0d15132161a9ef88ade6da9a27d8f9049eb Mon Sep 17 00:00:00 2001 From: jackychen Date: Tue, 12 Apr 2016 23:02:55 -0700 Subject: [PATCH] External VNR speed improvement. Improved visual quality with 3x times speed-up. Change list: 1. Remove second chance filter in temporal denoising filter to mitigate trailing artifact. 2. Add swap buffer to save one whole-frame memcpy. 3. Do noise estimation on every N blocks. 4. Adopt a faster moving object detection algorithm (change the structure). 5. Refactor the for loops and PositionCheck(). 6. Refactor the function ReduceFalseDetection (RFD). 7. Fix a bug in TrailingBlock() which causes a mismatch. 8. Change unit test to support swap buffer test. 9. Remove CopyMem8x8, use memcpy to copy U/V plane which can be optimized future. 10. Remove DenoiseMetrics. Review URL: https://codereview.webrtc.org/1871853003 Cr-Commit-Position: refs/heads/master@{#12340} --- .../video_processing/frame_preprocessor.cc | 16 +- .../video_processing/frame_preprocessor.h | 4 +- .../video_processing/test/denoiser_test.cc | 58 ++- .../video_processing/util/denoiser_filter.h | 13 +- .../util/denoiser_filter_c.cc | 72 +-- .../video_processing/util/denoiser_filter_c.h | 7 +- .../util/denoiser_filter_neon.cc | 110 +--- .../util/denoiser_filter_neon.h | 7 +- .../util/denoiser_filter_sse2.cc | 94 +--- .../util/denoiser_filter_sse2.h | 7 +- .../video_processing/util/noise_estimation.cc | 33 +- .../video_processing/util/noise_estimation.h | 14 +- .../video_processing/video_denoiser.cc | 491 +++++++++--------- .../modules/video_processing/video_denoiser.h | 46 +- 14 files changed, 391 insertions(+), 581 deletions(-) diff --git a/webrtc/modules/video_processing/frame_preprocessor.cc b/webrtc/modules/video_processing/frame_preprocessor.cc index fd0d0efb97..c2042148a2 100644 --- a/webrtc/modules/video_processing/frame_preprocessor.cc +++ b/webrtc/modules/video_processing/frame_preprocessor.cc @@ -23,6 +23,7 @@ VPMFramePreprocessor::VPMFramePreprocessor() ca_ 
= new VPMContentAnalysis(true); vd_ = new VPMVideoDecimator(); EnableDenosing(false); + denoised_frame_toggle_ = 0; } VPMFramePreprocessor::~VPMFramePreprocessor() { @@ -116,9 +117,18 @@ const VideoFrame* VPMFramePreprocessor::PreprocessFrame( const VideoFrame* current_frame = &frame; if (denoiser_) { - denoiser_->DenoiseFrame(*current_frame, &denoised_frame_, - &denoised_frame_prev_, 0); - current_frame = &denoised_frame_; + VideoFrame* denoised_frame = &denoised_frame_[0]; + VideoFrame* denoised_frame_prev = &denoised_frame_[1]; + // Swap the buffer to save one memcpy in DenoiseFrame. + if (denoised_frame_toggle_) { + denoised_frame = &denoised_frame_[1]; + denoised_frame_prev = &denoised_frame_[0]; + } + // Invert the flag. + denoised_frame_toggle_ ^= 1; + denoiser_->DenoiseFrame(*current_frame, denoised_frame, denoised_frame_prev, + true); + current_frame = denoised_frame; } if (spatial_resampler_->ApplyResample(current_frame->width(), diff --git a/webrtc/modules/video_processing/frame_preprocessor.h b/webrtc/modules/video_processing/frame_preprocessor.h index c35dd0d7af..270fbc2fc9 100644 --- a/webrtc/modules/video_processing/frame_preprocessor.h +++ b/webrtc/modules/video_processing/frame_preprocessor.h @@ -70,14 +70,14 @@ class VPMFramePreprocessor { enum { kSkipFrameCA = 2 }; VideoContentMetrics* content_metrics_; - VideoFrame denoised_frame_; - VideoFrame denoised_frame_prev_; + VideoFrame denoised_frame_[2]; VideoFrame resampled_frame_; VPMSpatialResampler* spatial_resampler_; VPMContentAnalysis* ca_; VPMVideoDecimator* vd_; std::unique_ptr denoiser_; bool enable_ca_; + uint8_t denoised_frame_toggle_; uint32_t frame_cnt_; }; diff --git a/webrtc/modules/video_processing/test/denoiser_test.cc b/webrtc/modules/video_processing/test/denoiser_test.cc index a45f933bb5..4c13a05d63 100644 --- a/webrtc/modules/video_processing/test/denoiser_test.cc +++ b/webrtc/modules/video_processing/test/denoiser_test.cc @@ -31,18 +31,10 @@ TEST_F(VideoProcessingTest, CopyMem) 
{ } } - memset(dst, 0, 8 * 8); - df_c->CopyMem8x8(src, 8, dst, 8); - EXPECT_EQ(0, memcmp(src, dst, 8 * 8)); - memset(dst, 0, 16 * 16); df_c->CopyMem16x16(src, 16, dst, 16); EXPECT_EQ(0, memcmp(src, dst, 16 * 16)); - memset(dst, 0, 8 * 8); - df_sse_neon->CopyMem16x16(src, 8, dst, 8); - EXPECT_EQ(0, memcmp(src, dst, 8 * 8)); - memset(dst, 0, 16 * 16); df_sse_neon->CopyMem16x16(src, 16, dst, 16); EXPECT_EQ(0, memcmp(src, dst, 16 * 16)); @@ -87,10 +79,9 @@ TEST_F(VideoProcessingTest, MbDenoise) { } } memset(dst, 0, 16 * 16); - df_c->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1, false); + df_c->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1); memset(dst_sse_neon, 0, 16 * 16); - df_sse_neon->MbDenoise(running_src, 16, dst_sse_neon, 16, src, 16, 0, 1, - false); + df_sse_neon->MbDenoise(running_src, 16, dst_sse_neon, 16, src, 16, 0, 1); EXPECT_EQ(0, memcmp(dst, dst_sse_neon, 16 * 16)); // Test case: |diff| >= |4 + shift_inc1| @@ -101,10 +92,9 @@ TEST_F(VideoProcessingTest, MbDenoise) { } } memset(dst, 0, 16 * 16); - df_c->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1, false); + df_c->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1); memset(dst_sse_neon, 0, 16 * 16); - df_sse_neon->MbDenoise(running_src, 16, dst_sse_neon, 16, src, 16, 0, 1, - false); + df_sse_neon->MbDenoise(running_src, 16, dst_sse_neon, 16, src, 16, 0, 1); EXPECT_EQ(0, memcmp(dst, dst_sse_neon, 16 * 16)); // Test case: |diff| >= 8 @@ -115,10 +105,9 @@ TEST_F(VideoProcessingTest, MbDenoise) { } } memset(dst, 0, 16 * 16); - df_c->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1, false); + df_c->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1); memset(dst_sse_neon, 0, 16 * 16); - df_sse_neon->MbDenoise(running_src, 16, dst_sse_neon, 16, src, 16, 0, 1, - false); + df_sse_neon->MbDenoise(running_src, 16, dst_sse_neon, 16, src, 16, 0, 1); EXPECT_EQ(0, memcmp(dst, dst_sse_neon, 16 * 16)); // Test case: |diff| > 15 @@ -130,22 +119,23 @@ TEST_F(VideoProcessingTest, MbDenoise) { } memset(dst, 0, 16 
* 16); DenoiserDecision decision = - df_c->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1, false); + df_c->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1); EXPECT_EQ(COPY_BLOCK, decision); - decision = - df_sse_neon->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1, false); + decision = df_sse_neon->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1); EXPECT_EQ(COPY_BLOCK, decision); } TEST_F(VideoProcessingTest, Denoiser) { + // Used in swap buffer. + int denoised_frame_toggle = 0; // Create pure C denoiser. VideoDenoiser denoiser_c(false); // Create SSE or NEON denoiser. VideoDenoiser denoiser_sse_neon(true); VideoFrame denoised_frame_c; - VideoFrame denoised_frame_track_c; + VideoFrame denoised_frame_prev_c; VideoFrame denoised_frame_sse_neon; - VideoFrame denoised_frame_track_sse_neon; + VideoFrame denoised_frame_prev_sse_neon; std::unique_ptr video_buffer(new uint8_t[frame_length_]); while (fread(video_buffer.get(), 1, frame_length_, source_file_) == @@ -154,13 +144,25 @@ TEST_F(VideoProcessingTest, Denoiser) { EXPECT_EQ(0, ConvertToI420(kI420, video_buffer.get(), 0, 0, width_, height_, 0, kVideoRotation_0, &video_frame_)); - denoiser_c.DenoiseFrame(video_frame_, &denoised_frame_c, - &denoised_frame_track_c, -1); - denoiser_sse_neon.DenoiseFrame(video_frame_, &denoised_frame_sse_neon, - &denoised_frame_track_sse_neon, -1); - + VideoFrame* p_denoised_c = &denoised_frame_c; + VideoFrame* p_denoised_prev_c = &denoised_frame_prev_c; + VideoFrame* p_denoised_sse_neon = &denoised_frame_sse_neon; + VideoFrame* p_denoised_prev_sse_neon = &denoised_frame_prev_sse_neon; + // Swap the buffer to save one memcpy in DenoiseFrame. 
+ if (denoised_frame_toggle) { + p_denoised_c = &denoised_frame_prev_c; + p_denoised_prev_c = &denoised_frame_c; + p_denoised_sse_neon = &denoised_frame_prev_sse_neon; + p_denoised_prev_sse_neon = &denoised_frame_sse_neon; + } + denoiser_c.DenoiseFrame(video_frame_, p_denoised_c, p_denoised_prev_c, + false); + denoiser_sse_neon.DenoiseFrame(video_frame_, p_denoised_sse_neon, + p_denoised_prev_sse_neon, false); + // Invert the flag. + denoised_frame_toggle ^= 1; // Denoising results should be the same for C and SSE/NEON denoiser. - ASSERT_TRUE(test::FramesEqual(denoised_frame_c, denoised_frame_sse_neon)); + ASSERT_TRUE(test::FramesEqual(*p_denoised_c, *p_denoised_sse_neon)); } ASSERT_NE(0, feof(source_file_)) << "Error reading source file"; } diff --git a/webrtc/modules/video_processing/util/denoiser_filter.h b/webrtc/modules/video_processing/util/denoiser_filter.h index f2c7570083..1254a88d3c 100644 --- a/webrtc/modules/video_processing/util/denoiser_filter.h +++ b/webrtc/modules/video_processing/util/denoiser_filter.h @@ -25,12 +25,6 @@ extern const int kSumDiffThresholdHigh; enum DenoiserDecision { COPY_BLOCK, FILTER_BLOCK }; enum CpuType { CPU_NEON, CPU_NOT_NEON }; -struct DenoiseMetrics { - uint32_t var; - uint32_t sad; - uint8_t denoise; - bool is_skin; -}; class DenoiserFilter { public: @@ -43,10 +37,6 @@ class DenoiserFilter { int src_stride, uint8_t* dst, int dst_stride) = 0; - virtual void CopyMem8x8(const uint8_t* src, - int src_stride, - uint8_t* dst, - int dst_stride) = 0; virtual uint32_t Variance16x8(const uint8_t* a, int a_stride, const uint8_t* b, @@ -59,8 +49,7 @@ class DenoiserFilter { const uint8_t* sig, int sig_stride, uint8_t motion_magnitude, - int increase_denoising, - bool denoise_always) = 0; + int increase_denoising) = 0; }; } // namespace webrtc diff --git a/webrtc/modules/video_processing/util/denoiser_filter_c.cc b/webrtc/modules/video_processing/util/denoiser_filter_c.cc index 8c84f4989c..1b3c0b7098 100644 --- 
a/webrtc/modules/video_processing/util/denoiser_filter_c.cc +++ b/webrtc/modules/video_processing/util/denoiser_filter_c.cc @@ -25,17 +25,6 @@ void DenoiserFilterC::CopyMem16x16(const uint8_t* src, } } -void DenoiserFilterC::CopyMem8x8(const uint8_t* src, - int src_stride, - uint8_t* dst, - int dst_stride) { - for (int i = 0; i < 8; i++) { - memcpy(dst, src, 8); - src += src_stride; - dst += dst_stride; - } -} - uint32_t DenoiserFilterC::Variance16x8(const uint8_t* a, int a_stride, const uint8_t* b, @@ -66,8 +55,7 @@ DenoiserDecision DenoiserFilterC::MbDenoise(uint8_t* mc_running_avg_y, const uint8_t* sig, int sig_stride, uint8_t motion_magnitude, - int increase_denoising, - bool denoise_always) { + int increase_denoising) { int sum_diff_thresh = 0; int sum_diff = 0; int adj_val[3] = {3, 4, 6}; @@ -137,60 +125,10 @@ DenoiserDecision DenoiserFilterC::MbDenoise(uint8_t* mc_running_avg_y, sum_diff += col_sum[c]; } - if (denoise_always) - sum_diff_thresh = INT_MAX; - else if (increase_denoising) - sum_diff_thresh = kSumDiffThresholdHigh; - else - sum_diff_thresh = kSumDiffThreshold; - if (abs(sum_diff) > sum_diff_thresh) { - int delta = ((abs(sum_diff) - sum_diff_thresh) >> 8) + 1; - // Only apply the adjustment for max delta up to 3. - if (delta < 4) { - sig -= sig_stride * 16; - mc_running_avg_y -= mc_avg_y_stride * 16; - running_avg_y -= avg_y_stride * 16; - for (int r = 0; r < 16; ++r) { - for (int c = 0; c < 16; ++c) { - int diff = mc_running_avg_y[c] - sig[c]; - int adjustment = abs(diff); - if (adjustment > delta) - adjustment = delta; - if (diff > 0) { - // Bring denoised signal down. - if (running_avg_y[c] - adjustment < 0) - running_avg_y[c] = 0; - else - running_avg_y[c] = running_avg_y[c] - adjustment; - col_sum[c] -= adjustment; - } else if (diff < 0) { - // Bring denoised signal up. 
- if (running_avg_y[c] + adjustment > 255) - running_avg_y[c] = 255; - else - running_avg_y[c] = running_avg_y[c] + adjustment; - col_sum[c] += adjustment; - } - } - sig += sig_stride; - mc_running_avg_y += mc_avg_y_stride; - running_avg_y += avg_y_stride; - } - - sum_diff = 0; - for (int c = 0; c < 16; ++c) { - if (col_sum[c] >= 128) { - col_sum[c] = 127; - } - sum_diff += col_sum[c]; - } - - if (abs(sum_diff) > sum_diff_thresh) - return COPY_BLOCK; - } else { - return COPY_BLOCK; - } - } + sum_diff_thresh = + increase_denoising ? kSumDiffThresholdHigh : kSumDiffThreshold; + if (abs(sum_diff) > sum_diff_thresh) + return COPY_BLOCK; return FILTER_BLOCK; } diff --git a/webrtc/modules/video_processing/util/denoiser_filter_c.h b/webrtc/modules/video_processing/util/denoiser_filter_c.h index 3e52c3e47c..d8b6c5eb79 100644 --- a/webrtc/modules/video_processing/util/denoiser_filter_c.h +++ b/webrtc/modules/video_processing/util/denoiser_filter_c.h @@ -22,10 +22,6 @@ class DenoiserFilterC : public DenoiserFilter { int src_stride, uint8_t* dst, int dst_stride) override; - void CopyMem8x8(const uint8_t* src, - int src_stride, - uint8_t* dst, - int dst_stride) override; uint32_t Variance16x8(const uint8_t* a, int a_stride, const uint8_t* b, @@ -38,8 +34,7 @@ class DenoiserFilterC : public DenoiserFilter { const uint8_t* sig, int sig_stride, uint8_t motion_magnitude, - int increase_denoising, - bool denoise_always) override; + int increase_denoising) override; }; } // namespace webrtc diff --git a/webrtc/modules/video_processing/util/denoiser_filter_neon.cc b/webrtc/modules/video_processing/util/denoiser_filter_neon.cc index 2920305f71..195b985b98 100644 --- a/webrtc/modules/video_processing/util/denoiser_filter_neon.cc +++ b/webrtc/modules/video_processing/util/denoiser_filter_neon.cc @@ -75,20 +75,6 @@ void DenoiserFilterNEON::CopyMem16x16(const uint8_t* src, } } -void DenoiserFilterNEON::CopyMem8x8(const uint8_t* src, - int src_stride, - uint8_t* dst, - int dst_stride) { - 
uint8x8_t vtmp; - - for (int r = 0; r < 8; r++) { - vtmp = vld1_u8(src); - vst1_u8(dst, vtmp); - src += src_stride; - dst += dst_stride; - } -} - uint32_t DenoiserFilterNEON::Variance16x8(const uint8_t* a, int a_stride, const uint8_t* b, @@ -106,8 +92,7 @@ DenoiserDecision DenoiserFilterNEON::MbDenoise(uint8_t* mc_running_avg_y, const uint8_t* sig, int sig_stride, uint8_t motion_magnitude, - int increase_denoising, - bool denoise_always) { + int increase_denoising) { // If motion_magnitude is small, making the denoiser more aggressive by // increasing the adjustment for each level, level1 adjustment is // increased, the deltas stay the same. @@ -190,92 +175,13 @@ DenoiserDecision DenoiserFilterNEON::MbDenoise(uint8_t* mc_running_avg_y, } // Too much adjustments => copy block. - { - int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total), - vget_low_s64(v_sum_diff_total)); - int sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); - if (denoise_always) - sum_diff_thresh = INT_MAX; - else if (increase_denoising) - sum_diff_thresh = kSumDiffThresholdHigh; - else - sum_diff_thresh = kSumDiffThreshold; - if (sum_diff > sum_diff_thresh) { - // Before returning to copy the block (i.e., apply no denoising), - // checK if we can still apply some (weaker) temporal filtering to - // this block, that would otherwise not be denoised at all. Simplest - // is to apply an additional adjustment to running_avg_y to bring it - // closer to sig. The adjustment is capped by a maximum delta, and - // chosen such that in most cases the resulting sum_diff will be - // within the accceptable range given by sum_diff_thresh. - - // The delta is set by the excess of absolute pixel diff over the - // threshold. - int delta = ((sum_diff - sum_diff_thresh) >> 8) + 1; - // Only apply the adjustment for max delta up to 3. 
- if (delta < 4) { - const uint8x16_t k_delta = vmovq_n_u8(delta); - sig -= sig_stride * 16; - mc_running_avg_y -= mc_running_avg_y_stride * 16; - running_avg_y -= running_avg_y_stride * 16; - for (int r = 0; r < 16; ++r) { - uint8x16_t v_running_avg_y = vld1q_u8(running_avg_y); - const uint8x16_t v_sig = vld1q_u8(sig); - const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y); - - // Calculate absolute difference and sign masks. - const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg_y); - const uint8x16_t v_diff_pos_mask = - vcltq_u8(v_sig, v_mc_running_avg_y); - const uint8x16_t v_diff_neg_mask = - vcgtq_u8(v_sig, v_mc_running_avg_y); - // Clamp absolute difference to delta to get the adjustment. - const uint8x16_t v_abs_adjustment = vminq_u8(v_abs_diff, (k_delta)); - - const uint8x16_t v_pos_adjustment = - vandq_u8(v_diff_pos_mask, v_abs_adjustment); - const uint8x16_t v_neg_adjustment = - vandq_u8(v_diff_neg_mask, v_abs_adjustment); - - v_running_avg_y = vqsubq_u8(v_running_avg_y, v_pos_adjustment); - v_running_avg_y = vqaddq_u8(v_running_avg_y, v_neg_adjustment); - - // Store results. - vst1q_u8(running_avg_y, v_running_avg_y); - - { - const int8x16_t v_sum_diff = - vqsubq_s8(vreinterpretq_s8_u8(v_neg_adjustment), - vreinterpretq_s8_u8(v_pos_adjustment)); - - const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff); - const int32x4_t fedc_ba98_7654_3210 = - vpaddlq_s16(fe_dc_ba_98_76_54_32_10); - const int64x2_t fedcba98_76543210 = - vpaddlq_s32(fedc_ba98_7654_3210); - - v_sum_diff_total = vqaddq_s64(v_sum_diff_total, fedcba98_76543210); - } - // Update pointers for next iteration. - sig += sig_stride; - mc_running_avg_y += mc_running_avg_y_stride; - running_avg_y += running_avg_y_stride; - } - { - // Update the sum of all pixel differences of this MB. 
- x = vqadd_s64(vget_high_s64(v_sum_diff_total), - vget_low_s64(v_sum_diff_total)); - sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); - - if (sum_diff > sum_diff_thresh) { - return COPY_BLOCK; - } - } - } else { - return COPY_BLOCK; - } - } - } + int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total), + vget_low_s64(v_sum_diff_total)); + int sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); + sum_diff_thresh = + increase_denoising ? kSumDiffThresholdHigh : kSumDiffThreshold; + if (sum_diff > sum_diff_thresh) + return COPY_BLOCK; // Tell above level that block was filtered. running_avg_y -= running_avg_y_stride * 16; diff --git a/webrtc/modules/video_processing/util/denoiser_filter_neon.h b/webrtc/modules/video_processing/util/denoiser_filter_neon.h index 2e3ea26829..55850bd1ea 100644 --- a/webrtc/modules/video_processing/util/denoiser_filter_neon.h +++ b/webrtc/modules/video_processing/util/denoiser_filter_neon.h @@ -22,10 +22,6 @@ class DenoiserFilterNEON : public DenoiserFilter { int src_stride, uint8_t* dst, int dst_stride) override; - void CopyMem8x8(const uint8_t* src, - int src_stride, - uint8_t* dst, - int dst_stride) override; uint32_t Variance16x8(const uint8_t* a, int a_stride, const uint8_t* b, @@ -38,8 +34,7 @@ class DenoiserFilterNEON : public DenoiserFilter { const uint8_t* sig, int sig_stride, uint8_t motion_magnitude, - int increase_denoising, - bool denoise_always) override; + int increase_denoising) override; }; } // namespace webrtc diff --git a/webrtc/modules/video_processing/util/denoiser_filter_sse2.cc b/webrtc/modules/video_processing/util/denoiser_filter_sse2.cc index 614b6c9485..0545a97398 100644 --- a/webrtc/modules/video_processing/util/denoiser_filter_sse2.cc +++ b/webrtc/modules/video_processing/util/denoiser_filter_sse2.cc @@ -9,7 +9,6 @@ */ #include - #include "webrtc/modules/video_processing/util/denoiser_filter_sse2.h" namespace webrtc { @@ -110,18 +109,6 @@ void DenoiserFilterSSE2::CopyMem16x16(const 
uint8_t* src, } } -// TODO(jackychen): Optimize this function using SSE2. -void DenoiserFilterSSE2::CopyMem8x8(const uint8_t* src, - int src_stride, - uint8_t* dst, - int dst_stride) { - for (int i = 0; i < 8; i++) { - memcpy(dst, src, 8); - src += src_stride; - dst += dst_stride; - } -} - uint32_t DenoiserFilterSSE2::Variance16x8(const uint8_t* src, int src_stride, const uint8_t* ref, @@ -139,8 +126,8 @@ DenoiserDecision DenoiserFilterSSE2::MbDenoise(uint8_t* mc_running_avg_y, const uint8_t* sig, int sig_stride, uint8_t motion_magnitude, - int increase_denoising, - bool denoise_always) { + int increase_denoising) { + DenoiserDecision decision = FILTER_BLOCK; unsigned int sum_diff_thresh = 0; int shift_inc = (increase_denoising && motion_magnitude <= kMotionMagnitudeThreshold) ? 1 @@ -210,76 +197,13 @@ DenoiserDecision DenoiserFilterSSE2::MbDenoise(uint8_t* mc_running_avg_y, running_avg_y += avg_y_stride; } - { - // Compute the sum of all pixel differences of this MB. - unsigned int abs_sum_diff = AbsSumDiff16x1(acc_diff); - if (denoise_always) - sum_diff_thresh = INT_MAX; - else if (increase_denoising) - sum_diff_thresh = kSumDiffThresholdHigh; - else - sum_diff_thresh = kSumDiffThreshold; - if (abs_sum_diff > sum_diff_thresh) { - // Before returning to copy the block (i.e., apply no denoising), - // check if we can still apply some (weaker) temporal filtering to - // this block, that would otherwise not be denoised at all. Simplest - // is to apply an additional adjustment to running_avg_y to bring it - // closer to sig. The adjustment is capped by a maximum delta, and - // chosen such that in most cases the resulting sum_diff will be - // within the acceptable range given by sum_diff_thresh. - - // The delta is set by the excess of absolute pixel diff over the - // threshold. - int delta = ((abs_sum_diff - sum_diff_thresh) >> 8) + 1; - // Only apply the adjustment for max delta up to 3. 
- if (delta < 4) { - const __m128i k_delta = _mm_set1_epi8(delta); - sig -= sig_stride * 16; - mc_running_avg_y -= mc_avg_y_stride * 16; - running_avg_y -= avg_y_stride * 16; - for (int r = 0; r < 16; ++r) { - __m128i v_running_avg_y = - _mm_loadu_si128(reinterpret_cast<__m128i*>(&running_avg_y[0])); - // Calculate differences. - const __m128i v_sig = - _mm_loadu_si128(reinterpret_cast(&sig[0])); - const __m128i v_mc_running_avg_y = - _mm_loadu_si128(reinterpret_cast<__m128i*>(&mc_running_avg_y[0])); - const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig); - const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y); - // Obtain the sign. FF if diff is negative. - const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0); - // Clamp absolute difference to delta to get the adjustment. - const __m128i adj = _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta); - // Restore the sign and get positive and negative adjustments. - __m128i padj, nadj; - padj = _mm_andnot_si128(diff_sign, adj); - nadj = _mm_and_si128(diff_sign, adj); - // Calculate filtered value. - v_running_avg_y = _mm_subs_epu8(v_running_avg_y, padj); - v_running_avg_y = _mm_adds_epu8(v_running_avg_y, nadj); - _mm_storeu_si128(reinterpret_cast<__m128i*>(running_avg_y), - v_running_avg_y); - - // Accumulate the adjustments. - acc_diff = _mm_subs_epi8(acc_diff, padj); - acc_diff = _mm_adds_epi8(acc_diff, nadj); - - // Update pointers for next iteration. - sig += sig_stride; - mc_running_avg_y += mc_avg_y_stride; - running_avg_y += avg_y_stride; - } - abs_sum_diff = AbsSumDiff16x1(acc_diff); - if (abs_sum_diff > sum_diff_thresh) { - return COPY_BLOCK; - } - } else { - return COPY_BLOCK; - } - } - } - return FILTER_BLOCK; + // Compute the sum of all pixel differences of this MB. + unsigned int abs_sum_diff = AbsSumDiff16x1(acc_diff); + sum_diff_thresh = + increase_denoising ? 
kSumDiffThresholdHigh : kSumDiffThreshold; + if (abs_sum_diff > sum_diff_thresh) + decision = COPY_BLOCK; + return decision; } } // namespace webrtc diff --git a/webrtc/modules/video_processing/util/denoiser_filter_sse2.h b/webrtc/modules/video_processing/util/denoiser_filter_sse2.h index 395fa10eca..731344c809 100644 --- a/webrtc/modules/video_processing/util/denoiser_filter_sse2.h +++ b/webrtc/modules/video_processing/util/denoiser_filter_sse2.h @@ -22,10 +22,6 @@ class DenoiserFilterSSE2 : public DenoiserFilter { int src_stride, uint8_t* dst, int dst_stride) override; - void CopyMem8x8(const uint8_t* src, - int src_stride, - uint8_t* dst, - int dst_stride) override; uint32_t Variance16x8(const uint8_t* a, int a_stride, const uint8_t* b, @@ -38,8 +34,7 @@ class DenoiserFilterSSE2 : public DenoiserFilter { const uint8_t* sig, int sig_stride, uint8_t motion_magnitude, - int increase_denoising, - bool denoise_always) override; + int increase_denoising) override; }; } // namespace webrtc diff --git a/webrtc/modules/video_processing/util/noise_estimation.cc b/webrtc/modules/video_processing/util/noise_estimation.cc index 87beac38ae..a0ae2c474a 100644 --- a/webrtc/modules/video_processing/util/noise_estimation.cc +++ b/webrtc/modules/video_processing/util/noise_estimation.cc @@ -27,10 +27,10 @@ void NoiseEstimation::GetNoise(int mb_index, uint32_t var, uint32_t luma) { consec_low_var_[mb_index]++; num_static_block_++; if (consec_low_var_[mb_index] >= kConsecLowVarFrame && - (luma >> 8) < kAverageLumaMax && (luma >> 8) > kAverageLumaMin) { + (luma >> 6) < kAverageLumaMax && (luma >> 6) > kAverageLumaMin) { // Normalized var by the average luma value, this gives more weight to // darker blocks. - int nor_var = var / (luma >> 12); + int nor_var = var / (luma >> 10); noise_var_ += nor_var > kBlockSelectionVarMax ? kBlockSelectionVarMax : nor_var; num_noisy_block_++; @@ -46,25 +46,28 @@ void NoiseEstimation::UpdateNoiseLevel() { // condition more reasonable. 
// No enough samples implies the motion of the camera or too many moving // objects in the frame. - if (num_static_block_ < (0.65 * mb_cols_ * mb_rows_) || !num_noisy_block_) { + if (num_static_block_ < + (0.65 * mb_cols_ * mb_rows_ / NOISE_SUBSAMPLE_INTERVAL) || + !num_noisy_block_) { +#if DISPLAY + printf("Not enough samples. %d \n", num_static_block_); +#endif noise_var_ = 0; noise_var_accum_ = 0; - num_static_block_ = 0; num_noisy_block_ = 0; -#if DISPLAY - printf("Not enough samples.\n"); -#endif + num_static_block_ = 0; return; } else { +#if DISPLAY + printf("%d %d fraction = %.3f\n", num_static_block_, + mb_cols_ * mb_rows_ / NOISE_SUBSAMPLE_INTERVAL, + percent_static_block_); +#endif // Normalized by the number of noisy blocks. noise_var_ /= num_noisy_block_; // Get the percentage of static blocks. - percent_static_block_ = - static_cast(num_static_block_) / (mb_cols_ * mb_rows_); -#if DISPLAY - printf("%d %d fraction = %.3f\n", num_static_block_, mb_cols_ * mb_rows_, - percent_static_block_); -#endif + percent_static_block_ = static_cast(num_static_block_) / + (mb_cols_ * mb_rows_ / NOISE_SUBSAMPLE_INTERVAL); num_noisy_block_ = 0; num_static_block_ = 0; } @@ -75,12 +78,12 @@ void NoiseEstimation::UpdateNoiseLevel() { } else { noise_var_accum_ = (noise_var_accum_ * 15 + noise_var_) / 16; } - // Reset noise_var_ for the next frame. - noise_var_ = 0; #if DISPLAY printf("noise_var_accum_ = %.1f, noise_var_ = %d.\n", noise_var_accum_, noise_var_); #endif + // Reset noise_var_ for the next frame. 
+ noise_var_ = 0; } uint8_t NoiseEstimation::GetNoiseLevel() { diff --git a/webrtc/modules/video_processing/util/noise_estimation.h b/webrtc/modules/video_processing/util/noise_estimation.h index ca5cc2324f..24d44ca4ad 100644 --- a/webrtc/modules/video_processing/util/noise_estimation.h +++ b/webrtc/modules/video_processing/util/noise_estimation.h @@ -18,7 +18,6 @@ namespace webrtc { -#define EXPERIMENTAL 0 #define DISPLAY 0 const int kNoiseThreshold = 200; @@ -28,11 +27,18 @@ const int kAverageLumaMin = 20; const int kAverageLumaMax = 220; const int kBlockSelectionVarMax = kNoiseThreshold << 1; +// TODO(jackychen): To test different sampling strategy. +// Collect noise data every NOISE_SUBSAMPLE_INTERVAL blocks. +#define NOISE_SUBSAMPLE_INTERVAL 41 + class NoiseEstimation { public: void Init(int width, int height, CpuType cpu_type); + // Collect noise data from one qualified block. void GetNoise(int mb_index, uint32_t var, uint32_t luma); + // Reset the counter for consecutive low-var blocks. void ResetConsecLowVar(int mb_index); + // Update noise level for current frame. void UpdateNoiseLevel(); // 0: low noise, 1: high noise uint8_t GetNoiseLevel(); @@ -42,13 +48,13 @@ class NoiseEstimation { int height_; int mb_rows_; int mb_cols_; + int num_noisy_block_; + int num_static_block_; CpuType cpu_type_; uint32_t noise_var_; double noise_var_accum_; - int num_noisy_block_; - int num_static_block_; double percent_static_block_; - rtc::scoped_ptr consec_low_var_; + std::unique_ptr consec_low_var_; }; } // namespace webrtc diff --git a/webrtc/modules/video_processing/video_denoiser.cc b/webrtc/modules/video_processing/video_denoiser.cc index b00da5c90a..4eef6d67c8 100644 --- a/webrtc/modules/video_processing/video_denoiser.cc +++ b/webrtc/modules/video_processing/video_denoiser.cc @@ -7,10 +7,65 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ + #include "webrtc/common_video/libyuv/include/scaler.h" #include "webrtc/common_video/libyuv/include/webrtc_libyuv.h" #include "webrtc/modules/video_processing/video_denoiser.h" +#if DISPLAY // Rectangle diagnostics +static void CopyMem8x8(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride) { + for (int i = 0; i < 8; i++) { + memcpy(dst, src, 8); + src += src_stride; + dst += dst_stride; + } +} + +static void ShowRect(const std::unique_ptr& filter, + const std::unique_ptr& d_status, + const std::unique_ptr& moving_edge_red, + const std::unique_ptr& x_density, + const std::unique_ptr& y_density, + const uint8_t* u_src, + const uint8_t* v_src, + uint8_t* u_dst, + uint8_t* v_dst, + int mb_rows_, + int mb_cols_, + int stride_u_, + int stride_v_) { + for (int mb_row = 0; mb_row < mb_rows_; ++mb_row) { + for (int mb_col = 0; mb_col < mb_cols_; ++mb_col) { + int mb_index = mb_row * mb_cols_ + mb_col; + const uint8_t* mb_src_u = + u_src + (mb_row << 3) * stride_u_ + (mb_col << 3); + const uint8_t* mb_src_v = + v_src + (mb_row << 3) * stride_v_ + (mb_col << 3); + uint8_t* mb_dst_u = u_dst + (mb_row << 3) * stride_u_ + (mb_col << 3); + uint8_t* mb_dst_v = v_dst + (mb_row << 3) * stride_v_ + (mb_col << 3); + uint8_t uv_tmp[8 * 8]; + memset(uv_tmp, 200, 8 * 8); + if (d_status[mb_index] == 1) { + // Paint to red. + CopyMem8x8(mb_src_u, stride_u_, mb_dst_u, stride_u_); + CopyMem8x8(uv_tmp, 8, mb_dst_v, stride_v_); + } else if (moving_edge_red[mb_row * mb_cols_ + mb_col] && + x_density[mb_col] * y_density[mb_row]) { + // Paint to blue. 
+ CopyMem8x8(uv_tmp, 8, mb_dst_u, stride_u_); + CopyMem8x8(mb_src_v, stride_v_, mb_dst_v, stride_v_); + } else { + CopyMem8x8(mb_src_u, stride_u_, mb_dst_u, stride_u_); + CopyMem8x8(mb_src_v, stride_v_, mb_dst_v, stride_v_); + } + } + } +} +#endif + namespace webrtc { VideoDenoiser::VideoDenoiser(bool runtime_cpu_detection) @@ -19,293 +74,255 @@ VideoDenoiser::VideoDenoiser(bool runtime_cpu_detection) filter_(DenoiserFilter::Create(runtime_cpu_detection, &cpu_type_)), ne_(new NoiseEstimation()) {} -#if EXPERIMENTAL -// Check the mb position(1: close to the center, 3: close to the border). -static int PositionCheck(int mb_row, int mb_col, int mb_rows, int mb_cols) { - if ((mb_row >= (mb_rows >> 3)) && (mb_row <= (7 * mb_rows >> 3)) && - (mb_col >= (mb_cols >> 3)) && (mb_col <= (7 * mb_cols >> 3))) +void VideoDenoiser::DenoiserReset(const VideoFrame& frame, + VideoFrame* denoised_frame, + VideoFrame* denoised_frame_prev) { + width_ = frame.width(); + height_ = frame.height(); + mb_cols_ = width_ >> 4; + mb_rows_ = height_ >> 4; + stride_y_ = frame.stride(kYPlane); + stride_u_ = frame.stride(kUPlane); + stride_v_ = frame.stride(kVPlane); + + // Allocate an empty buffer for denoised_frame_prev. + denoised_frame_prev->CreateEmptyFrame(width_, height_, stride_y_, stride_u_, + stride_v_); + // Allocate and initialize denoised_frame with key frame. + denoised_frame->CreateFrame(frame.buffer(kYPlane), frame.buffer(kUPlane), + frame.buffer(kVPlane), width_, height_, stride_y_, + stride_u_, stride_v_, kVideoRotation_0); + // Set time parameters to the output frame. + denoised_frame->set_timestamp(frame.timestamp()); + denoised_frame->set_render_time_ms(frame.render_time_ms()); + + // Init noise estimator and allocate buffers. 
+ ne_->Init(width_, height_, cpu_type_); + moving_edge_.reset(new uint8_t[mb_cols_ * mb_rows_]); + mb_filter_decision_.reset(new DenoiserDecision[mb_cols_ * mb_rows_]); + x_density_.reset(new uint8_t[mb_cols_]); + y_density_.reset(new uint8_t[mb_rows_]); + moving_object_.reset(new uint8_t[mb_cols_ * mb_rows_]); +} + +int VideoDenoiser::PositionCheck(int mb_row, int mb_col, int noise_level) { + if (noise_level == 0) return 1; - else if ((mb_row >= (mb_rows >> 4)) && (mb_row <= (15 * mb_rows >> 4)) && - (mb_col >= (mb_cols >> 4)) && (mb_col <= (15 * mb_cols >> 4))) + if ((mb_row <= (mb_rows_ >> 4)) || (mb_col <= (mb_cols_ >> 4)) || + (mb_col >= (15 * mb_cols_ >> 4))) + return 3; + else if ((mb_row <= (mb_rows_ >> 3)) || (mb_col <= (mb_cols_ >> 3)) || + (mb_col >= (7 * mb_cols_ >> 3))) return 2; else - return 3; + return 1; } -static void ReduceFalseDetection(const std::unique_ptr& d_status, - std::unique_ptr* d_status_tmp1, - std::unique_ptr* d_status_tmp2, - int noise_level, - int mb_rows, - int mb_cols) { - // Draft. This can be optimized. This code block is to reduce false detection - // in moving object detection. - int mb_row_min = noise_level ? mb_rows >> 3 : 1; - int mb_col_min = noise_level ? mb_cols >> 3 : 1; - int mb_row_max = noise_level ? (7 * mb_rows >> 3) : mb_rows - 2; - int mb_col_max = noise_level ? (7 * mb_cols >> 3) : mb_cols - 2; - memcpy((*d_status_tmp1).get(), d_status.get(), mb_rows * mb_cols); - // Up left. - for (int mb_row = mb_row_min; mb_row <= mb_row_max; ++mb_row) { - for (int mb_col = mb_col_min; mb_col <= mb_col_max; ++mb_col) { - (*d_status_tmp1)[mb_row * mb_cols + mb_col] |= - ((*d_status_tmp1)[(mb_row - 1) * mb_cols + mb_col] | - (*d_status_tmp1)[mb_row * mb_cols + mb_col - 1]); +void VideoDenoiser::ReduceFalseDetection( + const std::unique_ptr& d_status, + std::unique_ptr* moving_edge_red, + int noise_level) { + // From up left corner. 
+ int mb_col_stop = mb_cols_ - 1; + for (int mb_row = 0; mb_row <= mb_rows_ - 1; ++mb_row) { + for (int mb_col = 0; mb_col <= mb_col_stop; ++mb_col) { + if (d_status[mb_row * mb_cols_ + mb_col]) { + mb_col_stop = mb_col - 1; + break; + } + (*moving_edge_red)[mb_row * mb_cols_ + mb_col] = 0; } } - memcpy((*d_status_tmp2).get(), (*d_status_tmp1).get(), mb_rows * mb_cols); - memcpy((*d_status_tmp1).get(), d_status.get(), mb_rows * mb_cols); - // Bottom left. - for (int mb_row = mb_row_max; mb_row >= mb_row_min; --mb_row) { - for (int mb_col = mb_col_min; mb_col <= mb_col_max; ++mb_col) { - (*d_status_tmp1)[mb_row * mb_cols + mb_col] |= - ((*d_status_tmp1)[(mb_row + 1) * mb_cols + mb_col] | - (*d_status_tmp1)[mb_row * mb_cols + mb_col - 1]); - (*d_status_tmp2)[mb_row * mb_cols + mb_col] &= - (*d_status_tmp1)[mb_row * mb_cols + mb_col]; + // From bottom left corner. + mb_col_stop = mb_cols_ - 1; + for (int mb_row = mb_rows_ - 1; mb_row >= 0; --mb_row) { + for (int mb_col = 0; mb_col <= mb_col_stop; ++mb_col) { + if (d_status[mb_row * mb_cols_ + mb_col]) { + mb_col_stop = mb_col - 1; + break; + } + (*moving_edge_red)[mb_row * mb_cols_ + mb_col] = 0; } } - memcpy((*d_status_tmp1).get(), d_status.get(), mb_rows * mb_cols); - // Up right. - for (int mb_row = mb_row_min; mb_row <= mb_row_max; ++mb_row) { - for (int mb_col = mb_col_max; mb_col >= mb_col_min; --mb_col) { - (*d_status_tmp1)[mb_row * mb_cols + mb_col] |= - ((*d_status_tmp1)[(mb_row - 1) * mb_cols + mb_col] | - (*d_status_tmp1)[mb_row * mb_cols + mb_col + 1]); - (*d_status_tmp2)[mb_row * mb_cols + mb_col] &= - (*d_status_tmp1)[mb_row * mb_cols + mb_col]; + // From up right corner. 
+ mb_col_stop = 0; + for (int mb_row = 0; mb_row <= mb_rows_ - 1; ++mb_row) { + for (int mb_col = mb_cols_ - 1; mb_col >= mb_col_stop; --mb_col) { + if (d_status[mb_row * mb_cols_ + mb_col]) { + mb_col_stop = mb_col + 1; + break; + } + (*moving_edge_red)[mb_row * mb_cols_ + mb_col] = 0; } } - memcpy((*d_status_tmp1).get(), d_status.get(), mb_rows * mb_cols); - // Bottom right. - for (int mb_row = mb_row_max; mb_row >= mb_row_min; --mb_row) { - for (int mb_col = mb_col_max; mb_col >= mb_col_min; --mb_col) { - (*d_status_tmp1)[mb_row * mb_cols + mb_col] |= - ((*d_status_tmp1)[(mb_row + 1) * mb_cols + mb_col] | - (*d_status_tmp1)[mb_row * mb_cols + mb_col + 1]); - (*d_status_tmp2)[mb_row * mb_cols + mb_col] &= - (*d_status_tmp1)[mb_row * mb_cols + mb_col]; + // From bottom right corner. + mb_col_stop = 0; + for (int mb_row = mb_rows_ - 1; mb_row >= 0; --mb_row) { + for (int mb_col = mb_cols_ - 1; mb_col >= mb_col_stop; --mb_col) { + if (d_status[mb_row * mb_cols_ + mb_col]) { + mb_col_stop = mb_col + 1; + break; + } + (*moving_edge_red)[mb_row * mb_cols_ + mb_col] = 0; } } } -static bool TrailingBlock(const std::unique_ptr& d_status, - int mb_row, - int mb_col, - int mb_rows, - int mb_cols) { - int mb_index = mb_row * mb_cols + mb_col; - if (!mb_row || !mb_col || mb_row == mb_rows - 1 || mb_col == mb_cols - 1) - return false; - return d_status[mb_index + 1] || d_status[mb_index - 1] || - d_status[mb_index + mb_cols] || d_status[mb_index - mb_cols]; +bool VideoDenoiser::IsTrailingBlock(const std::unique_ptr& d_status, + int mb_row, + int mb_col) { + bool ret = false; + int mb_index = mb_row * mb_cols_ + mb_col; + if (!mb_row || !mb_col || mb_row == mb_rows_ - 1 || mb_col == mb_cols_ - 1) + ret = false; + else + ret = d_status[mb_index + 1] || d_status[mb_index - 1] || + d_status[mb_index + mb_cols_] || d_status[mb_index - mb_cols_]; + return ret; } -#endif -#if DISPLAY -void ShowRect(const std::unique_ptr& filter, - const std::unique_ptr& d_status, - const 
std::unique_ptr& d_status_tmp2, - const std::unique_ptr& x_density, - const std::unique_ptr& y_density, - const uint8_t* u_src, - const uint8_t* v_src, - uint8_t* u_dst, - uint8_t* v_dst, - int mb_rows, - int mb_cols, - int stride_u, - int stride_v) { - for (int mb_row = 0; mb_row < mb_rows; ++mb_row) { - for (int mb_col = 0; mb_col < mb_cols; ++mb_col) { - int mb_index = mb_row * mb_cols + mb_col; - const uint8_t* mb_src_u = - u_src + (mb_row << 3) * stride_u + (mb_col << 3); - const uint8_t* mb_src_v = - v_src + (mb_row << 3) * stride_v + (mb_col << 3); - uint8_t* mb_dst_u = u_dst + (mb_row << 3) * stride_u + (mb_col << 3); - uint8_t* mb_dst_v = v_dst + (mb_row << 3) * stride_v + (mb_col << 3); - uint8_t y_tmp_255[8 * 8]; - memset(y_tmp_255, 200, 8 * 8); - // x_density_[mb_col] * y_density_[mb_row] - if (d_status[mb_index] == 1) { - // Paint to red. - filter->CopyMem8x8(mb_src_u, stride_u, mb_dst_u, stride_u); - filter->CopyMem8x8(y_tmp_255, 8, mb_dst_v, stride_v); -#if EXPERIMENTAL - } else if (d_status_tmp2[mb_row * mb_cols + mb_col] && - x_density[mb_col] * y_density[mb_row]) { -#else - } else if (x_density[mb_col] * y_density[mb_row]) { -#endif - // Paint to blue. - filter->CopyMem8x8(y_tmp_255, 8, mb_dst_u, stride_u); - filter->CopyMem8x8(mb_src_v, stride_v, mb_dst_v, stride_v); - } else { - filter->CopyMem8x8(mb_src_u, stride_u, mb_dst_u, stride_u); - filter->CopyMem8x8(mb_src_v, stride_v, mb_dst_v, stride_v); +void VideoDenoiser::CopySrcOnMOB(const uint8_t* y_src, uint8_t* y_dst) { + // Loop over to copy src block if the block is marked as moving object block + // or if the block may cause trailing artifacts. 
+ for (int mb_row = 0; mb_row < mb_rows_; ++mb_row) { + const int mb_index_base = mb_row * mb_cols_; + const int offset_base = (mb_row << 4) * stride_y_; + const uint8_t* mb_src_base = y_src + offset_base; + uint8_t* mb_dst_base = y_dst + offset_base; + for (int mb_col = 0; mb_col < mb_cols_; ++mb_col) { + const int mb_index = mb_index_base + mb_col; + const uint32_t offset_col = mb_col << 4; + const uint8_t* mb_src = mb_src_base + offset_col; + uint8_t* mb_dst = mb_dst_base + offset_col; + // Check if the block is a moving object block or may cause a trailing + // artifacts. + if (mb_filter_decision_[mb_index] != FILTER_BLOCK || + IsTrailingBlock(moving_edge_, mb_row, mb_col) || + (x_density_[mb_col] * y_density_[mb_row] && + moving_object_[mb_row * mb_cols_ + mb_col])) { + // Copy y source. + filter_->CopyMem16x16(mb_src, stride_y_, mb_dst, stride_y_); } } } } -#endif void VideoDenoiser::DenoiseFrame(const VideoFrame& frame, VideoFrame* denoised_frame, VideoFrame* denoised_frame_prev, - int noise_level_prev) { - int stride_y = frame.stride(kYPlane); - int stride_u = frame.stride(kUPlane); - int stride_v = frame.stride(kVPlane); - // If previous width and height are different from current frame's, then no - // denoising for the current frame. + bool noise_estimation_enabled) { + // If previous width and height are different from current frame's, need to + // reallocate the buffers and no denoising for the current frame. if (width_ != frame.width() || height_ != frame.height()) { - width_ = frame.width(); - height_ = frame.height(); - denoised_frame->CreateFrame(frame.buffer(kYPlane), frame.buffer(kUPlane), - frame.buffer(kVPlane), width_, height_, - stride_y, stride_u, stride_v, kVideoRotation_0); - denoised_frame_prev->CreateFrame( - frame.buffer(kYPlane), frame.buffer(kUPlane), frame.buffer(kVPlane), - width_, height_, stride_y, stride_u, stride_v, kVideoRotation_0); - // Setting time parameters to the output frame. 
- denoised_frame->set_timestamp(frame.timestamp()); - denoised_frame->set_render_time_ms(frame.render_time_ms()); - ne_->Init(width_, height_, cpu_type_); + DenoiserReset(frame, denoised_frame, denoised_frame_prev); return; } - // For 16x16 block. - int mb_cols = width_ >> 4; - int mb_rows = height_ >> 4; - if (metrics_.get() == nullptr) - metrics_.reset(new DenoiseMetrics[mb_cols * mb_rows]()); - if (d_status_.get() == nullptr) { - d_status_.reset(new uint8_t[mb_cols * mb_rows]()); -#if EXPERIMENTAL - d_status_tmp1_.reset(new uint8_t[mb_cols * mb_rows]()); - d_status_tmp2_.reset(new uint8_t[mb_cols * mb_rows]()); -#endif - x_density_.reset(new uint8_t[mb_cols]()); - y_density_.reset(new uint8_t[mb_rows]()); - } - // Denoise on Y plane. + // Set buffer pointers. + const uint8_t* y_src = frame.buffer(kYPlane); + const uint8_t* u_src = frame.buffer(kUPlane); + const uint8_t* v_src = frame.buffer(kVPlane); uint8_t* y_dst = denoised_frame->buffer(kYPlane); uint8_t* u_dst = denoised_frame->buffer(kUPlane); uint8_t* v_dst = denoised_frame->buffer(kVPlane); uint8_t* y_dst_prev = denoised_frame_prev->buffer(kYPlane); - const uint8_t* y_src = frame.buffer(kYPlane); - const uint8_t* u_src = frame.buffer(kUPlane); - const uint8_t* v_src = frame.buffer(kVPlane); - uint8_t noise_level = noise_level_prev == -1 ? 0 : ne_->GetNoiseLevel(); - // Temporary buffer to store denoising result. - uint8_t y_tmp[16 * 16] = {0}; - memset(x_density_.get(), 0, mb_cols); - memset(y_density_.get(), 0, mb_rows); + memset(x_density_.get(), 0, mb_cols_); + memset(y_density_.get(), 0, mb_rows_); + memset(moving_object_.get(), 1, mb_cols_ * mb_rows_); + uint8_t noise_level = noise_estimation_enabled ? ne_->GetNoiseLevel() : 0; + int thr_var_base = 16 * 16 * 5; // Loop over blocks to accumulate/extract noise level and update x/y_density // factors for moving object detection. 
- for (int mb_row = 0; mb_row < mb_rows; ++mb_row) { - for (int mb_col = 0; mb_col < mb_cols; ++mb_col) { - const uint8_t* mb_src = y_src + (mb_row << 4) * stride_y + (mb_col << 4); - uint8_t* mb_dst_prev = - y_dst_prev + (mb_row << 4) * stride_y + (mb_col << 4); - int mb_index = mb_row * mb_cols + mb_col; -#if EXPERIMENTAL - int pos_factor = PositionCheck(mb_row, mb_col, mb_rows, mb_cols); - uint32_t thr_var_adp = 16 * 16 * 5 * (noise_level ? pos_factor : 1); -#else - uint32_t thr_var_adp = 16 * 16 * 5; -#endif - int brightness = 0; - for (int i = 0; i < 16; ++i) { - for (int j = 0; j < 16; ++j) { - brightness += mb_src[i * stride_y + j]; + for (int mb_row = 0; mb_row < mb_rows_; ++mb_row) { + const int mb_index_base = mb_row * mb_cols_; + const int offset_base = (mb_row << 4) * stride_y_; + const uint8_t* mb_src_base = y_src + offset_base; + uint8_t* mb_dst_base = y_dst + offset_base; + uint8_t* mb_dst_prev_base = y_dst_prev + offset_base; + for (int mb_col = 0; mb_col < mb_cols_; ++mb_col) { + const int mb_index = mb_index_base + mb_col; + const bool ne_enable = (mb_index % NOISE_SUBSAMPLE_INTERVAL == 0); + const int pos_factor = PositionCheck(mb_row, mb_col, noise_level); + const uint32_t thr_var_adp = thr_var_base * pos_factor; + const uint32_t offset_col = mb_col << 4; + const uint8_t* mb_src = mb_src_base + offset_col; + uint8_t* mb_dst = mb_dst_base + offset_col; + uint8_t* mb_dst_prev = mb_dst_prev_base + offset_col; + + // TODO(jackychen): Need SSE2/NEON opt. + int luma = 0; + if (ne_enable) { + for (int i = 4; i < 12; ++i) { + for (int j = 4; j < 12; ++j) { + luma += mb_src[i * stride_y_ + j]; + } } } - // Get the denoised block. - filter_->MbDenoise(mb_dst_prev, stride_y, y_tmp, 16, mb_src, stride_y, 0, - 1, true); - // The variance is based on the denoised blocks in time T and T-1. - metrics_[mb_index].var = filter_->Variance16x8( - mb_dst_prev, stride_y, y_tmp, 16, &metrics_[mb_index].sad); + // Get the filtered block and filter_decision. 
+ mb_filter_decision_[mb_index] = + filter_->MbDenoise(mb_dst_prev, stride_y_, mb_dst, stride_y_, mb_src, + stride_y_, 0, noise_level); - if (metrics_[mb_index].var > thr_var_adp) { - ne_->ResetConsecLowVar(mb_index); - d_status_[mb_index] = 1; -#if EXPERIMENTAL - if (noise_level == 0 || pos_factor < 3) { - x_density_[mb_col] += 1; - y_density_[mb_row] += 1; + // If filter decision is FILTER_BLOCK, no need to check moving edge. + // It is unlikely for a moving edge block to be filtered in current + // setting. + if (mb_filter_decision_[mb_index] == FILTER_BLOCK) { + uint32_t sse_t = 0; + if (ne_enable) { + // The variance used in noise estimation is based on the src block in + // time t (mb_src) and filtered block in time t-1 (mb_dst_prev). + uint32_t noise_var = filter_->Variance16x8(mb_dst_prev, stride_y_, + mb_src, stride_y_, &sse_t); + ne_->GetNoise(mb_index, noise_var, luma); } -#else - x_density_[mb_col] += 1; - y_density_[mb_row] += 1; -#endif + moving_edge_[mb_index] = 0; // Not a moving edge block. } else { uint32_t sse_t = 0; - // The variance is based on the src blocks in time T and denoised block - // in time T-1. - uint32_t noise_var = filter_->Variance16x8(mb_dst_prev, stride_y, - mb_src, stride_y, &sse_t); - ne_->GetNoise(mb_index, noise_var, brightness); - d_status_[mb_index] = 0; - } - // Track denoised frame. - filter_->CopyMem16x16(y_tmp, 16, mb_dst_prev, stride_y); - } - } - -#if EXPERIMENTAL - ReduceFalseDetection(d_status_, &d_status_tmp1_, &d_status_tmp2_, noise_level, - mb_rows, mb_cols); -#endif - - // Denoise each MB based on the results of moving objects detection. 
- for (int mb_row = 0; mb_row < mb_rows; ++mb_row) { - for (int mb_col = 0; mb_col < mb_cols; ++mb_col) { - const uint8_t* mb_src = y_src + (mb_row << 4) * stride_y + (mb_col << 4); - uint8_t* mb_dst = y_dst + (mb_row << 4) * stride_y + (mb_col << 4); - const uint8_t* mb_src_u = - u_src + (mb_row << 3) * stride_u + (mb_col << 3); - const uint8_t* mb_src_v = - v_src + (mb_row << 3) * stride_v + (mb_col << 3); - uint8_t* mb_dst_u = u_dst + (mb_row << 3) * stride_u + (mb_col << 3); - uint8_t* mb_dst_v = v_dst + (mb_row << 3) * stride_v + (mb_col << 3); -#if EXPERIMENTAL - if ((!d_status_tmp2_[mb_row * mb_cols + mb_col] || - x_density_[mb_col] * y_density_[mb_row] == 0) && - !TrailingBlock(d_status_, mb_row, mb_col, mb_rows, mb_cols)) { -#else - if (x_density_[mb_col] * y_density_[mb_row] == 0) { -#endif - if (filter_->MbDenoise(mb_dst, stride_y, y_tmp, 16, mb_src, stride_y, 0, - noise_level, false) == FILTER_BLOCK) { - filter_->CopyMem16x16(y_tmp, 16, mb_dst, stride_y); + // The variance used in MOD is based on the filtered blocks in time + // T (mb_dst) and T-1 (mb_dst_prev). + uint32_t noise_var = filter_->Variance16x8(mb_dst_prev, stride_y_, + mb_dst, stride_y_, &sse_t); + if (noise_var > thr_var_adp) { // Moving edge checking. + if (ne_enable) { + ne_->ResetConsecLowVar(mb_index); + } + moving_edge_[mb_index] = 1; // Mark as moving edge block. + x_density_[mb_col] += (pos_factor < 3); + y_density_[mb_row] += (pos_factor < 3); } else { - // Copy y source. - filter_->CopyMem16x16(mb_src, stride_y, mb_dst, stride_y); + moving_edge_[mb_index] = 0; + if (ne_enable) { + // The variance used in noise estimation is based on the src block + // in time t (mb_src) and filtered block in time t-1 (mb_dst_prev). + uint32_t noise_var = filter_->Variance16x8( + mb_dst_prev, stride_y_, mb_src, stride_y_, &sse_t); + ne_->GetNoise(mb_index, noise_var, luma); + } } - } else { - // Copy y source. 
- filter_->CopyMem16x16(mb_src, stride_y, mb_dst, stride_y); } - filter_->CopyMem8x8(mb_src_u, stride_u, mb_dst_u, stride_u); - filter_->CopyMem8x8(mb_src_v, stride_v, mb_dst_v, stride_v); - } - } + } // End of mb_col loop + } // End of mb_row loop + + ReduceFalseDetection(moving_edge_, &moving_object_, noise_level); + + CopySrcOnMOB(y_src, y_dst); + + // TODO(jackychen): Need SSE2/NEON opt. + // Copy u/v planes. NOTE(review): height_ >> 1 rows; confirm odd heights. + memcpy(u_dst, u_src, (height_ >> 1) * stride_u_); + memcpy(v_dst, v_src, (height_ >> 1) * stride_v_); + + // Set time parameters to the output frame. + denoised_frame->set_timestamp(frame.timestamp()); + denoised_frame->set_render_time_ms(frame.render_time_ms()); #if DISPLAY // Rectangle diagnostics // Show rectangular region - ShowRect(filter_, d_status_, d_status_tmp2_, x_density_, y_density_, u_src, - v_src, u_dst, v_dst, mb_rows, mb_cols, stride_u, stride_v); + ShowRect(filter_, moving_edge_, moving_object_, x_density_, y_density_, u_src, + v_src, u_dst, v_dst, mb_rows_, mb_cols_, stride_u_, stride_v_); #endif - - // Setting time parameters to the output frame. 
- denoised_frame->set_timestamp(frame.timestamp()); - denoised_frame->set_render_time_ms(frame.render_time_ms()); - return; } } // namespace webrtc diff --git a/webrtc/modules/video_processing/video_denoiser.h b/webrtc/modules/video_processing/video_denoiser.h index 03b30d91c7..319845bf2d 100644 --- a/webrtc/modules/video_processing/video_denoiser.h +++ b/webrtc/modules/video_processing/video_denoiser.h @@ -22,25 +22,55 @@ namespace webrtc { class VideoDenoiser { public: explicit VideoDenoiser(bool runtime_cpu_detection); + void DenoiseFrame(const VideoFrame& frame, VideoFrame* denoised_frame, - VideoFrame* denoised_frame_track, - int noise_level_prev); + VideoFrame* denoised_frame_prev, + bool noise_estimation_enabled); private: + void DenoiserReset(const VideoFrame& frame, + VideoFrame* denoised_frame, + VideoFrame* denoised_frame_prev); + + // Check the mb position, return 1: close to the frame center (or whenever + // noise_level is 0), 3: within 1/16 of the top, left or right border, 2: + // within 1/8 of those borders. The bottom border is not checked. + int PositionCheck(int mb_row, int mb_col, int noise_level); + + // To reduce false detection in moving object detection (MOD). + void ReduceFalseDetection(const std::unique_ptr& d_status, + std::unique_ptr* d_status_red, + int noise_level); + + // Return whether a block might cause trailing artifact by checking if one of + // its neighbor blocks is a moving edge block. + bool IsTrailingBlock(const std::unique_ptr& d_status, + int mb_row, + int mb_col); + + // Copy input blocks to dst buffer on moving object blocks (MOB). 
+ void CopySrcOnMOB(const uint8_t* y_src, uint8_t* y_dst); + int width_; int height_; + int mb_rows_; + int mb_cols_; + int stride_y_; + int stride_u_; + int stride_v_; CpuType cpu_type_; - std::unique_ptr metrics_; std::unique_ptr filter_; std::unique_ptr ne_; - std::unique_ptr d_status_; -#if EXPERIMENTAL - std::unique_ptr d_status_tmp1_; - std::unique_ptr d_status_tmp2_; -#endif + // 1 for moving edge block, 0 for static block. + std::unique_ptr moving_edge_; + // 1 for moving object block, 0 for static block. + std::unique_ptr moving_object_; + // x_density_ and y_density_ are used in MOD process. std::unique_ptr x_density_; std::unique_ptr y_density_; + // Save the return values by MbDenoise for each block. + std::unique_ptr mb_filter_decision_; }; } // namespace webrtc