/* * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "video/config/encoder_stream_factory.h" #include #include #include #include #include #include "absl/algorithm/container.h" #include "absl/strings/match.h" #include "api/field_trials_view.h" #include "api/video/video_codec_constants.h" #include "media/base/media_constants.h" #include "media/base/video_adapter.h" #include "modules/video_coding/codecs/vp9/svc_config.h" #include "modules/video_coding/utility/simulcast_utility.h" #include "rtc_base/experiments/min_video_bitrate_experiment.h" #include "rtc_base/experiments/normalize_simulcast_size_experiment.h" #include "rtc_base/logging.h" #include "video/config/simulcast.h" namespace cricket { namespace { using ::webrtc::FieldTrialsView; const int kMinLayerSize = 16; int ScaleDownResolution(int resolution, double scale_down_by, int min_resolution) { // Resolution is never scalied down to smaller than min_resolution. // If the input resolution is already smaller than min_resolution, // no scaling should be done at all. if (resolution <= min_resolution) return resolution; return std::max(static_cast(resolution / scale_down_by + 0.5), min_resolution); } bool PowerOfTwo(int value) { return (value > 0) && ((value & (value - 1)) == 0); } bool IsScaleFactorsPowerOfTwo(const webrtc::VideoEncoderConfig& config) { for (const auto& layer : config.simulcast_layers) { double scale = std::max(layer.scale_resolution_down_by, 1.0); if (std::round(scale) != scale || !PowerOfTwo(scale)) { return false; } } return true; } bool IsTemporalLayersSupported(webrtc::VideoCodecType codec_type) { return codec_type == webrtc::VideoCodecType::kVideoCodecVP8 || codec_type == webrtc::VideoCodecType::kVideoCodecVP9 || codec_type == webrtc::VideoCodecType::kVideoCodecAV1; } size_t FindRequiredActiveLayers( const webrtc::VideoEncoderConfig& encoder_config) { // Need enough layers so that at least the first active one is present. for (size_t i = 0; i < encoder_config.number_of_streams; ++i) { if (encoder_config.simulcast_layers[i].active) { return i + 1; } } return 0; } // The selected thresholds for QVGA and VGA corresponded to a QP around 10. // The change in QP declined above the selected bitrates. static int GetMaxDefaultVideoBitrateKbps(int width, int height, bool is_screenshare) { int max_bitrate; if (width * height <= 320 * 240) { max_bitrate = 600; } else if (width * height <= 640 * 480) { max_bitrate = 1700; } else if (width * height <= 960 * 540) { max_bitrate = 2000; } else { max_bitrate = 2500; } if (is_screenshare) max_bitrate = std::max(max_bitrate, 1200); return max_bitrate; } int GetDefaultMaxQp(webrtc::VideoCodecType codec_type) { switch (codec_type) { case webrtc::kVideoCodecH264: case webrtc::kVideoCodecH265: return kDefaultVideoMaxQpH26x; case webrtc::kVideoCodecVP8: case webrtc::kVideoCodecVP9: case webrtc::kVideoCodecAV1: case webrtc::kVideoCodecGeneric: return kDefaultVideoMaxQpVpx; } } // Round size to nearest simulcast-friendly size. // Simulcast stream width and height must both be dividable by // |2 ^ (simulcast_layers - 1)|. int NormalizeSimulcastSize(const FieldTrialsView& field_trials, int size, size_t simulcast_layers) { int base2_exponent = static_cast(simulcast_layers) - 1; const std::optional experimental_base2_exponent = webrtc::NormalizeSimulcastSizeExperiment::GetBase2Exponent(field_trials); if (experimental_base2_exponent && (size > (1 << *experimental_base2_exponent))) { base2_exponent = *experimental_base2_exponent; } return ((size >> base2_exponent) << base2_exponent); } // Override bitrate limits and other stream settings with values from // `encoder_config.simulcast_layers` which come from `RtpEncodingParameters`. void OverrideStreamSettings( const webrtc::VideoEncoderConfig& encoder_config, const std::optional& experimental_min_bitrate, std::vector& layers) { RTC_DCHECK_LE(layers.size(), encoder_config.simulcast_layers.size()); // Allow an experiment to override the minimum bitrate for the lowest // spatial layer. The experiment's configuration has the lowest priority. layers[0].min_bitrate_bps = experimental_min_bitrate .value_or(webrtc::DataRate::BitsPerSec( webrtc::kDefaultMinVideoBitrateBps)) .bps(); const bool temporal_layers_supported = IsTemporalLayersSupported(encoder_config.codec_type); for (size_t i = 0; i < layers.size(); ++i) { const webrtc::VideoStream& overrides = encoder_config.simulcast_layers[i]; webrtc::VideoStream& layer = layers[i]; layer.active = overrides.active; layer.scalability_mode = overrides.scalability_mode; layer.requested_resolution = overrides.requested_resolution; // Update with configured num temporal layers if supported by codec. if (overrides.num_temporal_layers > 0 && temporal_layers_supported) { layer.num_temporal_layers = *overrides.num_temporal_layers; } if (overrides.max_framerate > 0) { layer.max_framerate = overrides.max_framerate; } // Update simulcast bitrates with configured min and max bitrate. if (overrides.min_bitrate_bps > 0) { layer.min_bitrate_bps = overrides.min_bitrate_bps; } if (overrides.max_bitrate_bps > 0) { layer.max_bitrate_bps = overrides.max_bitrate_bps; } if (overrides.target_bitrate_bps > 0) { layer.target_bitrate_bps = overrides.target_bitrate_bps; } if (overrides.min_bitrate_bps > 0 && overrides.max_bitrate_bps > 0) { // Min and max bitrate are configured. // Set target to 3/4 of the max bitrate (or to max if below min). if (overrides.target_bitrate_bps <= 0) layer.target_bitrate_bps = layer.max_bitrate_bps * 3 / 4; if (layer.target_bitrate_bps < layer.min_bitrate_bps) layer.target_bitrate_bps = layer.max_bitrate_bps; } else if (overrides.min_bitrate_bps > 0) { // Only min bitrate is configured, make sure target/max are above min. layer.target_bitrate_bps = std::max(layer.target_bitrate_bps, layer.min_bitrate_bps); layer.max_bitrate_bps = std::max(layer.max_bitrate_bps, layer.min_bitrate_bps); } else if (overrides.max_bitrate_bps > 0) { // Only max bitrate is configured, make sure min/target are below max. // Keep target bitrate if it is set explicitly in encoding config. // Otherwise set target bitrate to 3/4 of the max bitrate // or the one calculated from GetSimulcastConfig() which is larger. layer.min_bitrate_bps = std::min(layer.min_bitrate_bps, layer.max_bitrate_bps); if (overrides.target_bitrate_bps <= 0) { layer.target_bitrate_bps = std::max(layer.target_bitrate_bps, layer.max_bitrate_bps * 3 / 4); } layer.target_bitrate_bps = std::clamp(layer.target_bitrate_bps, layer.min_bitrate_bps, layer.max_bitrate_bps); } if (overrides.max_qp > 0) { layer.max_qp = overrides.max_qp; } else if (encoder_config.max_qp > 0) { layer.max_qp = encoder_config.max_qp; } else { layer.max_qp = GetDefaultMaxQp(encoder_config.codec_type); } } bool is_highest_layer_max_bitrate_configured = encoder_config.simulcast_layers[layers.size() - 1].max_bitrate_bps > 0; bool is_screencast = encoder_config.content_type == webrtc::VideoEncoderConfig::ContentType::kScreen; if (!is_screencast && !is_highest_layer_max_bitrate_configured && encoder_config.max_bitrate_bps > 0) { // No application-configured maximum for the largest layer. // If there is bitrate leftover, give it to the largest layer. BoostMaxSimulcastLayer( webrtc::DataRate::BitsPerSec(encoder_config.max_bitrate_bps), &layers); } // Sort the layers by max_bitrate_bps, they might not always be from // smallest to biggest std::vector index(layers.size()); std::iota(index.begin(), index.end(), 0); absl::c_stable_sort(index, [&layers](size_t a, size_t b) { return layers[a].max_bitrate_bps < layers[b].max_bitrate_bps; }); if (!layers[index[0]].active) { // Adjust min bitrate of the first active layer to allow it to go as low as // the lowest (now inactive) layer could. // Otherwise, if e.g. a single HD stream is active, it would have 600kbps // min bitrate, which would always be allocated to the stream. // This would lead to congested network, dropped frames and overall bad // experience. const int min_configured_bitrate = layers[index[0]].min_bitrate_bps; for (size_t i = 0; i < layers.size(); ++i) { if (layers[index[i]].active) { layers[index[i]].min_bitrate_bps = min_configured_bitrate; break; } } } } } // namespace EncoderStreamFactory::EncoderStreamFactory( const webrtc::VideoEncoder::EncoderInfo& encoder_info, std::optional restrictions) : encoder_info_requested_resolution_alignment_( encoder_info.requested_resolution_alignment), restrictions_(restrictions) {} std::vector EncoderStreamFactory::CreateEncoderStreams( const FieldTrialsView& trials, int frame_width, int frame_height, const webrtc::VideoEncoderConfig& encoder_config) { RTC_DCHECK_GT(encoder_config.number_of_streams, 0); RTC_DCHECK_GE(encoder_config.simulcast_layers.size(), encoder_config.number_of_streams); const std::optional experimental_min_bitrate = GetExperimentalMinVideoBitrate(trials, encoder_config.codec_type); bool is_simulcast = (encoder_config.number_of_streams > 1); // If scalability mode was specified, don't treat {active,inactive,inactive} // as simulcast since the simulcast configuration assumes very low bitrates // on the first layer. This would prevent rampup of multiple spatial layers. // See https://crbug.com/webrtc/15041. if (is_simulcast && encoder_config.simulcast_layers[0].scalability_mode.has_value()) { // Require at least one non-first layer to be active for is_simulcast=true. is_simulcast = false; for (size_t i = 1; i < encoder_config.simulcast_layers.size(); ++i) { if (encoder_config.simulcast_layers[i].active) { is_simulcast = true; break; } } } std::vector streams; if (is_simulcast || webrtc::SimulcastUtility::IsConferenceModeScreenshare(encoder_config)) { streams = CreateSimulcastOrConferenceModeScreenshareStreams( trials, frame_width, frame_height, encoder_config, experimental_min_bitrate); } else { streams = CreateDefaultVideoStreams( frame_width, frame_height, encoder_config, experimental_min_bitrate); } // The bitrate priority currently implemented on a per-sender level, so we // just set it for the first simulcast layer. RTC_DCHECK_GT(streams.size(), 0); streams[0].bitrate_priority = encoder_config.bitrate_priority; return streams; } std::vector EncoderStreamFactory::CreateDefaultVideoStreams( int width, int height, const webrtc::VideoEncoderConfig& encoder_config, const std::optional& experimental_min_bitrate) const { bool is_screencast = encoder_config.content_type == webrtc::VideoEncoderConfig::ContentType::kScreen; // The max bitrate specified by the API. // - `encoder_config.simulcast_layers[0].max_bitrate_bps` comes from the first // RtpEncodingParamters, which is the encoding of this stream. // - `encoder_config.max_bitrate_bps` comes from SDP; "b=AS" or conditionally // "x-google-max-bitrate". // If `api_max_bitrate_bps` has a value then it is positive. std::optional api_max_bitrate_bps; if (encoder_config.simulcast_layers[0].max_bitrate_bps > 0) { api_max_bitrate_bps = encoder_config.simulcast_layers[0].max_bitrate_bps; } if (encoder_config.max_bitrate_bps > 0) { api_max_bitrate_bps = api_max_bitrate_bps.has_value() ? std::min(encoder_config.max_bitrate_bps, *api_max_bitrate_bps) : encoder_config.max_bitrate_bps; } // For unset max bitrates set default bitrate for non-simulcast. int max_bitrate_bps = api_max_bitrate_bps.has_value() ? *api_max_bitrate_bps : GetMaxDefaultVideoBitrateKbps(width, height, is_screencast) * 1000; int min_bitrate_bps = experimental_min_bitrate ? rtc::saturated_cast(experimental_min_bitrate->bps()) : webrtc::kDefaultMinVideoBitrateBps; if (encoder_config.simulcast_layers[0].min_bitrate_bps > 0) { // Use set min bitrate. min_bitrate_bps = encoder_config.simulcast_layers[0].min_bitrate_bps; // If only min bitrate is configured, make sure max is above min. if (!api_max_bitrate_bps.has_value()) max_bitrate_bps = std::max(min_bitrate_bps, max_bitrate_bps); } int max_framerate = (encoder_config.simulcast_layers[0].max_framerate > 0) ? encoder_config.simulcast_layers[0].max_framerate : kDefaultVideoMaxFramerate; webrtc::VideoStream layer; layer.width = width; layer.height = height; layer.max_framerate = max_framerate; layer.requested_resolution = encoder_config.simulcast_layers[0].requested_resolution; // Note: VP9 seems to have be sending if any layer is active, // (see `UpdateSendState`) and still use parameters only from // encoder_config.simulcast_layers[0]. layer.active = absl::c_any_of(encoder_config.simulcast_layers, [](const auto& layer) { return layer.active; }); if (encoder_config.simulcast_layers[0].requested_resolution) { auto res = GetLayerResolutionFromRequestedResolution( width, height, *encoder_config.simulcast_layers[0].requested_resolution); layer.width = res.width; layer.height = res.height; } else if (encoder_config.simulcast_layers[0].scale_resolution_down_by > 1.) { layer.width = ScaleDownResolution( layer.width, encoder_config.simulcast_layers[0].scale_resolution_down_by, kMinLayerSize); layer.height = ScaleDownResolution( layer.height, encoder_config.simulcast_layers[0].scale_resolution_down_by, kMinLayerSize); } if (encoder_config.codec_type == webrtc::VideoCodecType::kVideoCodecVP9) { RTC_DCHECK(encoder_config.encoder_specific_settings); // Use VP9 SVC layering from codec settings which might be initialized // though field trial in ConfigureVideoEncoderSettings. webrtc::VideoCodecVP9 vp9_settings; encoder_config.encoder_specific_settings->FillVideoCodecVp9(&vp9_settings); layer.num_temporal_layers = vp9_settings.numberOfTemporalLayers; // Number of spatial layers is signalled differently from different call // sites (sigh), pick the max as we are interested in the upper bound. int num_spatial_layers = std::max({encoder_config.simulcast_layers.size(), encoder_config.spatial_layers.size(), size_t{vp9_settings.numberOfSpatialLayers}}); if (width * height > 0 && (layer.num_temporal_layers > 1u || num_spatial_layers > 1)) { // In SVC mode, the VP9 max bitrate is determined by SvcConfig, instead of // GetMaxDefaultVideoBitrateKbps(). std::vector svc_layers = webrtc::GetSvcConfig(width, height, max_framerate, /*first_active_layer=*/0, num_spatial_layers, *layer.num_temporal_layers, is_screencast); int sum_max_bitrates_kbps = 0; for (const webrtc::SpatialLayer& spatial_layer : svc_layers) { sum_max_bitrates_kbps += spatial_layer.maxBitrate; } RTC_DCHECK_GE(sum_max_bitrates_kbps, 0); if (!api_max_bitrate_bps.has_value()) { max_bitrate_bps = sum_max_bitrates_kbps * 1000; } else { max_bitrate_bps = std::min(max_bitrate_bps, sum_max_bitrates_kbps * 1000); } max_bitrate_bps = std::max(min_bitrate_bps, max_bitrate_bps); } } // In the case that the application sets a max bitrate that's lower than the // min bitrate, we adjust it down (see bugs.webrtc.org/9141). layer.min_bitrate_bps = std::min(min_bitrate_bps, max_bitrate_bps); if (encoder_config.simulcast_layers[0].target_bitrate_bps <= 0) { layer.target_bitrate_bps = max_bitrate_bps; } else { layer.target_bitrate_bps = std::min( encoder_config.simulcast_layers[0].target_bitrate_bps, max_bitrate_bps); } layer.max_bitrate_bps = max_bitrate_bps; layer.bitrate_priority = encoder_config.bitrate_priority; if (encoder_config.max_qp > 0) { layer.max_qp = encoder_config.max_qp; } else { layer.max_qp = GetDefaultMaxQp(encoder_config.codec_type); } if (IsTemporalLayersSupported(encoder_config.codec_type)) { // Use configured number of temporal layers if set. if (encoder_config.simulcast_layers[0].num_temporal_layers) { layer.num_temporal_layers = *encoder_config.simulcast_layers[0].num_temporal_layers; } } layer.scalability_mode = encoder_config.simulcast_layers[0].scalability_mode; return {layer}; } std::vector EncoderStreamFactory::CreateSimulcastOrConferenceModeScreenshareStreams( const FieldTrialsView& trials, int width, int height, const webrtc::VideoEncoderConfig& encoder_config, const std::optional& experimental_min_bitrate) const { std::vector resolutions = GetStreamResolutions(trials, width, height, encoder_config); // Use legacy simulcast screenshare if conference mode is explicitly enabled // or use the regular simulcast configuration path which is generic. std::vector layers = GetSimulcastConfig( resolutions, webrtc::SimulcastUtility::IsConferenceModeScreenshare(encoder_config), IsTemporalLayersSupported(encoder_config.codec_type), trials, encoder_config.codec_type); OverrideStreamSettings(encoder_config, experimental_min_bitrate, layers); return layers; } webrtc::Resolution EncoderStreamFactory::GetLayerResolutionFromRequestedResolution( int frame_width, int frame_height, webrtc::Resolution requested_resolution) const { // Make frame and requested resolution have matching orientation. if ((frame_width < frame_height) != (requested_resolution.width < requested_resolution.height)) { requested_resolution = {.width = requested_resolution.height, .height = requested_resolution.width}; } // Downscale by smallest scaling factor, if necessary. if (frame_width > 0 && frame_height > 0 && (requested_resolution.width < frame_width || requested_resolution.height < frame_height)) { double scale_factor = std::min( requested_resolution.width / static_cast(frame_width), requested_resolution.height / static_cast(frame_height)); frame_width = std::round(frame_width * scale_factor); frame_height = std::round(frame_height * scale_factor); } webrtc::Resolution frame = {.width = frame_width, .height = frame_height}; // Maybe adapt further based on restrictions and encoder alignment. VideoAdapter adapter(encoder_info_requested_resolution_alignment_); adapter.OnOutputFormatRequest(frame.ToPair(), frame.PixelCount(), std::nullopt); if (restrictions_) { rtc::VideoSinkWants wants; wants.is_active = true; wants.target_pixel_count = restrictions_->target_pixels_per_frame(); wants.max_pixel_count = rtc::dchecked_cast(restrictions_->max_pixels_per_frame().value_or( std::numeric_limits::max())); wants.aggregates.emplace(rtc::VideoSinkWants::Aggregates()); wants.resolution_alignment = encoder_info_requested_resolution_alignment_; adapter.OnSinkWants(wants); } int cropped_width, cropped_height; int out_width = 0, out_height = 0; if (!adapter.AdaptFrameResolution(frame_width, frame_height, 0, &cropped_width, &cropped_height, &out_width, &out_height)) { RTC_LOG(LS_ERROR) << "AdaptFrameResolution returned false!"; } return {.width = out_width, .height = out_height}; } std::vector EncoderStreamFactory::GetStreamResolutions( const webrtc::FieldTrialsView& trials, int width, int height, const webrtc::VideoEncoderConfig& encoder_config) const { std::vector resolutions; if (webrtc::SimulcastUtility::IsConferenceModeScreenshare(encoder_config)) { for (size_t i = 0; i < encoder_config.number_of_streams; ++i) { resolutions.push_back({.width = width, .height = height}); } } else { size_t min_num_layers = FindRequiredActiveLayers(encoder_config); size_t max_num_layers = !encoder_config.HasRequestedResolution() ? LimitSimulcastLayerCount( min_num_layers, encoder_config.number_of_streams, width, height, trials, encoder_config.codec_type) : encoder_config.number_of_streams; RTC_DCHECK_LE(max_num_layers, encoder_config.number_of_streams); // When the `requested_resolution` API is used, disable upper layers that // are bigger than what adaptation restrictions allow. For example if // restrictions are 540p, simulcast 180p:360p:720p becomes 180p:360p:- as // opposed to 180p:360p:540p. This makes CPU adaptation consistent with BW // adaptation (bitrate allocator disabling layers rather than downscaling) // and means we don't have to break power of two optimization paths (i.e. // S-modes based simulcast). Note that the lowest layer is never disabled. if (encoder_config.HasRequestedResolution() && restrictions_.has_value() && restrictions_->max_pixels_per_frame().has_value()) { int max_pixels = rtc::dchecked_cast( restrictions_->max_pixels_per_frame().value()); int prev_pixel_count = encoder_config.simulcast_layers[0] .requested_resolution.value_or(webrtc::Resolution()) .PixelCount(); std::optional restricted_num_layers; for (size_t i = 1; i < max_num_layers; ++i) { int pixel_count = encoder_config.simulcast_layers[i] .requested_resolution.value_or(webrtc::Resolution()) .PixelCount(); if (!restricted_num_layers.has_value() && max_pixels < pixel_count) { // Current layer is the highest layer allowed by restrictions. restricted_num_layers = i; } if (pixel_count < prev_pixel_count) { // Cannot limit layers because config is not lower-to-higher. restricted_num_layers = std::nullopt; break; } prev_pixel_count = pixel_count; } max_num_layers = restricted_num_layers.value_or(max_num_layers); } const bool has_scale_resolution_down_by = absl::c_any_of( encoder_config.simulcast_layers, [](const webrtc::VideoStream& layer) { return layer.scale_resolution_down_by != -1.; }); bool default_scale_factors_used = true; if (has_scale_resolution_down_by) { default_scale_factors_used = IsScaleFactorsPowerOfTwo(encoder_config); } const bool norm_size_configured = webrtc::NormalizeSimulcastSizeExperiment::GetBase2Exponent(trials) .has_value(); const int normalized_width = (default_scale_factors_used || norm_size_configured) && (width >= kMinLayerSize) ? NormalizeSimulcastSize(trials, width, max_num_layers) : width; const int normalized_height = (default_scale_factors_used || norm_size_configured) && (height >= kMinLayerSize) ? NormalizeSimulcastSize(trials, height, max_num_layers) : height; resolutions.resize(max_num_layers); for (size_t i = 0; i < max_num_layers; i++) { if (encoder_config.simulcast_layers[i].requested_resolution.has_value()) { resolutions[i] = GetLayerResolutionFromRequestedResolution( normalized_width, normalized_height, *encoder_config.simulcast_layers[i].requested_resolution); } else if (has_scale_resolution_down_by) { const double scale_resolution_down_by = std::max( encoder_config.simulcast_layers[i].scale_resolution_down_by, 1.0); resolutions[i].width = ScaleDownResolution( normalized_width, scale_resolution_down_by, kMinLayerSize); resolutions[i].height = ScaleDownResolution( normalized_height, scale_resolution_down_by, kMinLayerSize); } else { // Resolutions with default 1/2 scale factor, from low to high. resolutions[i].width = normalized_width >> (max_num_layers - i - 1); resolutions[i].height = normalized_height >> (max_num_layers - i - 1); } } } return resolutions; } } // namespace cricket