diff --git a/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc b/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc
index 74bf29d19c..3c4344fdba 100644
--- a/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc
+++ b/modules/desktop_capture/linux/wayland/shared_screencast_stream.cc
@@ -99,8 +99,8 @@ class SharedScreenCastStreamPrivate {
   uint32_t pw_stream_node_id_ = 0;
   int pw_fd_ = -1;
 
-  DesktopSize desktop_size_ = {};
-  DesktopSize video_size_;
+  DesktopSize stream_size_ = {};
+  DesktopSize frame_size_;
 
   webrtc::Mutex queue_lock_;
   ScreenCaptureFrameQueue<SharedDesktopFrame> queue_
@@ -246,7 +246,7 @@ void SharedScreenCastStreamPrivate::OnStreamParamChanged(
   auto stride = SPA_ROUND_UP_N(width * kBytesPerPixel, 4);
   auto size = height * stride;
 
-  that->desktop_size_ = DesktopSize(width, height);
+  that->stream_size_ = DesktopSize(width, height);
 
   uint8_t buffer[1024] = {};
   auto builder = spa_pod_builder{buffer, sizeof(buffer)};
@@ -658,7 +658,7 @@ void SharedScreenCastStreamPrivate::ProcessBuffer(pw_buffer* buffer) {
     // to BYTES_PER_PIXEL x WIDTH as that's the size of the DesktopFrame we
     // allocate for each captured frame.
     src_unique_ptr = egl_dmabuf_->ImageFromDmaBuf(
-        desktop_size_, spa_video_format_.format, plane_datas, modifier_);
+        stream_size_, spa_video_format_.format, plane_datas, modifier_);
     if (src_unique_ptr) {
       src = src_unique_ptr.get();
     } else {
@@ -685,66 +685,88 @@ void SharedScreenCastStreamPrivate::ProcessBuffer(pw_buffer* buffer) {
     return;
   }
 
-  struct spa_meta_region* video_metadata =
+  // Use SPA_META_VideoCrop metadata to get the frame size. KDE and GNOME
+  // handle screen/window sharing differently. KDE/KWin doesn't use
+  // SPA_META_VideoCrop metadata and when sharing a window, it always sets the
+  // stream size to the size of the window. With that we just allocate the
+  // DesktopFrame using the size of the stream itself. GNOME/Mutter always
+  // sets the stream size to the size of the whole screen, even when sharing
+  // a window. To get the real window size we have to use SPA_META_VideoCrop
+  // metadata. This gives us the size we need in order to allocate the
+  // DesktopFrame.
+
+  struct spa_meta_region* videocrop_metadata =
       static_cast<struct spa_meta_region*>(spa_buffer_find_meta_data(
-          spa_buffer, SPA_META_VideoCrop, sizeof(*video_metadata)));
+          spa_buffer, SPA_META_VideoCrop, sizeof(*videocrop_metadata)));
 
   // Video size from metadata is bigger than an actual video stream size.
   // The metadata are wrong or we should up-scale the video...in both cases
   // just quit now.
-  if (video_metadata && (video_metadata->region.size.width >
-                             static_cast<uint32_t>(desktop_size_.width()) ||
-                         video_metadata->region.size.height >
-                             static_cast<uint32_t>(desktop_size_.height()))) {
+  if (videocrop_metadata &&
+      (videocrop_metadata->region.size.width >
+           static_cast<uint32_t>(stream_size_.width()) ||
+       videocrop_metadata->region.size.height >
+           static_cast<uint32_t>(stream_size_.height()))) {
     RTC_LOG(LS_ERROR) << "Stream metadata sizes are wrong!";
     return;
   }
 
-  // Use video metadata when video size from metadata is set and smaller than
-  // video stream size, so we need to adjust it.
-  bool video_metadata_use = false;
-  const struct spa_rectangle* video_metadata_size =
-      video_metadata ? &video_metadata->region.size : nullptr;
+  // Use SPA_META_VideoCrop metadata to get the DesktopFrame size in case
+  // a window is shared and it represents just a small portion of the
+  // stream itself. This will for example be used in case of GNOME (Mutter),
+  // where the stream will have the size of the screen itself, but we care
+  // only about the smaller portion representing the window inside.
+  bool videocrop_metadata_use = false;
+  const struct spa_rectangle* videocrop_metadata_size =
+      videocrop_metadata ? &videocrop_metadata->region.size : nullptr;
 
-  if (video_metadata_size && video_metadata_size->width != 0 &&
-      video_metadata_size->height != 0 &&
-      (static_cast<int>(video_metadata_size->width) < desktop_size_.width() ||
-       static_cast<int>(video_metadata_size->height) <
-           desktop_size_.height())) {
-    video_metadata_use = true;
+  if (videocrop_metadata_size && videocrop_metadata_size->width != 0 &&
+      videocrop_metadata_size->height != 0 &&
+      (static_cast<int>(videocrop_metadata_size->width) <
+           stream_size_.width() ||
+       static_cast<int>(videocrop_metadata_size->height) <
+           stream_size_.height())) {
+    videocrop_metadata_use = true;
   }
 
-  if (video_metadata_use) {
-    video_size_ =
-        DesktopSize(video_metadata_size->width, video_metadata_size->height);
+  if (videocrop_metadata_use) {
+    frame_size_ = DesktopSize(videocrop_metadata_size->width,
+                              videocrop_metadata_size->height);
   } else {
-    video_size_ = desktop_size_;
+    frame_size_ = stream_size_;
   }
 
-  uint32_t y_offset = video_metadata_use && (video_metadata->region.position.y +
-                                                 video_size_.height() <=
-                                             desktop_size_.height())
-                          ? video_metadata->region.position.y
-                          : 0;
-  uint32_t x_offset = video_metadata_use && (video_metadata->region.position.x +
-                                                 video_size_.width() <=
-                                             desktop_size_.width())
-                          ? video_metadata->region.position.x
-                          : 0;
+  // Get the position of the video crop within the stream. Just double-check
+  // that the position doesn't exceed the size of the stream itself. NOTE:
+  // Currently it looks like no implementation uses this.
+  uint32_t y_offset =
+      videocrop_metadata_use &&
+              (videocrop_metadata->region.position.y + frame_size_.height() <=
+               stream_size_.height())
+          ? videocrop_metadata->region.position.y
+          : 0;
+  uint32_t x_offset =
+      videocrop_metadata_use &&
+              (videocrop_metadata->region.position.x + frame_size_.width() <=
+               stream_size_.width())
+          ? videocrop_metadata->region.position.x
+          : 0;
 
-  const uint32_t frame_stride = kBytesPerPixel * video_size_.width();
-  uint32_t stride = spa_buffer->datas[0].chunk->stride;
+  const uint32_t stream_stride = kBytesPerPixel * stream_size_.width();
+  uint32_t buffer_stride = spa_buffer->datas[0].chunk->stride;
+  uint32_t src_stride = buffer_stride;
 
-  if (spa_buffer->datas[0].type == SPA_DATA_DmaBuf && stride > frame_stride) {
+  if (spa_buffer->datas[0].type == SPA_DATA_DmaBuf &&
+      buffer_stride > stream_stride) {
     // When DMA-BUFs are used, sometimes spa_buffer->stride we get might
     // contain additional padding, but after we import the buffer, the stride
     // we used is no longer relevant and we should just calculate it based on
-    // width. For more context see https://crbug.com/1333304.
-    stride = frame_stride;
+    // the stream width. For more context see https://crbug.com/1333304.
+    src_stride = stream_stride;
   }
 
   uint8_t* updated_src =
-      src + (stride * y_offset) + (kBytesPerPixel * x_offset);
+      src + (src_stride * y_offset) + (kBytesPerPixel * x_offset);
 
   webrtc::MutexLock lock(&queue_lock_);
 
@@ -759,20 +781,20 @@ void SharedScreenCastStreamPrivate::ProcessBuffer(pw_buffer* buffer) {
   }
 
   if (!queue_.current_frame() ||
-      !queue_.current_frame()->size().equals(video_size_)) {
+      !queue_.current_frame()->size().equals(frame_size_)) {
    std::unique_ptr<BasicDesktopFrame> frame(new BasicDesktopFrame(
-        DesktopSize(video_size_.width(), video_size_.height())));
+        DesktopSize(frame_size_.width(), frame_size_.height())));
     queue_.ReplaceCurrentFrame(SharedDesktopFrame::Wrap(std::move(frame)));
   }
 
   queue_.current_frame()->CopyPixelsFrom(
-      updated_src, (stride - (kBytesPerPixel * x_offset)),
-      DesktopRect::MakeWH(video_size_.width(), video_size_.height()));
+      updated_src, (src_stride - (kBytesPerPixel * x_offset)),
+      DesktopRect::MakeWH(frame_size_.width(), frame_size_.height()));
 
   if (spa_video_format_.format == SPA_VIDEO_FORMAT_RGBx ||
       spa_video_format_.format == SPA_VIDEO_FORMAT_RGBA) {
     uint8_t* tmp_src = queue_.current_frame()->data();
-    for (int i = 0; i < video_size_.height(); ++i) {
+    for (int i = 0; i < frame_size_.height(); ++i) {
       // If both sides decided to go with the RGBx format we need to convert
       // it to BGRx to match color format expected by WebRTC.
       ConvertRGBxToBGRx(tmp_src, queue_.current_frame()->stride());
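Reviewer note (not part of the patch): the core of the change is the pointer arithmetic that turns the `SPA_META_VideoCrop` offsets and the source stride into `updated_src`, from which `frame_size_` rows are copied into a tightly packed `DesktopFrame`. The standalone sketch below mirrors that math with plain buffers so it can be sanity-checked in isolation; the function and parameter names (`CopyCroppedRegion`, `src_stride`, etc.) are illustrative assumptions, not WebRTC API.

```cpp
// Hypothetical, minimal illustration of the crop-copy math in
// ProcessBuffer(). Not WebRTC code; names and interface are assumptions.
#include <cstdint>
#include <cstring>
#include <vector>

constexpr uint32_t kBytesPerPixel = 4;  // BGRx/RGBx, as in the capturer.

// Copies a frame_width x frame_height crop located at (x_offset, y_offset)
// inside a stream buffer whose rows are src_stride bytes apart, into a
// tightly packed destination (stride == kBytesPerPixel * frame_width).
void CopyCroppedRegion(const uint8_t* src, uint32_t src_stride,
                       uint32_t x_offset, uint32_t y_offset,
                       uint32_t frame_width, uint32_t frame_height,
                       uint8_t* dst) {
  // Same pointer arithmetic as the patch:
  //   updated_src = src + (src_stride * y_offset) + (kBytesPerPixel * x_offset)
  const uint8_t* updated_src =
      src + (src_stride * y_offset) + (kBytesPerPixel * x_offset);
  const uint32_t dst_stride = kBytesPerPixel * frame_width;
  for (uint32_t row = 0; row < frame_height; ++row) {
    // Source rows may carry padding (src_stride >= dst_stride, e.g. the
    // DMA-BUF case above); destination rows are packed.
    std::memcpy(dst + row * dst_stride, updated_src + row * src_stride,
                dst_stride);
  }
}

int main() {
  // 8x4 stream, 4x2 window crop at position (2, 1) -- the GNOME/Mutter case
  // where the stream is the whole screen and the window is a sub-rectangle.
  const uint32_t stream_w = 8, stream_h = 4, crop_w = 4, crop_h = 2;
  std::vector<uint8_t> stream(stream_w * stream_h * kBytesPerPixel, 0xAA);
  std::vector<uint8_t> frame(crop_w * crop_h * kBytesPerPixel);
  CopyCroppedRegion(stream.data(), stream_w * kBytesPerPixel,
                    /*x_offset=*/2, /*y_offset=*/1, crop_w, crop_h,
                    frame.data());
  return 0;
}
```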
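The final hunk runs `ConvertRGBxToBGRx()` over each row when the negotiated format is RGBx/RGBA, since WebRTC expects BGRx-ordered pixels in the `DesktopFrame`. For context, such a per-row conversion boils down to swapping the R and B bytes of every 4-byte pixel; the helper name and signature below are assumptions for illustration, and the real WebRTC implementation may differ.

```cpp
#include <algorithm>
#include <cstdint>

// Sketch of a per-row RGBx -> BGRx conversion (assumed name/signature):
// swap the R and B bytes of every 4-byte pixel in place.
void ConvertRowRGBxToBGRx(uint8_t* row, uint32_t stride_bytes) {
  for (uint32_t i = 0; i + 3 < stride_bytes; i += 4) {
    std::swap(row[i], row[i + 2]);  // bytes 1 (G) and 3 (x/A) are unchanged
  }
}
```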