Reland "Fix getStats() freeze bug affecting Chromium but not WebRTC standalone."
This is a reland of 05d43c6f7fe497fed0f2c8714e2042dd07a86df2 The original CL got reverted because Chrome did not support IsQuitting() which triggered a NOTREACHED() inside of a DCHECK. With https://chromium-review.googlesource.com/c/chromium/src/+/1491620 it is safe to reland this CL. The only changes between this and the original patch set is that this is now rebased on top of https://webrtc-review.googlesource.com/c/src/+/124701, i.e. rtc::PostMessageWithFunctor() has been replaced by rtc::Thread::PostTask(). Original change's description: > Fix getStats() freeze bug affecting Chromium but not WebRTC standalone. > > PeerConnection::Close() is, per-spec, a blocking operation. > Unfortunately, PeerConnection is implemented to own resources used by > the network thread, and Close() - on the signaling thread - destroys > these resources. As such, tasks run in parallel like getStats() get into > race conditions with Close() unless synchronized. The mechanism in-place > is RTCStatsCollector::WaitForPendingRequest(), it waits until the > network thread is done with the in-parallel stats request. > > Prior to this CL, this was implemented by performing > rtc::Thread::ProcessMessages() in a loop until the network thread had > posted a task on the signaling thread to say that it was done which > would then get processed by ProcessMessages(). In WebRTC this works, and > the test is RTCStatsIntegrationTest.GetsStatsWhileClosingPeerConnection. > > But because Chromium's thread wrapper does no support > ProcessMessages(), calling getStats() followed by close() in Chrome > resulted in waiting forever (https://crbug.com/850907). > > In this CL, the process messages loop is removed. Instead, the shared > resources are guarded by an rtc::Event. WaitForPendingRequest() still > blocks the signaling thread, but only while shared resources are in use > by the network thread. After this CL, calling WaitForPendingRequest() no > longer has any unexpected side-effects since it no longer processes > other messages that might have been posted on the thread. > > The resource ownership and threading model of WebRTC deserves to be > revisited, but this fixes a common Chromium crash without redesigning > PeerConnection, in a way that does not cause more blocking than what > the other PeerConnection methods are already doing. > > Note: An alternative to using rtc::Event is to use resource locks and > to not perform the stats collection on the network thread if the > request was cancelled before the start of processing, but this has very > little benefit in terms of performance: once the network thread starts > collecting the stats, it would use the lock until collection is > completed, blocking the signaling thread trying to acquire that lock > anyway. This defeats the purpose and is a riskier change, since > cancelling partial collection in this inherently racy edge-case would > have observable differences from the returned stats, which may cause > more regressions. > > Bug: chromium:850907 > Change-Id: Idceeee0bddc0c9d5518b58a2b263abb2bbf47cff > Reviewed-on: https://webrtc-review.googlesource.com/c/121567 > Commit-Queue: Henrik Boström <hbos@webrtc.org> > Reviewed-by: Steve Anton <steveanton@webrtc.org> > Cr-Commit-Position: refs/heads/master@{#26707} TBR=steveanton@webrtc.org Bug: chromium:850907 Change-Id: I5be7f69f0de65ff1120e4926fbf904def97ea9c0 Reviewed-on: https://webrtc-review.googlesource.com/c/124781 Reviewed-by: Henrik Boström <hbos@webrtc.org> Reviewed-by: Steve Anton <steveanton@webrtc.org> Commit-Queue: Henrik Boström <hbos@webrtc.org> Cr-Commit-Position: refs/heads/master@{#26896}
This commit is contained in:
parent
25fb765367
commit
40b030edbf
@ -753,6 +753,8 @@ RTCStatsCollector::RTCStatsCollector(PeerConnectionInternal* pc,
|
||||
network_thread_(pc->network_thread()),
|
||||
num_pending_partial_reports_(0),
|
||||
partial_report_timestamp_us_(0),
|
||||
network_report_event_(true /* manual_reset */,
|
||||
true /* initially_signaled */),
|
||||
cache_timestamp_us_(0),
|
||||
cache_lifetime_us_(cache_lifetime_us) {
|
||||
RTC_DCHECK(pc_);
|
||||
@ -799,10 +801,9 @@ void RTCStatsCollector::GetStatsReportInternal(
|
||||
// reentrancy problems.
|
||||
std::vector<RequestInfo> requests;
|
||||
requests.swap(requests_);
|
||||
invoker_.AsyncInvoke<void>(
|
||||
RTC_FROM_HERE, signaling_thread_,
|
||||
rtc::Bind(&RTCStatsCollector::DeliverCachedReport, this, cached_report_,
|
||||
std::move(requests)));
|
||||
signaling_thread_->PostTask(
|
||||
RTC_FROM_HERE, rtc::Bind(&RTCStatsCollector::DeliverCachedReport, this,
|
||||
cached_report_, std::move(requests)));
|
||||
} else if (!num_pending_partial_reports_) {
|
||||
// Only start gathering stats if we're not already gathering stats. In the
|
||||
// case of already gathering stats, |callback_| will be invoked when there
|
||||
@ -830,10 +831,14 @@ void RTCStatsCollector::GetStatsReportInternal(
|
||||
// network thread, where it more naturally belongs.
|
||||
call_stats_ = pc_->GetCallStats();
|
||||
|
||||
invoker_.AsyncInvoke<void>(
|
||||
RTC_FROM_HERE, network_thread_,
|
||||
// Don't touch |network_report_| on the signaling thread until
|
||||
// ProducePartialResultsOnNetworkThread() has signaled the
|
||||
// |network_report_event_|.
|
||||
network_report_event_.Reset();
|
||||
network_thread_->PostTask(
|
||||
RTC_FROM_HERE,
|
||||
rtc::Bind(&RTCStatsCollector::ProducePartialResultsOnNetworkThread,
|
||||
rtc::scoped_refptr<RTCStatsCollector>(this), timestamp_us));
|
||||
this, timestamp_us));
|
||||
ProducePartialResultsOnSignalingThread(timestamp_us);
|
||||
}
|
||||
}
|
||||
@ -845,89 +850,116 @@ void RTCStatsCollector::ClearCachedStatsReport() {
|
||||
|
||||
void RTCStatsCollector::WaitForPendingRequest() {
|
||||
RTC_DCHECK(signaling_thread_->IsCurrent());
|
||||
if (num_pending_partial_reports_) {
|
||||
rtc::Thread::Current()->ProcessMessages(0);
|
||||
while (num_pending_partial_reports_) {
|
||||
rtc::Thread::Current()->SleepMs(1);
|
||||
rtc::Thread::Current()->ProcessMessages(0);
|
||||
}
|
||||
}
|
||||
// If a request is pending, blocks until the |network_report_event_| is
|
||||
// signaled and then delivers the result. Otherwise this is a NO-OP.
|
||||
MergeNetworkReport_s();
|
||||
}
|
||||
|
||||
void RTCStatsCollector::ProducePartialResultsOnSignalingThread(
|
||||
int64_t timestamp_us) {
|
||||
RTC_DCHECK(signaling_thread_->IsCurrent());
|
||||
rtc::scoped_refptr<RTCStatsReport> report = RTCStatsReport::Create(
|
||||
timestamp_us);
|
||||
partial_report_ = RTCStatsReport::Create(timestamp_us);
|
||||
|
||||
ProduceDataChannelStats_s(timestamp_us, report.get());
|
||||
ProduceMediaStreamStats_s(timestamp_us, report.get());
|
||||
ProduceMediaStreamTrackStats_s(timestamp_us, report.get());
|
||||
ProducePeerConnectionStats_s(timestamp_us, report.get());
|
||||
ProducePartialResultsOnSignalingThreadImpl(timestamp_us,
|
||||
partial_report_.get());
|
||||
|
||||
AddPartialResults(report);
|
||||
// ProducePartialResultsOnSignalingThread() is running synchronously on the
|
||||
// signaling thread, so it is always the first partial result delivered on the
|
||||
// signaling thread. The request is not complete until MergeNetworkReport_s()
|
||||
// happens; we don't have to do anything here.
|
||||
RTC_DCHECK_GT(num_pending_partial_reports_, 1);
|
||||
--num_pending_partial_reports_;
|
||||
}
|
||||
|
||||
void RTCStatsCollector::ProducePartialResultsOnSignalingThreadImpl(
|
||||
int64_t timestamp_us,
|
||||
RTCStatsReport* partial_report) {
|
||||
RTC_DCHECK(signaling_thread_->IsCurrent());
|
||||
ProduceDataChannelStats_s(timestamp_us, partial_report);
|
||||
ProduceMediaStreamStats_s(timestamp_us, partial_report);
|
||||
ProduceMediaStreamTrackStats_s(timestamp_us, partial_report);
|
||||
ProducePeerConnectionStats_s(timestamp_us, partial_report);
|
||||
}
|
||||
|
||||
void RTCStatsCollector::ProducePartialResultsOnNetworkThread(
|
||||
int64_t timestamp_us) {
|
||||
RTC_DCHECK(network_thread_->IsCurrent());
|
||||
rtc::scoped_refptr<RTCStatsReport> report = RTCStatsReport::Create(
|
||||
timestamp_us);
|
||||
// Touching |network_report_| on this thread is safe by this method because
|
||||
// |network_report_event_| is reset before this method is invoked.
|
||||
network_report_ = RTCStatsReport::Create(timestamp_us);
|
||||
|
||||
std::map<std::string, cricket::TransportStats> transport_stats_by_name =
|
||||
pc_->GetTransportStatsByNames(transport_names_);
|
||||
|
||||
std::map<std::string, CertificateStatsPair> transport_cert_stats =
|
||||
PrepareTransportCertificateStats_n(transport_stats_by_name);
|
||||
|
||||
ProduceCertificateStats_n(timestamp_us, transport_cert_stats, report.get());
|
||||
ProduceCodecStats_n(timestamp_us, transceiver_stats_infos_, report.get());
|
||||
ProduceIceCandidateAndPairStats_n(timestamp_us, transport_stats_by_name,
|
||||
call_stats_, report.get());
|
||||
ProduceRTPStreamStats_n(timestamp_us, transceiver_stats_infos_, report.get());
|
||||
ProduceTransportStats_n(timestamp_us, transport_stats_by_name,
|
||||
transport_cert_stats, report.get());
|
||||
ProducePartialResultsOnNetworkThreadImpl(
|
||||
timestamp_us, transport_stats_by_name, transport_cert_stats,
|
||||
network_report_.get());
|
||||
|
||||
AddPartialResults(report);
|
||||
// Signal that it is now safe to touch |network_report_| on the signaling
|
||||
// thread, and post a task to merge it into the final results.
|
||||
network_report_event_.Set();
|
||||
signaling_thread_->PostTask(
|
||||
RTC_FROM_HERE, rtc::Bind(&RTCStatsCollector::MergeNetworkReport_s, this));
|
||||
}
|
||||
|
||||
void RTCStatsCollector::AddPartialResults(
|
||||
const rtc::scoped_refptr<RTCStatsReport>& partial_report) {
|
||||
if (!signaling_thread_->IsCurrent()) {
|
||||
invoker_.AsyncInvoke<void>(RTC_FROM_HERE, signaling_thread_,
|
||||
rtc::Bind(&RTCStatsCollector::AddPartialResults_s,
|
||||
rtc::scoped_refptr<RTCStatsCollector>(this),
|
||||
partial_report));
|
||||
void RTCStatsCollector::ProducePartialResultsOnNetworkThreadImpl(
|
||||
int64_t timestamp_us,
|
||||
const std::map<std::string, cricket::TransportStats>&
|
||||
transport_stats_by_name,
|
||||
const std::map<std::string, CertificateStatsPair>& transport_cert_stats,
|
||||
RTCStatsReport* partial_report) {
|
||||
RTC_DCHECK(network_thread_->IsCurrent());
|
||||
ProduceCertificateStats_n(timestamp_us, transport_cert_stats, partial_report);
|
||||
ProduceCodecStats_n(timestamp_us, transceiver_stats_infos_, partial_report);
|
||||
ProduceIceCandidateAndPairStats_n(timestamp_us, transport_stats_by_name,
|
||||
call_stats_, partial_report);
|
||||
ProduceRTPStreamStats_n(timestamp_us, transceiver_stats_infos_,
|
||||
partial_report);
|
||||
ProduceTransportStats_n(timestamp_us, transport_stats_by_name,
|
||||
transport_cert_stats, partial_report);
|
||||
}
|
||||
|
||||
void RTCStatsCollector::MergeNetworkReport_s() {
|
||||
RTC_DCHECK(signaling_thread_->IsCurrent());
|
||||
// The |network_report_event_| must be signaled for it to be safe to touch
|
||||
// |network_report_|. This is normally not blocking, but if
|
||||
// WaitForPendingRequest() is called while a request is pending, we might have
|
||||
// to wait until the network thread is done touching |network_report_|.
|
||||
network_report_event_.Wait(rtc::Event::kForever);
|
||||
if (!network_report_) {
|
||||
// Normally, MergeNetworkReport_s() is executed because it is posted from
|
||||
// the network thread. But if WaitForPendingRequest() is called while a
|
||||
// request is pending, an early call to MergeNetworkReport_s() is made,
|
||||
// merging the report and setting |network_report_| to null. If so, when the
|
||||
// previously posted MergeNetworkReport_s() is later executed, the report is
|
||||
// already null and nothing needs to be done here.
|
||||
return;
|
||||
}
|
||||
AddPartialResults_s(partial_report);
|
||||
}
|
||||
|
||||
void RTCStatsCollector::AddPartialResults_s(
|
||||
rtc::scoped_refptr<RTCStatsReport> partial_report) {
|
||||
RTC_DCHECK(signaling_thread_->IsCurrent());
|
||||
RTC_DCHECK_GT(num_pending_partial_reports_, 0);
|
||||
if (!partial_report_)
|
||||
partial_report_ = partial_report;
|
||||
else
|
||||
partial_report_->TakeMembersFrom(partial_report);
|
||||
RTC_DCHECK(partial_report_);
|
||||
partial_report_->TakeMembersFrom(network_report_);
|
||||
network_report_ = nullptr;
|
||||
--num_pending_partial_reports_;
|
||||
if (!num_pending_partial_reports_) {
|
||||
cache_timestamp_us_ = partial_report_timestamp_us_;
|
||||
cached_report_ = partial_report_;
|
||||
partial_report_ = nullptr;
|
||||
transceiver_stats_infos_.clear();
|
||||
// Trace WebRTC Stats when getStats is called on Javascript.
|
||||
// This allows access to WebRTC stats from trace logs. To enable them,
|
||||
// select the "webrtc_stats" category when recording traces.
|
||||
TRACE_EVENT_INSTANT1("webrtc_stats", "webrtc_stats", "report",
|
||||
cached_report_->ToJson());
|
||||
// |network_report_| is currently the only partial report collected
|
||||
// asynchronously, so |num_pending_partial_reports_| must now be 0 and we are
|
||||
// ready to deliver the result.
|
||||
RTC_DCHECK_EQ(num_pending_partial_reports_, 0);
|
||||
cache_timestamp_us_ = partial_report_timestamp_us_;
|
||||
cached_report_ = partial_report_;
|
||||
partial_report_ = nullptr;
|
||||
transceiver_stats_infos_.clear();
|
||||
// Trace WebRTC Stats when getStats is called on Javascript.
|
||||
// This allows access to WebRTC stats from trace logs. To enable them,
|
||||
// select the "webrtc_stats" category when recording traces.
|
||||
TRACE_EVENT_INSTANT1("webrtc_stats", "webrtc_stats", "report",
|
||||
cached_report_->ToJson());
|
||||
|
||||
// Deliver report and clear |requests_|.
|
||||
std::vector<RequestInfo> requests;
|
||||
requests.swap(requests_);
|
||||
DeliverCachedReport(cached_report_, std::move(requests));
|
||||
}
|
||||
// Deliver report and clear |requests_|.
|
||||
std::vector<RequestInfo> requests;
|
||||
requests.swap(requests_);
|
||||
DeliverCachedReport(cached_report_, std::move(requests));
|
||||
}
|
||||
|
||||
void RTCStatsCollector::DeliverCachedReport(
|
||||
|
||||
@ -27,7 +27,7 @@
|
||||
#include "pc/data_channel.h"
|
||||
#include "pc/peer_connection_internal.h"
|
||||
#include "pc/track_media_info_map.h"
|
||||
#include "rtc_base/async_invoker.h"
|
||||
#include "rtc_base/event.h"
|
||||
#include "rtc_base/ref_count.h"
|
||||
#include "rtc_base/ssl_identity.h"
|
||||
#include "rtc_base/third_party/sigslot/sigslot.h"
|
||||
@ -77,14 +77,21 @@ class RTCStatsCollector : public virtual rtc::RefCountInterface,
|
||||
RTCStatsCollector(PeerConnectionInternal* pc, int64_t cache_lifetime_us);
|
||||
~RTCStatsCollector();
|
||||
|
||||
// Stats gathering on a particular thread. Calls |AddPartialResults| before
|
||||
// returning. Virtual for the sake of testing.
|
||||
virtual void ProducePartialResultsOnSignalingThread(int64_t timestamp_us);
|
||||
virtual void ProducePartialResultsOnNetworkThread(int64_t timestamp_us);
|
||||
struct CertificateStatsPair {
|
||||
std::unique_ptr<rtc::SSLCertificateStats> local;
|
||||
std::unique_ptr<rtc::SSLCertificateStats> remote;
|
||||
};
|
||||
|
||||
// Can be called on any thread.
|
||||
void AddPartialResults(
|
||||
const rtc::scoped_refptr<RTCStatsReport>& partial_report);
|
||||
// Stats gathering on a particular thread. Virtual for the sake of testing.
|
||||
virtual void ProducePartialResultsOnSignalingThreadImpl(
|
||||
int64_t timestamp_us,
|
||||
RTCStatsReport* partial_report);
|
||||
virtual void ProducePartialResultsOnNetworkThreadImpl(
|
||||
int64_t timestamp_us,
|
||||
const std::map<std::string, cricket::TransportStats>&
|
||||
transport_stats_by_name,
|
||||
const std::map<std::string, CertificateStatsPair>& transport_cert_stats,
|
||||
RTCStatsReport* partial_report);
|
||||
|
||||
private:
|
||||
class RequestInfo {
|
||||
@ -130,11 +137,6 @@ class RTCStatsCollector : public virtual rtc::RefCountInterface,
|
||||
|
||||
void GetStatsReportInternal(RequestInfo request);
|
||||
|
||||
struct CertificateStatsPair {
|
||||
std::unique_ptr<rtc::SSLCertificateStats> local;
|
||||
std::unique_ptr<rtc::SSLCertificateStats> remote;
|
||||
};
|
||||
|
||||
// Structure for tracking stats about each RtpTransceiver managed by the
|
||||
// PeerConnection. This can either by a Plan B style or Unified Plan style
|
||||
// transceiver (i.e., can have 0 or many senders and receivers).
|
||||
@ -150,7 +152,6 @@ class RTCStatsCollector : public virtual rtc::RefCountInterface,
|
||||
std::unique_ptr<TrackMediaInfoMap> track_media_info_map;
|
||||
};
|
||||
|
||||
void AddPartialResults_s(rtc::scoped_refptr<RTCStatsReport> partial_report);
|
||||
void DeliverCachedReport(
|
||||
rtc::scoped_refptr<const RTCStatsReport> cached_report,
|
||||
std::vector<RequestInfo> requests);
|
||||
@ -211,6 +212,13 @@ class RTCStatsCollector : public virtual rtc::RefCountInterface,
|
||||
std::vector<RtpTransceiverStatsInfo> PrepareTransceiverStatsInfos_s() const;
|
||||
std::set<std::string> PrepareTransportNames_s() const;
|
||||
|
||||
// Stats gathering on a particular thread.
|
||||
void ProducePartialResultsOnSignalingThread(int64_t timestamp_us);
|
||||
void ProducePartialResultsOnNetworkThread(int64_t timestamp_us);
|
||||
// Merges |network_report_| into |partial_report_| and completes the request.
|
||||
// This is a NO-OP if |network_report_| is null.
|
||||
void MergeNetworkReport_s();
|
||||
|
||||
// Slots for signals (sigslot) that are wired up to |pc_|.
|
||||
void OnDataChannelCreated(DataChannel* channel);
|
||||
// Slots for signals (sigslot) that are wired up to |channel|.
|
||||
@ -221,12 +229,24 @@ class RTCStatsCollector : public virtual rtc::RefCountInterface,
|
||||
rtc::Thread* const signaling_thread_;
|
||||
rtc::Thread* const worker_thread_;
|
||||
rtc::Thread* const network_thread_;
|
||||
rtc::AsyncInvoker invoker_;
|
||||
|
||||
int num_pending_partial_reports_;
|
||||
int64_t partial_report_timestamp_us_;
|
||||
// Reports that are produced on the signaling thread or the network thread are
|
||||
// merged into this report. It is only touched on the signaling thread. Once
|
||||
// all partial reports are merged this is the result of a request.
|
||||
rtc::scoped_refptr<RTCStatsReport> partial_report_;
|
||||
std::vector<RequestInfo> requests_;
|
||||
// Holds the result of ProducePartialResultsOnNetworkThread(). It is merged
|
||||
// into |partial_report_| on the signaling thread and then nulled by
|
||||
// MergeNetworkReport_s(). Thread-safety is ensured by using
|
||||
// |network_report_event_|.
|
||||
rtc::scoped_refptr<RTCStatsReport> network_report_;
|
||||
// If set, it is safe to touch the |network_report_| on the signaling thread.
|
||||
// This is reset before async-invoking ProducePartialResultsOnNetworkThread()
|
||||
// and set when ProducePartialResultsOnNetworkThread() is complete, after it
|
||||
// has updated the value of |network_report_|.
|
||||
rtc::Event network_report_event_;
|
||||
|
||||
// Set in |GetStatsReport|, read in |ProducePartialResultsOnNetworkThread| and
|
||||
// |ProducePartialResultsOnSignalingThread|, reset after work is complete. Not
|
||||
|
||||
@ -2249,7 +2249,9 @@ class FakeRTCStatsCollector : public RTCStatsCollector,
|
||||
worker_thread_(pc->worker_thread()),
|
||||
network_thread_(pc->network_thread()) {}
|
||||
|
||||
void ProducePartialResultsOnSignalingThread(int64_t timestamp_us) override {
|
||||
void ProducePartialResultsOnSignalingThreadImpl(
|
||||
int64_t timestamp_us,
|
||||
RTCStatsReport* partial_report) override {
|
||||
EXPECT_TRUE(signaling_thread_->IsCurrent());
|
||||
{
|
||||
rtc::CritScope cs(&lock_);
|
||||
@ -2257,13 +2259,15 @@ class FakeRTCStatsCollector : public RTCStatsCollector,
|
||||
++produced_on_signaling_thread_;
|
||||
}
|
||||
|
||||
rtc::scoped_refptr<RTCStatsReport> signaling_report =
|
||||
RTCStatsReport::Create(0);
|
||||
signaling_report->AddStats(std::unique_ptr<const RTCStats>(
|
||||
partial_report->AddStats(std::unique_ptr<const RTCStats>(
|
||||
new RTCTestStats("SignalingThreadStats", timestamp_us)));
|
||||
AddPartialResults(signaling_report);
|
||||
}
|
||||
void ProducePartialResultsOnNetworkThread(int64_t timestamp_us) override {
|
||||
void ProducePartialResultsOnNetworkThreadImpl(
|
||||
int64_t timestamp_us,
|
||||
const std::map<std::string, cricket::TransportStats>&
|
||||
transport_stats_by_name,
|
||||
const std::map<std::string, CertificateStatsPair>& transport_cert_stats,
|
||||
RTCStatsReport* partial_report) override {
|
||||
EXPECT_TRUE(network_thread_->IsCurrent());
|
||||
{
|
||||
rtc::CritScope cs(&lock_);
|
||||
@ -2271,11 +2275,8 @@ class FakeRTCStatsCollector : public RTCStatsCollector,
|
||||
++produced_on_network_thread_;
|
||||
}
|
||||
|
||||
rtc::scoped_refptr<RTCStatsReport> network_report =
|
||||
RTCStatsReport::Create(0);
|
||||
network_report->AddStats(std::unique_ptr<const RTCStats>(
|
||||
partial_report->AddStats(std::unique_ptr<const RTCStats>(
|
||||
new RTCTestStats("NetworkThreadStats", timestamp_us)));
|
||||
AddPartialResults(network_report);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user