From 94a2f21c0573be1d909c9df157deb08bd9650262 Mon Sep 17 00:00:00 2001 From: pthatcher Date: Wed, 8 Feb 2017 14:42:22 -0800 Subject: [PATCH] Increase STUN RTOs to work better on poor networks, such as 2G networks. BUG=b/34822484 Review-Url: https://codereview.webrtc.org/2677743002 Cr-Commit-Position: refs/heads/master@{#16503} --- webrtc/p2p/base/port.cc | 5 ++-- webrtc/p2p/base/stunport_unittest.cc | 6 ++--- webrtc/p2p/base/stunrequest.cc | 26 +++++++++++++++---- webrtc/p2p/base/stunrequest.h | 5 ++++ webrtc/p2p/base/stunrequest_unittest.cc | 23 ++++++---------- .../p2p/client/basicportallocator_unittest.cc | 15 +++++------ webrtc/pc/webrtcsession_unittest.cc | 5 ++-- 7 files changed, 48 insertions(+), 37 deletions(-) diff --git a/webrtc/p2p/base/port.cc b/webrtc/p2p/base/port.cc index 16a8019435..385022c11a 100644 --- a/webrtc/p2p/base/port.cc +++ b/webrtc/p2p/base/port.cc @@ -74,8 +74,9 @@ inline int ConservativeRTTEstimate(int rtt) { // Weighting of the old rtt value to new data. const int RTT_RATIO = 3; // 3 : 1 -// The delay before we begin checking if this port is useless. -const int kPortTimeoutDelay = 30 * 1000; // 30 seconds +// The delay before we begin checking if this port is useless. We set +// it to a little higher than a total STUN timeout. +const int kPortTimeoutDelay = cricket::STUN_TOTAL_TIMEOUT + 5000; } // namespace namespace cricket { diff --git a/webrtc/p2p/base/stunport_unittest.cc b/webrtc/p2p/base/stunport_unittest.cc index a16887f012..aa708c1ba1 100644 --- a/webrtc/p2p/base/stunport_unittest.cc +++ b/webrtc/p2p/base/stunport_unittest.cc @@ -30,11 +30,9 @@ static const SocketAddress kStunAddr3("127.0.0.1", 3000); static const SocketAddress kBadAddr("0.0.0.1", 5000); static const SocketAddress kStunHostnameAddr("localhost", 5000); static const SocketAddress kBadHostnameAddr("not-a-real-hostname", 5000); -// STUN timeout (with all retries) is 9500ms. +// STUN timeout (with all retries) is cricket::STUN_TOTAL_TIMEOUT. // Add some margin of error for slow bots. -// TODO(deadbeef): Use simulated clock instead of just increasing timeouts to -// fix flaky tests. -static const int kTimeoutMs = 15000; +static const int kTimeoutMs = cricket::STUN_TOTAL_TIMEOUT; // stun prio = 100 << 24 | 30 (IPV4) << 8 | 256 - 0 static const uint32_t kStunCandidatePriority = 1677729535; static const int kInfiniteLifetime = -1; diff --git a/webrtc/p2p/base/stunrequest.cc b/webrtc/p2p/base/stunrequest.cc index 57c3c2e4a5..ef2bee8aa8 100644 --- a/webrtc/p2p/base/stunrequest.cc +++ b/webrtc/p2p/base/stunrequest.cc @@ -22,9 +22,22 @@ namespace cricket { const uint32_t MSG_STUN_SEND = 1; -const int MAX_SENDS = 9; -const int DELAY_UNIT = 100; // 100 milliseconds -const int DELAY_MAX_FACTOR = 16; +// RFC 5389 says SHOULD be 500ms. +// For years, this was 100ms, but for networks that +// experience moments of high RTT (such as 2G networks), this doesn't +// work well. +const int STUN_INITIAL_RTO = 250; // milliseconds + +// The timeout doubles each retransmission, up to this many times +// RFC 5389 says SHOULD retransmit 7 times. +// This has been 8 for years (not sure why). +const int STUN_MAX_RETRANSMISSIONS = 8; // Total sends: 9 + +// We also cap the doubling, even though the standard doesn't say to. +// This has been 1.6 seconds for years, but for networks that +// experience moments of high RTT (such as 2G networks), this doesn't +// work well. +const int STUN_MAX_RTO = 8000; // milliseconds, or 5 doublings StunRequestManager::StunRequestManager(rtc::Thread* thread) : thread_(thread) { @@ -226,7 +239,8 @@ void StunRequest::OnMessage(rtc::Message* pmsg) { void StunRequest::OnSent() { count_ += 1; - if (count_ == MAX_SENDS) { + int retransmissions = (count_ - 1); + if (retransmissions >= STUN_MAX_RETRANSMISSIONS) { timeout_ = true; } LOG(LS_VERBOSE) << "Sent STUN request " << count_ @@ -237,7 +251,9 @@ int StunRequest::resend_delay() { if (count_ == 0) { return 0; } - return DELAY_UNIT * std::min(1 << (count_-1), DELAY_MAX_FACTOR); + int retransmissions = (count_ - 1); + int rto = STUN_INITIAL_RTO << retransmissions; + return std::min(rto, STUN_MAX_RTO); } } // namespace cricket diff --git a/webrtc/p2p/base/stunrequest.h b/webrtc/p2p/base/stunrequest.h index f6d216d1f7..99dff14aee 100644 --- a/webrtc/p2p/base/stunrequest.h +++ b/webrtc/p2p/base/stunrequest.h @@ -23,6 +23,11 @@ class StunRequest; const int kAllRequests = 0; +// Total max timeouts: 39.75 seconds +// For years, this was 9.5 seconds, but for networks that experience moments of +// high RTT (such as 40s on 2G networks), this doesn't work well. +const int STUN_TOTAL_TIMEOUT = 39750; // milliseconds + // Manages a set of STUN requests, sending and resending until we receive a // response or determine that the request has timed out. class StunRequestManager { diff --git a/webrtc/p2p/base/stunrequest_unittest.cc b/webrtc/p2p/base/stunrequest_unittest.cc index 47583b90e8..9b8ccac1ed 100644 --- a/webrtc/p2p/base/stunrequest_unittest.cc +++ b/webrtc/p2p/base/stunrequest_unittest.cc @@ -55,14 +55,9 @@ class StunRequestTest : public testing::Test, return msg; } static int TotalDelay(int sends) { - int total = 0; - for (int i = 0; i < sends; i++) { - if (i < 4) - total += 100 << i; - else - total += 1600; - } - return total; + std::vector delays = {0, 250, 750, 1750, 3750, + 7750, 15750, 23750, 31750, 39750}; + return delays[sends]; } StunRequestManager manager_; @@ -142,10 +137,8 @@ TEST_F(StunRequestTest, TestUnexpected) { delete res; } -// Test that requests are sent at the right times, and that the 9th request -// (sent at 7900 ms) can be properly replied to. +// Test that requests are sent at the right times. TEST_F(StunRequestTest, TestBackoff) { - const int MAX_TIMEOUT_MS = 10000; rtc::ScopedFakeClock fake_clock; StunMessage* req = CreateStunMessage(STUN_BINDING_REQUEST, NULL); @@ -153,7 +146,8 @@ TEST_F(StunRequestTest, TestBackoff) { manager_.Send(new StunRequestThunker(req, this)); StunMessage* res = CreateStunMessage(STUN_BINDING_RESPONSE, req); for (int i = 0; i < 9; ++i) { - EXPECT_TRUE_SIMULATED_WAIT(request_count_ != i, MAX_TIMEOUT_MS, fake_clock); + EXPECT_TRUE_SIMULATED_WAIT(request_count_ != i, STUN_TOTAL_TIMEOUT, + fake_clock); int64_t elapsed = rtc::TimeMillis() - start; LOG(LS_INFO) << "STUN request #" << (i + 1) << " sent at " << elapsed << " ms"; @@ -168,15 +162,14 @@ TEST_F(StunRequestTest, TestBackoff) { delete res; } -// Test that we timeout properly if no response is received in 9500 ms. +// Test that we timeout properly if no response is received. TEST_F(StunRequestTest, TestTimeout) { rtc::ScopedFakeClock fake_clock; StunMessage* req = CreateStunMessage(STUN_BINDING_REQUEST, NULL); StunMessage* res = CreateStunMessage(STUN_BINDING_RESPONSE, req); manager_.Send(new StunRequestThunker(req, this)); - // Simulate the 9500 ms STUN timeout - SIMULATED_WAIT(false, 9500, fake_clock); + SIMULATED_WAIT(false, cricket::STUN_TOTAL_TIMEOUT, fake_clock); EXPECT_FALSE(manager_.CheckResponse(res)); EXPECT_TRUE(response_ == NULL); diff --git a/webrtc/p2p/client/basicportallocator_unittest.cc b/webrtc/p2p/client/basicportallocator_unittest.cc index 29d29ce1f4..7bba15b155 100644 --- a/webrtc/p2p/client/basicportallocator_unittest.cc +++ b/webrtc/p2p/client/basicportallocator_unittest.cc @@ -84,11 +84,9 @@ static const int kDefaultAllocationTimeout = 3000; static const char kTurnUsername[] = "test"; static const char kTurnPassword[] = "test"; -// STUN timeout (with all retries) is 9500ms. +// STUN timeout (with all retries) is cricket::STUN_TOTAL_TIMEOUT. // Add some margin of error for slow bots. -// TODO(deadbeef): Use simulated clock instead of just increasing timeouts to -// fix flaky tests. -static const int kStunTimeoutMs = 15000; +static const int kStunTimeoutMs = cricket::STUN_TOTAL_TIMEOUT; namespace cricket { @@ -1231,10 +1229,11 @@ TEST_F(BasicPortAllocatorTest, TestGetAllPortsNoUdpAllowed) { EXPECT_PRED4(HasCandidate, candidates_, "relay", "ssltcp", kRelaySslTcpIntAddr); EXPECT_PRED4(HasCandidate, candidates_, "relay", "udp", kRelayUdpExtAddr); - // We wait at least for a full STUN timeout, which is currently 9.5 - // seconds. But since 3-3.5 seconds already passed (see above), we - // only need 6.5 more seconds. - EXPECT_TRUE_SIMULATED_WAIT(candidate_allocation_done_, 6500, fake_clock); + // We wait at least for a full STUN timeout, which + // cricket::STUN_TOTAL_TIMEOUT seconds. But since 3-3.5 seconds + // already passed (see above), we wait 3 seconds less than that. + EXPECT_TRUE_SIMULATED_WAIT(candidate_allocation_done_, + cricket::STUN_TOTAL_TIMEOUT - 3000, fake_clock); } TEST_F(BasicPortAllocatorTest, TestCandidatePriorityOfMultipleInterfaces) { diff --git a/webrtc/pc/webrtcsession_unittest.cc b/webrtc/pc/webrtcsession_unittest.cc index 716ba25146..79e2055dec 100644 --- a/webrtc/pc/webrtcsession_unittest.cc +++ b/webrtc/pc/webrtcsession_unittest.cc @@ -106,8 +106,6 @@ static const char kMediaContentName1[] = "video"; static const int kDefaultTimeout = 10000; // 10 seconds. static const int kIceCandidatesTimeout = 10000; -// STUN timeout with all retransmissions is a total of 9500ms. -static const int kStunTimeout = 9500; static const char kFakeDtlsFingerprint[] = "BB:CD:72:F7:2F:D0:BA:43:F3:68:B1:0C:23:72:B6:4A:" @@ -1593,7 +1591,8 @@ TEST_F(WebRtcSessionTest, TestStunError) { SendAudioVideoStream1(); InitiateCall(); // Since kClientAddrHost1 is blocked, not expecting stun candidates for it. - EXPECT_TRUE_SIMULATED_WAIT(observer_.oncandidatesready_, kStunTimeout, clock); + EXPECT_TRUE_SIMULATED_WAIT(observer_.oncandidatesready_, + cricket::STUN_TOTAL_TIMEOUT, clock); EXPECT_EQ(6u, observer_.mline_0_candidates_.size()); EXPECT_EQ(6u, observer_.mline_1_candidates_.size()); // Destroy session before scoped fake clock goes out of scope to avoid TSan