Increase STUN RTOs to work better on poor networks, such as 2G networks.

BUG=b/34822484

Review-Url: https://codereview.webrtc.org/2677743002
Cr-Commit-Position: refs/heads/master@{#16503}
This commit is contained in:
pthatcher 2017-02-08 14:42:22 -08:00 committed by Commit bot
parent 1749bc372e
commit 94a2f21c05
7 changed files with 48 additions and 37 deletions

View File

@ -74,8 +74,9 @@ inline int ConservativeRTTEstimate(int rtt) {
// Weighting of the old rtt value to new data.
const int RTT_RATIO = 3; // 3 : 1
// The delay before we begin checking if this port is useless.
const int kPortTimeoutDelay = 30 * 1000; // 30 seconds
// The delay before we begin checking if this port is useless. We set
// it to a little higher than a total STUN timeout.
const int kPortTimeoutDelay = cricket::STUN_TOTAL_TIMEOUT + 5000;
} // namespace
namespace cricket {

View File

@ -30,11 +30,9 @@ static const SocketAddress kStunAddr3("127.0.0.1", 3000);
static const SocketAddress kBadAddr("0.0.0.1", 5000);
static const SocketAddress kStunHostnameAddr("localhost", 5000);
static const SocketAddress kBadHostnameAddr("not-a-real-hostname", 5000);
// STUN timeout (with all retries) is 9500ms.
// STUN timeout (with all retries) is cricket::STUN_TOTAL_TIMEOUT.
// Add some margin of error for slow bots.
// TODO(deadbeef): Use simulated clock instead of just increasing timeouts to
// fix flaky tests.
static const int kTimeoutMs = 15000;
static const int kTimeoutMs = cricket::STUN_TOTAL_TIMEOUT;
// stun prio = 100 << 24 | 30 (IPV4) << 8 | 256 - 0
static const uint32_t kStunCandidatePriority = 1677729535;
static const int kInfiniteLifetime = -1;

View File

@ -22,9 +22,22 @@ namespace cricket {
const uint32_t MSG_STUN_SEND = 1;
const int MAX_SENDS = 9;
const int DELAY_UNIT = 100; // 100 milliseconds
const int DELAY_MAX_FACTOR = 16;
// RFC 5389 says SHOULD be 500ms.
// For years, this was 100ms, but for networks that
// experience moments of high RTT (such as 2G networks), this doesn't
// work well.
const int STUN_INITIAL_RTO = 250; // milliseconds
// The timeout doubles each retransmission, up to this many times
// RFC 5389 says SHOULD retransmit 7 times.
// This has been 8 for years (not sure why).
const int STUN_MAX_RETRANSMISSIONS = 8; // Total sends: 9
// We also cap the doubling, even though the standard doesn't say to.
// This has been 1.6 seconds for years, but for networks that
// experience moments of high RTT (such as 2G networks), this doesn't
// work well.
const int STUN_MAX_RTO = 8000; // milliseconds, or 5 doublings
StunRequestManager::StunRequestManager(rtc::Thread* thread)
: thread_(thread) {
@ -226,7 +239,8 @@ void StunRequest::OnMessage(rtc::Message* pmsg) {
void StunRequest::OnSent() {
count_ += 1;
if (count_ == MAX_SENDS) {
int retransmissions = (count_ - 1);
if (retransmissions >= STUN_MAX_RETRANSMISSIONS) {
timeout_ = true;
}
LOG(LS_VERBOSE) << "Sent STUN request " << count_
@ -237,7 +251,9 @@ int StunRequest::resend_delay() {
if (count_ == 0) {
return 0;
}
return DELAY_UNIT * std::min(1 << (count_-1), DELAY_MAX_FACTOR);
int retransmissions = (count_ - 1);
int rto = STUN_INITIAL_RTO << retransmissions;
return std::min(rto, STUN_MAX_RTO);
}
} // namespace cricket

View File

@ -23,6 +23,11 @@ class StunRequest;
const int kAllRequests = 0;
// Total max timeouts: 39.75 seconds
// For years, this was 9.5 seconds, but for networks that experience moments of
// high RTT (such as 40s on 2G networks), this doesn't work well.
const int STUN_TOTAL_TIMEOUT = 39750; // milliseconds
// Manages a set of STUN requests, sending and resending until we receive a
// response or determine that the request has timed out.
class StunRequestManager {

View File

@ -55,14 +55,9 @@ class StunRequestTest : public testing::Test,
return msg;
}
static int TotalDelay(int sends) {
int total = 0;
for (int i = 0; i < sends; i++) {
if (i < 4)
total += 100 << i;
else
total += 1600;
}
return total;
std::vector<int> delays = {0, 250, 750, 1750, 3750,
7750, 15750, 23750, 31750, 39750};
return delays[sends];
}
StunRequestManager manager_;
@ -142,10 +137,8 @@ TEST_F(StunRequestTest, TestUnexpected) {
delete res;
}
// Test that requests are sent at the right times, and that the 9th request
// (sent at 7900 ms) can be properly replied to.
// Test that requests are sent at the right times.
TEST_F(StunRequestTest, TestBackoff) {
const int MAX_TIMEOUT_MS = 10000;
rtc::ScopedFakeClock fake_clock;
StunMessage* req = CreateStunMessage(STUN_BINDING_REQUEST, NULL);
@ -153,7 +146,8 @@ TEST_F(StunRequestTest, TestBackoff) {
manager_.Send(new StunRequestThunker(req, this));
StunMessage* res = CreateStunMessage(STUN_BINDING_RESPONSE, req);
for (int i = 0; i < 9; ++i) {
EXPECT_TRUE_SIMULATED_WAIT(request_count_ != i, MAX_TIMEOUT_MS, fake_clock);
EXPECT_TRUE_SIMULATED_WAIT(request_count_ != i, STUN_TOTAL_TIMEOUT,
fake_clock);
int64_t elapsed = rtc::TimeMillis() - start;
LOG(LS_INFO) << "STUN request #" << (i + 1)
<< " sent at " << elapsed << " ms";
@ -168,15 +162,14 @@ TEST_F(StunRequestTest, TestBackoff) {
delete res;
}
// Test that we timeout properly if no response is received in 9500 ms.
// Test that we timeout properly if no response is received.
TEST_F(StunRequestTest, TestTimeout) {
rtc::ScopedFakeClock fake_clock;
StunMessage* req = CreateStunMessage(STUN_BINDING_REQUEST, NULL);
StunMessage* res = CreateStunMessage(STUN_BINDING_RESPONSE, req);
manager_.Send(new StunRequestThunker(req, this));
// Simulate the 9500 ms STUN timeout
SIMULATED_WAIT(false, 9500, fake_clock);
SIMULATED_WAIT(false, cricket::STUN_TOTAL_TIMEOUT, fake_clock);
EXPECT_FALSE(manager_.CheckResponse(res));
EXPECT_TRUE(response_ == NULL);

View File

@ -84,11 +84,9 @@ static const int kDefaultAllocationTimeout = 3000;
static const char kTurnUsername[] = "test";
static const char kTurnPassword[] = "test";
// STUN timeout (with all retries) is 9500ms.
// STUN timeout (with all retries) is cricket::STUN_TOTAL_TIMEOUT.
// Add some margin of error for slow bots.
// TODO(deadbeef): Use simulated clock instead of just increasing timeouts to
// fix flaky tests.
static const int kStunTimeoutMs = 15000;
static const int kStunTimeoutMs = cricket::STUN_TOTAL_TIMEOUT;
namespace cricket {
@ -1231,10 +1229,11 @@ TEST_F(BasicPortAllocatorTest, TestGetAllPortsNoUdpAllowed) {
EXPECT_PRED4(HasCandidate, candidates_, "relay", "ssltcp",
kRelaySslTcpIntAddr);
EXPECT_PRED4(HasCandidate, candidates_, "relay", "udp", kRelayUdpExtAddr);
// We wait at least for a full STUN timeout, which is currently 9.5
// seconds. But since 3-3.5 seconds already passed (see above), we
// only need 6.5 more seconds.
EXPECT_TRUE_SIMULATED_WAIT(candidate_allocation_done_, 6500, fake_clock);
// We wait at least for a full STUN timeout, which
// cricket::STUN_TOTAL_TIMEOUT seconds. But since 3-3.5 seconds
// already passed (see above), we wait 3 seconds less than that.
EXPECT_TRUE_SIMULATED_WAIT(candidate_allocation_done_,
cricket::STUN_TOTAL_TIMEOUT - 3000, fake_clock);
}
TEST_F(BasicPortAllocatorTest, TestCandidatePriorityOfMultipleInterfaces) {

View File

@ -106,8 +106,6 @@ static const char kMediaContentName1[] = "video";
static const int kDefaultTimeout = 10000; // 10 seconds.
static const int kIceCandidatesTimeout = 10000;
// STUN timeout with all retransmissions is a total of 9500ms.
static const int kStunTimeout = 9500;
static const char kFakeDtlsFingerprint[] =
"BB:CD:72:F7:2F:D0:BA:43:F3:68:B1:0C:23:72:B6:4A:"
@ -1593,7 +1591,8 @@ TEST_F(WebRtcSessionTest, TestStunError) {
SendAudioVideoStream1();
InitiateCall();
// Since kClientAddrHost1 is blocked, not expecting stun candidates for it.
EXPECT_TRUE_SIMULATED_WAIT(observer_.oncandidatesready_, kStunTimeout, clock);
EXPECT_TRUE_SIMULATED_WAIT(observer_.oncandidatesready_,
cricket::STUN_TOTAL_TIMEOUT, clock);
EXPECT_EQ(6u, observer_.mline_0_candidates_.size());
EXPECT_EQ(6u, observer_.mline_1_candidates_.size());
// Destroy session before scoped fake clock goes out of scope to avoid TSan