diff --git a/src/common_audio/vad/vad_core.c b/src/common_audio/vad/vad_core.c
index 7516c7c493..133f6f1ed9 100644
--- a/src/common_audio/vad/vad_core.c
+++ b/src/common_audio/vad/vad_core.c
@@ -115,27 +115,29 @@ static int32_t WeightedAverage(int16_t* data, int16_t offset,
 // type of signal is most probable.
 //
 // - self           [i/o] : Pointer to VAD instance
-// - feature_vector [i]   : Feature vector = log10(energy in frequency band)
+// - features       [i]   : Feature vector of length |kNumChannels|
+//                          = log10(energy in frequency band)
 // - total_power    [i]   : Total power in audio frame.
 // - frame_length   [i]   : Number of input samples
 //
 // - returns              : the VAD decision (0 - noise, 1 - speech).
-static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
+static int16_t GmmProbability(VadInstT* self, int16_t* features,
                               int16_t total_power, int frame_length) {
-  int n, k;
+  int channel, k;
   int16_t feature_minimum;
   int16_t h0, h1;
   int16_t log_likelihood_ratio;
   int16_t vadflag = 0;
-  int16_t shifts0, shifts1;
+  int16_t shifts_h0, shifts_h1;
   int16_t tmp_s16, tmp1_s16, tmp2_s16;
   int16_t diff;
-  int nr, pos;
+  int gaussian;
   int16_t nmk, nmk2, nmk3, smk, smk2, nsk, ssk;
   int16_t delt, ndelt;
   int16_t maxspe, maxmu;
   int16_t deltaN[kTableSize], deltaS[kTableSize];
-  int16_t ngprvec[kTableSize], sgprvec[kTableSize];
+  int16_t ngprvec[kTableSize] = { 0 };  // Conditional probability = 0.
+  int16_t sgprvec[kTableSize] = { 0 };  // Conditional probability = 0.
   int32_t h0_test, h1_test;
   int32_t tmp1_s32, tmp2_s32;
   int32_t sum_log_likelihood_ratios = 0;
@@ -162,109 +164,126 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
   }
 
   if (total_power > kMinEnergy) {
-    // We have a signal present.
+    // The signal power of current frame is large enough for processing. The
+    // processing consists of two parts:
+    // 1) Calculating the likelihood of speech and thereby a VAD decision.
+    // 2) Updating the underlying model, w.r.t., the decision made.
 
-    for (n = 0; n < kNumChannels; n++) {
-      // Perform for all channels.
-      pos = (n << 1);
+    // The detection scheme is an LRT with hypothesis
+    // H0: Noise
+    // H1: Speech
+    //
+    // We combine a global LRT with local tests, for each frequency sub-band,
+    // here defined as |channel|.
+    for (channel = 0; channel < kNumChannels; channel++) {
+      // For each channel we model the probability with a GMM consisting of
+      // |kNumGaussians|, with different means and standard deviations
+      // depending on H0 or H1.
      h0_test = 0;
       h1_test = 0;
       for (k = 0; k < kNumGaussians; k++) {
-        nr = n + k * kNumChannels;
-        // Probability for Noise, Q7 * Q20 = Q27.
-        tmp1_s32 = WebRtcVad_GaussianProbability(feature_vector[n],
-                                                 self->noise_means[nr],
-                                                 self->noise_stds[nr],
-                                                 &deltaN[pos + k]);
-        noise_probability[k] = kNoiseDataWeights[nr] * tmp1_s32;
+        gaussian = channel + k * kNumChannels;
+        // Probability under H0, that is, probability of frame being noise.
+        // Value given in Q27 = Q7 * Q20.
+        tmp1_s32 = WebRtcVad_GaussianProbability(features[channel],
+                                                 self->noise_means[gaussian],
+                                                 self->noise_stds[gaussian],
+                                                 &deltaN[gaussian]);
+        noise_probability[k] = kNoiseDataWeights[gaussian] * tmp1_s32;
         h0_test += noise_probability[k];  // Q27
 
-        // Probability for Speech.
-        tmp1_s32 = WebRtcVad_GaussianProbability(feature_vector[n],
-                                                 self->speech_means[nr],
-                                                 self->speech_stds[nr],
-                                                 &deltaS[pos + k]);
-        speech_probability[k] = kSpeechDataWeights[nr] * tmp1_s32;
+        // Probability under H1, that is, probability of frame being speech.
+        // Value given in Q27 = Q7 * Q20.
+        tmp1_s32 = WebRtcVad_GaussianProbability(features[channel],
+                                                 self->speech_means[gaussian],
+                                                 self->speech_stds[gaussian],
+                                                 &deltaS[gaussian]);
+        speech_probability[k] = kSpeechDataWeights[gaussian] * tmp1_s32;
         h1_test += speech_probability[k];  // Q27
       }
 
-      h0 = (int16_t) (h0_test >> 12);  // Q15
-      h1 = (int16_t) (h1_test >> 12);  // Q15
-
-      // Calculate the log likelihood ratio. Approximate log2(H1/H0) with
-      // |shifts0| - |shifts1|.
-      shifts0 = WebRtcSpl_NormW32(h0_test);
-      shifts1 = WebRtcSpl_NormW32(h1_test);
-
-      if ((h0_test > 0) && (h1_test > 0)) {
-        log_likelihood_ratio = shifts0 - shifts1;
-      } else if (h1_test > 0) {
-        log_likelihood_ratio = 31 - shifts1;
-      } else if (h0_test > 0) {
-        log_likelihood_ratio = shifts0 - 31;
-      } else {
-        log_likelihood_ratio = 0;
+      // Calculate the log likelihood ratio: log2(Pr{X|H1} / Pr{X|H0}).
+      // Approximation:
+      // log2(Pr{X|H1} / Pr{X|H0}) = log2(Pr{X|H1}*2^Q) - log2(Pr{X|H0}*2^Q)
+      //                           = log2(h1_test) - log2(h0_test)
+      //                           = log2(2^(31-shifts_h1)*(1+b1))
+      //                             - log2(2^(31-shifts_h0)*(1+b0))
+      //                           = shifts_h0 - shifts_h1
+      //                             + log2(1+b1) - log2(1+b0)
+      //                          ~= shifts_h0 - shifts_h1
+      //
+      // Note that b0 and b1 are values less than 1, hence, 0 <= log2(1+b0) < 1.
+      // Further, b0 and b1 are independent and on the average the two terms
+      // cancel.
+      shifts_h0 = WebRtcSpl_NormW32(h0_test);
+      shifts_h1 = WebRtcSpl_NormW32(h1_test);
+      if (h0_test == 0) {
+        shifts_h0 = 31;
       }
+      if (h1_test == 0) {
+        shifts_h1 = 31;
+      }
+      log_likelihood_ratio = shifts_h0 - shifts_h1;
 
-      // VAD decision with spectrum weighting.
-      sum_log_likelihood_ratios += WEBRTC_SPL_MUL_16_16(log_likelihood_ratio,
-                                                        kSpectrumWeight[n]);
+      // Update |sum_log_likelihood_ratios| with spectrum weighting. This is
+      // used for the global VAD decision.
+      sum_log_likelihood_ratios +=
+          (int32_t) (log_likelihood_ratio * kSpectrumWeight[channel]);
 
-      // Individual channel test.
+      // Local VAD decision.
       if ((log_likelihood_ratio << 2) > individualTest) {
         vadflag = 1;
       }
 
-      // Probabilities used when updating model.
+      // TODO(bjornv): The conditional probabilities below are applied on the
+      // hard coded number of Gaussians set to two. Find a way to generalize.
+      // Calculate local noise probabilities used later when updating the GMM.
+      h0 = (int16_t) (h0_test >> 12);  // Q15
       if (h0 > 0) {
-        tmp1_s32 = noise_probability[0] & 0xFFFFF000;  // Q27
-        tmp2_s32 = (tmp1_s32 << 2);  // Q29
-        ngprvec[pos] = (int16_t) WebRtcSpl_DivW32W16(tmp2_s32, h0);  // Q14
-        ngprvec[pos + 1] = 16384 - ngprvec[pos];
+        // High probability of noise. Assign conditional probabilities for each
+        // Gaussian in the GMM.
+        tmp1_s32 = (noise_probability[0] & 0xFFFFF000) << 2;  // Q29
+        ngprvec[channel] = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, h0);  // Q14
+        ngprvec[channel + kNumChannels] = 16384 - ngprvec[channel];
       } else {
-        ngprvec[pos] = 16384;
-        ngprvec[pos + 1] = 0;
+        // Low noise probability. Assign conditional probability 1 to the first
+        // Gaussian and 0 to the rest (which is already set at initialization).
+        ngprvec[channel] = 16384;
       }
 
-      // Probabilities used when updating model.
+      // Calculate local speech probabilities used later when updating the GMM.
+      h1 = (int16_t) (h1_test >> 12);  // Q15
       if (h1 > 0) {
-        tmp1_s32 = speech_probability[0] & 0xFFFFF000;
-        tmp2_s32 = (tmp1_s32 << 2);
-        sgprvec[pos] = (int16_t) WebRtcSpl_DivW32W16(tmp2_s32, h1);
-        sgprvec[pos + 1] = 16384 - sgprvec[pos];
-      } else {
-        sgprvec[pos] = 0;
-        sgprvec[pos + 1] = 0;
+        // High probability of speech. Assign conditional probabilities for each
+        // Gaussian in the GMM. Otherwise use the initialized values, i.e., 0.
+        tmp1_s32 = (speech_probability[0] & 0xFFFFF000) << 2;  // Q29
+        sgprvec[channel] = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, h1);  // Q14
+        sgprvec[channel + kNumChannels] = 16384 - sgprvec[channel];
       }
     }
 
-    // Overall test.
-    if (sum_log_likelihood_ratios >= totalTest) {
-      vadflag |= 1;
-    }
-
-    maxspe = 12800;
+    // Make a global VAD decision.
+    vadflag |= (sum_log_likelihood_ratios >= totalTest);
 
     // Update the model parameters.
-    for (n = 0; n < kNumChannels; n++) {
-      pos = (n << 1);
+    maxspe = 12800;
+    for (channel = 0; channel < kNumChannels; channel++) {
 
       // Get minimum value in past which is used for long term correction in Q4.
-      feature_minimum = WebRtcVad_FindMinimum(self, feature_vector[n], n);
+      feature_minimum = WebRtcVad_FindMinimum(self, features[channel], channel);
 
       // Compute the "global" mean, that is the sum of the two means weighted.
-      noise_global_mean = WeightedAverage(&self->noise_means[n], 0,
-                                          &kNoiseDataWeights[n]);
+      noise_global_mean = WeightedAverage(&self->noise_means[channel], 0,
+                                          &kNoiseDataWeights[channel]);
       tmp1_s16 = (int16_t) (noise_global_mean >> 6);  // Q8
 
       for (k = 0; k < kNumGaussians; k++) {
-        int current_gaussian = n + k * kNumChannels;
-        nr = pos + k;
+        gaussian = channel + k * kNumChannels;
 
-        nmk = self->noise_means[current_gaussian];
-        smk = self->speech_means[current_gaussian];
-        nsk = self->noise_stds[current_gaussian];
-        ssk = self->speech_stds[current_gaussian];
+        nmk = self->noise_means[gaussian];
+        smk = self->speech_means[gaussian];
+        nsk = self->noise_stds[gaussian];
+        ssk = self->speech_stds[gaussian];
 
         // Update noise mean vector if the frame consists of noise only.
         nmk2 = nmk;
@@ -274,7 +293,8 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
           //             (|noise_probability[0]| + |noise_probability[1]|)
 
           // (Q14 * Q11 >> 11) = Q14.
-          delt = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(ngprvec[nr], deltaN[nr],
+          delt = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(ngprvec[gaussian],
+                                                     deltaN[gaussian],
                                                      11);
           // Q7 + (Q14 * Q15 >> 22) = Q7.
           nmk2 = nmk + (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(delt,
@@ -293,11 +313,11 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
         if (nmk3 < tmp_s16) {
           nmk3 = tmp_s16;
         }
-        tmp_s16 = (int16_t) ((72 + k - n) << 7);
+        tmp_s16 = (int16_t) ((72 + k - channel) << 7);
         if (nmk3 > tmp_s16) {
           nmk3 = tmp_s16;
         }
-        self->noise_means[current_gaussian] = nmk3;
+        self->noise_means[gaussian] = nmk3;
 
         if (vadflag) {
           // Update speech mean vector:
@@ -306,7 +326,8 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
           //             (|speech_probability[0]| + |speech_probability[1]|)
 
           // (Q14 * Q11) >> 11 = Q14.
-          delt = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(sgprvec[nr], deltaS[nr],
+          delt = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(sgprvec[gaussian],
+                                                     deltaS[gaussian],
                                                      11);
           // Q14 * Q15 >> 21 = Q8.
           tmp_s16 = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(delt,
@@ -323,20 +344,20 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
           if (smk2 > maxmu) {
             smk2 = maxmu;
           }
-          self->speech_means[current_gaussian] = smk2;  // Q7.
+          self->speech_means[gaussian] = smk2;  // Q7.
 
           // (Q7 >> 3) = Q4. With rounding.
           tmp_s16 = ((smk + 4) >> 3);
 
-          tmp_s16 = feature_vector[n] - tmp_s16;  // Q4
+          tmp_s16 = features[channel] - tmp_s16;  // Q4
           // (Q11 * Q4 >> 3) = Q12.
-          tmp1_s32 = WEBRTC_SPL_MUL_16_16_RSFT(deltaS[nr], tmp_s16, 3);
+          tmp1_s32 = WEBRTC_SPL_MUL_16_16_RSFT(deltaS[gaussian], tmp_s16, 3);
           tmp2_s32 = tmp1_s32 - 4096;
-          tmp_s16 = (sgprvec[nr] >> 2);
+          tmp_s16 = sgprvec[gaussian] >> 2;
           // (Q14 >> 2) * Q12 = Q24.
           tmp1_s32 = tmp_s16 * tmp2_s32;
-          tmp2_s32 = (tmp1_s32 >> 4);  // Q20
+          tmp2_s32 = tmp1_s32 >> 4;  // Q20
 
           // 0.1 * Q20 / Q7 = Q13.
           if (tmp2_s32 > 0) {
@@ -353,21 +374,22 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
           if (ssk < kMinStd) {
             ssk = kMinStd;
           }
-          self->speech_stds[current_gaussian] = ssk;
+          self->speech_stds[gaussian] = ssk;
         } else {
           // Update GMM variance vectors.
-          // deltaN * (feature_vector[n] - nmk) - 1
+          // deltaN * (features[channel] - nmk) - 1
           // Q4 - (Q7 >> 3) = Q4.
-          tmp_s16 = feature_vector[n] - (nmk >> 3);
+          tmp_s16 = features[channel] - (nmk >> 3);
 
           // (Q11 * Q4 >> 3) = Q12.
-          tmp1_s32 = WEBRTC_SPL_MUL_16_16_RSFT(deltaN[nr], tmp_s16, 3) - 4096;
+          tmp1_s32 = WEBRTC_SPL_MUL_16_16_RSFT(deltaN[gaussian], tmp_s16, 3);
+          tmp1_s32 -= 4096;
 
           // (Q14 >> 2) * Q12 = Q24.
-          tmp_s16 = ((ngprvec[nr] + 2) >> 2);
+          tmp_s16 = (ngprvec[gaussian] + 2) >> 2;
           tmp2_s32 = tmp_s16 * tmp1_s32;
 
           // Q20 * approx 0.001 (2^-10=0.0009766), hence,
           // (Q24 >> 14) = (Q24 >> 4) / 2^10 = Q20.
-          tmp1_s32 = (tmp2_s32 >> 14);
+          tmp1_s32 = tmp2_s32 >> 14;
 
           // Q20 / Q7 = Q13.
           if (tmp1_s32 > 0) {
@@ -377,29 +399,29 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
             tmp_s16 = -tmp_s16;
           }
           tmp_s16 += 32;  // Rounding
-          nsk += (tmp_s16 >> 6);  // Q13 >> 6 = Q7.
+          nsk += tmp_s16 >> 6;  // Q13 >> 6 = Q7.
           if (nsk < kMinStd) {
             nsk = kMinStd;
           }
-          self->noise_stds[current_gaussian] = nsk;
+          self->noise_stds[gaussian] = nsk;
         }
       }
 
       // Separate models if they are too close.
      // |noise_global_mean| in Q14 (= Q7 * Q7).
-      noise_global_mean = WeightedAverage(&self->noise_means[n], 0,
-                                          &kNoiseDataWeights[n]);
+      noise_global_mean = WeightedAverage(&self->noise_means[channel], 0,
+                                          &kNoiseDataWeights[channel]);
 
       // |speech_global_mean| in Q14 (= Q7 * Q7).
-      speech_global_mean = WeightedAverage(&self->speech_means[n], 0,
-                                           &kSpeechDataWeights[n]);
+      speech_global_mean = WeightedAverage(&self->speech_means[channel], 0,
+                                           &kSpeechDataWeights[channel]);
 
       // |diff| = "global" speech mean - "global" noise mean.
       // (Q14 >> 9) - (Q14 >> 9) = Q5.
       diff = (int16_t) (speech_global_mean >> 9) -
           (int16_t) (noise_global_mean >> 9);
-      if (diff < kMinimumDifference[n]) {
-        tmp_s16 = kMinimumDifference[n] - diff;
+      if (diff < kMinimumDifference[channel]) {
+        tmp_s16 = kMinimumDifference[channel] - diff;
 
         // |tmp1_s16| = ~0.8 * (kMinimumDifference - diff) in Q7.
         // |tmp2_s16| = ~0.2 * (kMinimumDifference - diff) in Q7.
@@ -407,36 +429,38 @@ static int16_t GmmProbability(VadInstT* self, int16_t* feature_vector,
         tmp2_s16 = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(3, tmp_s16, 2);
 
         // Move Gaussian means for speech model by |tmp1_s16| and update
-        // |speech_global_mean|. Note that |self->speech_means[n]| is changed
-        // after the call.
-        speech_global_mean = WeightedAverage(&self->speech_means[n], tmp1_s16,
-                                             &kSpeechDataWeights[n]);
+        // |speech_global_mean|. Note that |self->speech_means[channel]| is
+        // changed after the call.
+        speech_global_mean = WeightedAverage(&self->speech_means[channel],
+                                             tmp1_s16,
+                                             &kSpeechDataWeights[channel]);
 
         // Move Gaussian means for noise model by -|tmp2_s16| and update
-        // |noise_global_mean|. Note that |self->noise_means[n]| is changed
-        // after the call.
-        noise_global_mean = WeightedAverage(&self->noise_means[n], -tmp2_s16,
-                                            &kNoiseDataWeights[n]);
+        // |noise_global_mean|. Note that |self->noise_means[channel]| is
+        // changed after the call.
+        noise_global_mean = WeightedAverage(&self->noise_means[channel],
+                                            -tmp2_s16,
+                                            &kNoiseDataWeights[channel]);
       }
 
       // Control that the speech & noise means do not drift to much.
-      maxspe = kMaximumSpeech[n];
+      maxspe = kMaximumSpeech[channel];
       tmp2_s16 = (int16_t) (speech_global_mean >> 7);
       if (tmp2_s16 > maxspe) {
         // Upper limit of speech model.
         tmp2_s16 -= maxspe;
 
         for (k = 0; k < kNumGaussians; k++) {
-          self->speech_means[n + k * kNumChannels] -= tmp2_s16;
+          self->speech_means[channel + k * kNumChannels] -= tmp2_s16;
         }
       }
 
       tmp2_s16 = (int16_t) (noise_global_mean >> 7);
-      if (tmp2_s16 > kMaximumNoise[n]) {
-        tmp2_s16 -= kMaximumNoise[n];
+      if (tmp2_s16 > kMaximumNoise[channel]) {
+        tmp2_s16 -= kMaximumNoise[channel];
 
         for (k = 0; k < kNumGaussians; k++) {
-          self->noise_means[n + k * kNumChannels] -= tmp2_s16;
+          self->noise_means[channel + k * kNumChannels] -= tmp2_s16;
         }
       }
     }
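
The new comment block in the patch approximates log2(Pr{X|H1} / Pr{X|H0}) by the difference of the two normalization-shift counts. Below is a minimal stand-alone sketch of that idea, not part of the patch: NormQ31() is an illustrative stand-in for WebRtcSpl_NormW32() (it returns the number of left shifts needed to bring the highest set bit of a positive value up to bit 30, and returns 31 for zero, mirroring the patch's zero handling), and the Q27 sample values are made up.

/* Sketch only: why shifts_h0 - shifts_h1 approximates log2(h1_test / h0_test). */
#include <math.h>
#include <stdint.h>
#include <stdio.h>

static int16_t NormQ31(int32_t a) {
  /* Stand-in for WebRtcSpl_NormW32(); 31 for zero mirrors the patch. */
  int16_t shifts = 0;
  if (a <= 0) return 31;
  while ((a << shifts) < (int32_t) 0x40000000) {
    shifts++;
  }
  return shifts;
}

int main(void) {
  /* Example Q27 likelihoods, values chosen only for illustration. */
  int32_t h0_test = 3 << 20;   /* "noise" likelihood. */
  int32_t h1_test = 96 << 20;  /* "speech" likelihood. */

  int16_t shifts_h0 = NormQ31(h0_test);
  int16_t shifts_h1 = NormQ31(h1_test);
  int16_t log_likelihood_ratio = shifts_h0 - shifts_h1;

  printf("approx log2(h1/h0) = %d, exact = %.2f\n",
         log_likelihood_ratio,
         log2((double) h1_test / (double) h0_test));
  return 0;
}

With these inputs the shift difference is 5 while the exact ratio is log2(32) = 5.0; the residual error is the log2(1+b1) - log2(1+b0) term that the patch's comment argues averages out.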
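
The ngprvec[]/sgprvec[] values the patch computes are the per-Gaussian conditional probabilities used in the model update. The sketch below, with made-up Q27 likelihoods and a plain integer division standing in for WebRtcSpl_DivW32W16(), illustrates the Q27 -> Q29 -> Q14 conversion described in the comments and that the two weights always sum to 16384 (1.0 in Q14).

/* Sketch only: Gaussian "responsibilities" in Q14, two-Gaussian case. */
#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* Made-up per-Gaussian noise likelihoods in Q27 (weight * Gaussian pdf). */
  int32_t noise_probability[2] = { 40 << 20, 24 << 20 };
  int32_t h0_test = noise_probability[0] + noise_probability[1];  /* Q27 */
  int16_t h0 = (int16_t) (h0_test >> 12);                         /* Q15 */

  /* Q27 -> Q29, then Q29 / Q15 = Q14, matching the patch's comments. */
  int32_t numerator_q29 = (noise_probability[0] & 0xFFFFF000) << 2;
  int16_t ngprvec_0 = (int16_t) (numerator_q29 / h0);
  int16_t ngprvec_1 = (int16_t) (16384 - ngprvec_0);

  printf("responsibilities: %.3f + %.3f = %.3f\n",
         ngprvec_0 / 16384.0, ngprvec_1 / 16384.0,
         (ngprvec_0 + ngprvec_1) / 16384.0);
  return 0;
}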
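
The Q-format comments such as "(Q14 * Q11 >> 11) = Q14" follow from Qm * Qn = Q(m+n), so shifting the product right by n returns it to Qm. A small sketch with invented values; ToQ()/FromQ() are helpers added only for readability and are not WebRTC functions.

/* Sketch only: fixed-point bookkeeping behind the |delt| computation. */
#include <stdint.h>
#include <stdio.h>

static int32_t ToQ(double v, int n) { return (int32_t) (v * (1 << n) + 0.5); }
static double FromQ(int32_t v, int n) { return (double) v / (1 << n); }

int main(void) {
  int16_t prob_q14 = (int16_t) ToQ(0.75, 14);  /* like ngprvec[], Q14 */
  int16_t delta_q11 = (int16_t) ToQ(1.5, 11);  /* like deltaN[], Q11  */

  /* Q14 * Q11 = Q25; >> 11 brings it back to Q14. */
  int32_t delt_q14 = ((int32_t) prob_q14 * delta_q11) >> 11;

  printf("0.75 * 1.5 = %.4f (expected 1.1250)\n", FromQ(delt_q14, 14));
  return 0;
}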