Remove activity_metric tool.

All the changes to these code are migration or codebase maintenance changes, the tool seems unused so instead of migrating it to ABSL_FLAG this CL removes the code. As agreed on https://webrtc-review.googlesource.com/c/src/+/143967/1#message-067b02363890435e52646dcfdf967df745ac37f1. Bug: None Change-Id: I57912541b2c0d0f49e2b862184db335f59def0e7 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/144620 Reviewed-by: Alessio Bazzica <alessiob@webrtc.org> Commit-Queue: Mirko Bonadei <mbonadei@webrtc.org> Cr-Commit-Position: refs/heads/master@{#28484}
2019-07-04 11:40:44 +02:00 · 2019-07-04 11:40:44 +02:00 · 8e3cb53892
commit 8e3cb53892
parent 543179e527
2 changed files with 1 additions and 417 deletions
--- a/rtc_tools/BUILD.gn
+++ b/rtc_tools/BUILD.gn
@ -31,10 +31,7 @@ group("rtc_tools") {
  }

  if (rtc_include_tests) {
-    deps += [
-      ":activity_metric",
-      ":tools_unittests",
-    ]
+    deps += [ ":tools_unittests" ]
    if (rtc_enable_protobuf) {
      if (!build_with_chromium) {
        deps += [ ":event_log_visualizer" ]
@ -362,23 +359,6 @@ if (rtc_include_tests) {
    }
  }

-  rtc_executable("activity_metric") {
-    testonly = true
-    sources = [
-      "agc/activity_metric.cc",
-    ]
-
-    deps = [
-      "../api/audio:audio_frame_api",
-      "../modules/audio_processing/agc:level_estimation",
-      "../modules/audio_processing/vad",
-      "../rtc_base:rtc_base_approved",
-      "../rtc_base:safe_minmax",
-      "../test:test_support",
-      "//testing/gtest",
-    ]
-  }
-
  tools_unittests_resources = [
    "../resources/foreman_128x96.yuv",
    "../resources/foreman_cif.yuv",
--- a/rtc_tools/agc/activity_metric.cc
+++ b/rtc_tools/agc/activity_metric.cc
@ -1,396 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <assert.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <iostream>
-#include <memory>
-
-#include "api/audio/audio_frame.h"
-#include "modules/audio_processing/agc/loudness_histogram.h"
-#include "modules/audio_processing/vad/common.h"
-#include "modules/audio_processing/vad/pitch_based_vad.h"
-#include "modules/audio_processing/vad/standalone_vad.h"
-#include "modules/audio_processing/vad/vad_audio_proc.h"
-#include "rtc_base/flags.h"
-#include "rtc_base/numerics/safe_minmax.h"
-#include "test/gtest.h"
-
-static const int kAgcAnalWindowSamples = 100;
-static const float kDefaultActivityThreshold = 0.3f;
-
-WEBRTC_DEFINE_bool(standalone_vad, true, "enable stand-alone VAD");
-WEBRTC_DEFINE_string(true_vad,
-                     "",
-                     "name of a file containing true VAD in 'int'"
-                     " format");
-WEBRTC_DEFINE_string(
-    video_vad,
-    "",
-    "name of a file containing video VAD (activity"
-    " probabilities) in double format. One activity per 10ms is"
-    " required. If no file is given the video information is not"
-    " incorporated. Negative activity is interpreted as video is"
-    " not adapted and the statistics are not computed during"
-    " the learning phase. Note that the negative video activities"
-    " are ONLY allowed at the beginning.");
-WEBRTC_DEFINE_string(
-    result,
-    "",
-    "name of a file to write the results. The results"
-    " will be appended to the end of the file. This is optional.");
-WEBRTC_DEFINE_string(audio_content,
-                     "",
-                     "name of a file where audio content is written"
-                     " to, in double format.");
-WEBRTC_DEFINE_float(activity_threshold,
-                    kDefaultActivityThreshold,
-                    "Activity threshold");
-WEBRTC_DEFINE_bool(help, false, "prints this message");
-
-namespace webrtc {
-
-// TODO(turajs) A new CL will be committed soon where ExtractFeatures will
-// notify the caller of "silence" input, instead of bailing out. We would not
-// need the following function when such a change is made.
-
-// Add some dither to quiet frames. This avoids the ExtractFeatures skip a
-// silence frame. Otherwise true VAD would drift with respect to the audio.
-// We only consider mono inputs.
-static void DitherSilence(AudioFrame* frame) {
-  ASSERT_EQ(1u, frame->num_channels_);
-  const double kRmsSilence = 5;
-  const double sum_squared_silence =
-      kRmsSilence * kRmsSilence * frame->samples_per_channel_;
-  double sum_squared = 0;
-  int16_t* frame_data = frame->mutable_data();
-  for (size_t n = 0; n < frame->samples_per_channel_; n++)
-    sum_squared += frame_data[n] * frame_data[n];
-  if (sum_squared <= sum_squared_silence) {
-    for (size_t n = 0; n < frame->samples_per_channel_; n++)
-      frame_data[n] = (rand() & 0xF) - 8;  // NOLINT: ignore non-threadsafe.
-  }
-}
-
-class AgcStat {
- public:
-  AgcStat()
-      : video_index_(0),
-        activity_threshold_(kDefaultActivityThreshold),
-        audio_content_(LoudnessHistogram::Create(kAgcAnalWindowSamples)),
-        audio_processing_(new VadAudioProc()),
-        vad_(new PitchBasedVad()),
-        standalone_vad_(StandaloneVad::Create()),
-        audio_content_fid_(NULL) {
-    for (size_t n = 0; n < kMaxNumFrames; n++)
-      video_vad_[n] = 0.5;
-  }
-
-  ~AgcStat() {
-    if (audio_content_fid_ != NULL) {
-      fclose(audio_content_fid_);
-    }
-  }
-
-  void set_audio_content_file(FILE* audio_content_fid) {
-    audio_content_fid_ = audio_content_fid;
-  }
-
-  int AddAudio(const AudioFrame& frame, double p_video, int* combined_vad) {
-    if (frame.num_channels_ != 1 ||
-        frame.samples_per_channel_ != kSampleRateHz / 100 ||
-        frame.sample_rate_hz_ != kSampleRateHz)
-      return -1;
-    video_vad_[video_index_++] = p_video;
-    AudioFeatures features;
-    const int16_t* frame_data = frame.data();
-    audio_processing_->ExtractFeatures(frame_data, frame.samples_per_channel_,
-                                       &features);
-    if (FLAG_standalone_vad) {
-      standalone_vad_->AddAudio(frame_data, frame.samples_per_channel_);
-    }
-    if (features.num_frames > 0) {
-      double p[kMaxNumFrames] = {0.5, 0.5, 0.5, 0.5};
-      if (FLAG_standalone_vad) {
-        standalone_vad_->GetActivity(p, kMaxNumFrames);
-      }
-      // TODO(turajs) combining and limiting are used in the source files as
-      // well they can be moved to utility.
-      // Combine Video and stand-alone VAD.
-      for (size_t n = 0; n < features.num_frames; n++) {
-        double p_active = p[n] * video_vad_[n];
-        double p_passive = (1 - p[n]) * (1 - video_vad_[n]);
-        p[n] = rtc::SafeClamp(p_active / (p_active + p_passive), 0.01, 0.99);
-      }
-      if (vad_->VoicingProbability(features, p) < 0)
-        return -1;
-      for (size_t n = 0; n < features.num_frames; n++) {
-        audio_content_->Update(features.rms[n], p[n]);
-        double ac = audio_content_->AudioContent();
-        if (audio_content_fid_ != NULL) {
-          fwrite(&ac, sizeof(ac), 1, audio_content_fid_);
-        }
-        if (ac > kAgcAnalWindowSamples * activity_threshold_) {
-          combined_vad[n] = 1;
-        } else {
-          combined_vad[n] = 0;
-        }
-      }
-      video_index_ = 0;
-    }
-    return static_cast<int>(features.num_frames);
-  }
-
-  void Reset() { audio_content_->Reset(); }
-
-  void SetActivityThreshold(double activity_threshold) {
-    activity_threshold_ = activity_threshold;
-  }
-
- private:
-  int video_index_;
-  double activity_threshold_;
-  double video_vad_[kMaxNumFrames];
-  std::unique_ptr<LoudnessHistogram> audio_content_;
-  std::unique_ptr<VadAudioProc> audio_processing_;
-  std::unique_ptr<PitchBasedVad> vad_;
-  std::unique_ptr<StandaloneVad> standalone_vad_;
-
-  FILE* audio_content_fid_;
-};
-
-void void_main(int argc, char* argv[]) {
-  webrtc::AgcStat agc_stat;
-
-  FILE* pcm_fid = fopen(argv[1], "rb");
-  ASSERT_TRUE(pcm_fid != NULL) << "Cannot open PCM file " << argv[1];
-
-  if (argc < 2) {
-    fprintf(stderr, "\nNot Enough arguments\n");
-  }
-
-  FILE* true_vad_fid = NULL;
-  ASSERT_GT(strlen(FLAG_true_vad), 0u) << "Specify the file containing true "
-                                          "VADs using --true_vad flag.";
-  true_vad_fid = fopen(FLAG_true_vad, "rb");
-  ASSERT_TRUE(true_vad_fid != NULL)
-      << "Cannot open the active list " << FLAG_true_vad;
-
-  FILE* results_fid = NULL;
-  if (strlen(FLAG_result) > 0) {
-    // True if this is the first time writing to this function and we add a
-    // header to the beginning of the file.
-    bool write_header;
-    // Open in the read mode. If it fails, the file doesn't exist and has to
-    // write a header for it. Otherwise no need to write a header.
-    results_fid = fopen(FLAG_result, "r");
-    if (results_fid == NULL) {
-      write_header = true;
-    } else {
-      fclose(results_fid);
-      write_header = false;
-    }
-    // Open in append mode.
-    results_fid = fopen(FLAG_result, "a");
-    ASSERT_TRUE(results_fid != NULL)
-        << "Cannot open the file, " << FLAG_result << ", to write the results.";
-    // Write the header if required.
-    if (write_header) {
-      fprintf(results_fid,
-              "%% Total Active,  Misdetection,  "
-              "Total inactive,  False Positive,  On-sets,  Missed segments,  "
-              "Average response\n");
-    }
-  }
-
-  FILE* video_vad_fid = NULL;
-  if (strlen(FLAG_video_vad) > 0) {
-    video_vad_fid = fopen(FLAG_video_vad, "rb");
-    ASSERT_TRUE(video_vad_fid != NULL)
-        << "Cannot open the file, " << FLAG_video_vad
-        << " to read video-based VAD decisions.\n";
-  }
-
-  // AgsStat will be the owner of this file and will close it at its
-  // destructor.
-  FILE* audio_content_fid = NULL;
-  if (strlen(FLAG_audio_content) > 0) {
-    audio_content_fid = fopen(FLAG_audio_content, "wb");
-    ASSERT_TRUE(audio_content_fid != NULL)
-        << "Cannot open file, " << FLAG_audio_content
-        << " to write audio-content.\n";
-    agc_stat.set_audio_content_file(audio_content_fid);
-  }
-
-  webrtc::AudioFrame frame;
-  frame.num_channels_ = 1;
-  frame.sample_rate_hz_ = 16000;
-  frame.samples_per_channel_ = frame.sample_rate_hz_ / 100;
-  const size_t kSamplesToRead =
-      frame.num_channels_ * frame.samples_per_channel_;
-
-  agc_stat.SetActivityThreshold(FLAG_activity_threshold);
-
-  int ret_val = 0;
-  int num_frames = 0;
-  int agc_vad[kMaxNumFrames];
-  uint8_t true_vad[kMaxNumFrames];
-  double p_video = 0.5;
-  int total_active = 0;
-  int total_passive = 0;
-  int total_false_positive = 0;
-  int total_missed_detection = 0;
-  int onset_adaptation = 0;
-  int num_onsets = 0;
-  bool onset = false;
-  uint8_t previous_true_vad = 0;
-  int num_not_adapted = 0;
-  size_t true_vad_index = 0;
-  bool in_false_positive_region = false;
-  int total_false_positive_duration = 0;
-  bool video_adapted = false;
-  while (kSamplesToRead == fread(frame.mutable_data(), sizeof(int16_t),
-                                 kSamplesToRead, pcm_fid)) {
-    assert(true_vad_index < kMaxNumFrames);
-    ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1,
-                        true_vad_fid))
-        << "Size mismatch between True-VAD and the PCM file.\n";
-    if (video_vad_fid != NULL) {
-      ASSERT_EQ(1u, fread(&p_video, sizeof(p_video), 1, video_vad_fid))
-          << "Not enough video-based VAD probabilities.";
-    }
-
-    // Negative video activity indicates that the video-based VAD is not yet
-    // adapted. Disregards the learning phase in statistics.
-    if (p_video < 0) {
-      if (video_adapted) {
-        fprintf(stderr,
-                "Negative video probabilities ONLY allowed at the "
-                "beginning of the sequence, not in the middle.\n");
-        exit(1);
-      }
-      continue;
-    } else {
-      video_adapted = true;
-    }
-
-    num_frames++;
-    uint8_t last_true_vad;
-    if (true_vad_index == 0) {
-      last_true_vad = previous_true_vad;
-    } else {
-      last_true_vad = true_vad[true_vad_index - 1];
-    }
-    if (last_true_vad == 1 && true_vad[true_vad_index] == 0) {
-      agc_stat.Reset();
-    }
-    true_vad_index++;
-
-    DitherSilence(&frame);
-
-    ret_val = agc_stat.AddAudio(frame, p_video, agc_vad);
-    ASSERT_GE(ret_val, 0);
-
-    if (ret_val > 0) {
-      ASSERT_EQ(true_vad_index, static_cast<size_t>(ret_val));
-      for (int n = 0; n < ret_val; n++) {
-        if (true_vad[n] == 1) {
-          total_active++;
-          if (previous_true_vad == 0) {
-            num_onsets++;
-            onset = true;
-          }
-          if (agc_vad[n] == 0) {
-            total_missed_detection++;
-            if (onset)
-              onset_adaptation++;
-          } else {
-            in_false_positive_region = false;
-            onset = false;
-          }
-        } else if (true_vad[n] == 0) {
-          // Check if |on_set| flag is still up. If so it means that we totally
-          // missed an active region
-          if (onset)
-            num_not_adapted++;
-          onset = false;
-
-          total_passive++;
-          if (agc_vad[n] == 1) {
-            total_false_positive++;
-            in_false_positive_region = true;
-          }
-          if (in_false_positive_region) {
-            total_false_positive_duration++;
-          }
-        } else {
-          ASSERT_TRUE(false) << "Invalid value for true-VAD.\n";
-        }
-        previous_true_vad = true_vad[n];
-      }
-      true_vad_index = 0;
-    }
-  }
-
-  if (results_fid != NULL) {
-    fprintf(results_fid, "%4d  %4d  %4d  %4d  %4d  %4d  %4.0f %4.0f\n",
-            total_active, total_missed_detection, total_passive,
-            total_false_positive, num_onsets, num_not_adapted,
-            static_cast<float>(onset_adaptation) / (num_onsets + 1e-12),
-            static_cast<float>(total_false_positive_duration) /
-                (total_passive + 1e-12));
-  }
-  fprintf(stdout, "%4d %4d %4d %4d %4d %4d %4.0f %4.0f\n", total_active,
-          total_missed_detection, total_passive, total_false_positive,
-          num_onsets, num_not_adapted,
-          static_cast<float>(onset_adaptation) / (num_onsets + 1e-12),
-          static_cast<float>(total_false_positive_duration) /
-              (total_passive + 1e-12));
-
-  fclose(true_vad_fid);
-  fclose(pcm_fid);
-  if (video_vad_fid != NULL) {
-    fclose(video_vad_fid);
-  }
-  if (results_fid != NULL) {
-    fclose(results_fid);
-  }
-}
-
-}  // namespace webrtc
-
-int main(int argc, char* argv[]) {
-  if (argc == 1) {
-    // Print usage information.
-    std::cout
-        << "\nCompute the number of misdetected and false-positive frames. "
-           "Not\n"
-           " that for each frame of audio (10 ms) there should be one true\n"
-           " activity. If any video-based activity is given, there should also "
-           "be\n"
-           " one probability per frame.\n"
-           "Run with --help for more details on available flags.\n"
-           "\nUsage:\n\n"
-           "activity_metric input_pcm [options]\n"
-           "where 'input_pcm' is the input audio sampled at 16 kHz in 16 bits "
-           "format.\n\n";
-    return 0;
-  }
-  rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true);
-  if (FLAG_help) {
-    rtc::FlagList::Print(nullptr, false);
-    return 0;
-  }
-  webrtc::void_main(argc, argv);
-  return 0;
-}