#include <cmath>
#include <cstddef>
#include <exception>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

#include "wav.h"
#include "time_stamp.h"
#include "vad_iterator.h"

| int main(int argc, char* argv[]) { |
| if (argc < 3) { |
| std::cerr << "Usage: " << argv[0] << " <model_absolute_path>" << " <audio_file_absolute_path>" << std::endl; |
| return 1; |
| } |
|
|
| |
| std::string model_path = argv[1]; |
| std::string wav_path = argv[2]; |
|
|
| |
| |
| |
| |
| wav::WavReader wav_reader(wav_path); |
| int numSamples = wav_reader.num_samples(); |
| std::vector<float> input_wav(static_cast<size_t>(numSamples)); |
| for (size_t i = 0; i < static_cast<size_t>(numSamples); i++) { |
| input_wav[i] = static_cast<float>(*(wav_reader.data() + i)); |
| } |
| |
| VadIterator vad(model_path); |
|
|
| |
| vad.process(input_wav); |
|
|
| |
| std::vector<timestamp_t> stamps = vad.get_speech_timestamps(); |
|
|
| |
| const float sample_rate_float = 16000.0f; |
| for (size_t i = 0; i < stamps.size(); i++) { |
| float start_sec = std::rint((stamps[i].start / sample_rate_float) * 10.0f) / 10.0f; |
| float end_sec = std::rint((stamps[i].end / sample_rate_float) * 10.0f) / 10.0f; |
| std::cout << "Speech detected from " |
| << std::fixed << std::setprecision(1) << start_sec |
| << " s to " |
| << std::fixed << std::setprecision(1) << end_sec |
| << " s" |
| << " [ " << stamps[i].start << " " << stamps[i].end <<" ]" |
| << std::endl; |
| } |
|
|
| |
| vad.reset(); |
|
|
| return 0; |
| } |