package main import ( "flag" "fmt" "log" "os" "github.com/streamer45/silero-vad-go/speech" "github.com/go-audio/wav" ) func main() { modelPathPtr := flag.String("model_path", "./pretrained_models/silero_vad/silero_vad.onnx", "silero vad onnx model") filenamePtr := flag.String("filename", "", "input wav audio file") silenceTimePtr := flag.Float64("silence_time", 0.1, "in the end of each speech chunk wait for min_silence_duration_ms before separating it") speechPadTimePtr := flag.Float64("speech_pad_time", 0.03, "final speech chunks are padded by speech_pad_ms each side") sampleRatePtr := flag.Uint64("sample_rate", 8000, "sample rate") flag.Parse() var modelPath string = *modelPathPtr var filename string = *filenamePtr var silenceTime float32 = float32(*silenceTimePtr) var speechPadTime float32 = float32(*speechPadTimePtr) var sampleRate int = int(*sampleRatePtr) fmt.Println(filename) var silenceTimeMs int = int(silenceTime * 1e3) var speechPadTimeMs int = int(speechPadTime * 1e3) sd, err := speech.NewDetector(speech.DetectorConfig{ ModelPath: modelPath, SampleRate: sampleRate, Threshold: 0.5, MinSilenceDurationMs: silenceTimeMs, SpeechPadMs: speechPadTimeMs, }) if err != nil { log.Fatalf("failed to create speech detector: %s", err) } f, err := os.Open(filename) if err != nil { log.Fatalf("failed to open sample audio file: %s", err) } defer f.Close() dec := wav.NewDecoder(f) if ok := dec.IsValidFile(); !ok { log.Fatalf("invalid WAV file") } buf, err := dec.FullPCMBuffer() if err != nil { log.Fatalf("failed to get PCM buffer") } pcmBuf := buf.AsFloat32Buffer() segments, err := sd.Detect(pcmBuf.Data) if err != nil { log.Fatalf("Detect failed: %s", err) } for _, s := range segments { fmt.Printf("speech starts at %0.2fs\n", s.SpeechStartAt) if s.SpeechEndAt > 0 { fmt.Printf("speech ends at %0.2fs\n", s.SpeechEndAt) } } err = sd.Destroy() if err != nil { log.Fatalf("failed to destroy detector: %s", err) } }