Spaces:
Sleeping
Sleeping
File size: 2,097 Bytes
165d1b6 5deec0c d20c2c8 165d1b6 5deec0c 6a85bff 5deec0c 6a85bff 5deec0c 9e67bc2 5deec0c 165d1b6 5deec0c 6a85bff 165d1b6 5deec0c 165d1b6 6a85bff 165d1b6 b06194e e8560e7 165d1b6 b06194e 165d1b6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
package main
import (
"flag"
"fmt"
"log"
"os"
"github.com/streamer45/silero-vad-go/speech"
"github.com/go-audio/wav"
)
func main() {
modelPathPtr := flag.String("model_path", "./pretrained_models/silero_vad/silero_vad.onnx", "silero vad onnx model")
filenamePtr := flag.String("filename", "", "input wav audio file")
silenceTimePtr := flag.Float64("silence_time", 0.1, "in the end of each speech chunk wait for min_silence_duration_ms before separating it")
speechPadTimePtr := flag.Float64("speech_pad_time", 0.03, "final speech chunks are padded by speech_pad_ms each side")
sampleRatePtr := flag.Int("sample_rate", 8000, "sample rate")
flag.Parse()
var modelPath string = *modelPathPtr
var filename string = *filenamePtr
var silenceTime float32 = float32(*silenceTimePtr)
var speechPadTime float32 = float32(*speechPadTimePtr)
var sampleRate int = float32(*sampleRatePtr)
fmt.Println(filename)
var silenceTimeMs int = int(silenceTime * 1e3)
var speechPadTimeMs int = int(speechPadTime * 1e3)
sd, err := speech.NewDetector(speech.DetectorConfig{
ModelPath: modelPath,
SampleRate: sampleRate,
Threshold: 0.5,
MinSilenceDurationMs: silenceTimeMs,
SpeechPadMs: speechPadTimeMs,
})
if err != nil {
log.Fatalf("failed to create speech detector: %s", err)
}
f, err := os.Open(filename)
if err != nil {
log.Fatalf("failed to open sample audio file: %s", err)
}
defer f.Close()
dec := wav.NewDecoder(f)
if ok := dec.IsValidFile(); !ok {
log.Fatalf("invalid WAV file")
}
buf, err := dec.FullPCMBuffer()
if err != nil {
log.Fatalf("failed to get PCM buffer")
}
pcmBuf := buf.AsFloat32Buffer()
segments, err := sd.Detect(pcmBuf.Data)
if err != nil {
log.Fatalf("Detect failed: %s", err)
}
for _, s := range segments {
fmt.Printf("speech starts at %0.2fs\n", s.SpeechStartAt)
if s.SpeechEndAt > 0 {
fmt.Printf("speech ends at %0.2fs\n", s.SpeechEndAt)
}
}
err = sd.Destroy()
if err != nil {
log.Fatalf("failed to destroy detector: %s", err)
}
}
|