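// Spaces: Sleeping
// NOTE(review): the line above looks like status text captured from the page
// this file was copied from, not part of the program — confirm and remove.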
package main | |
import ( | |
"flag" | |
"fmt" | |
"log" | |
"os" | |
"github.com/streamer45/silero-vad-go/speech" | |
"github.com/go-audio/wav" | |
) | |
func main() { | |
modelPathPtr := flag.String("model_path", "./pretrained_models/silero_vad/silero_vad.onnx", "silero vad onnx model") | |
filenamePtr := flag.String("filename", "", "input wav audio file") | |
silenceTimePtr := flag.Float64("silence_time", 0.1, "in the end of each speech chunk wait for min_silence_duration_ms before separating it") | |
speechPadTimePtr := flag.Float64("speech_pad_time", 0.03, "final speech chunks are padded by speech_pad_ms each side") | |
sampleRatePtr := flag.Int("sample_rate", 8000, "sample rate") | |
flag.Parse() | |
var modelPath string = *modelPathPtr | |
var filename string = *filenamePtr | |
var silenceTime float32 = float32(*silenceTimePtr) | |
var speechPadTime float32 = float32(*speechPadTimePtr) | |
var sampleRate int = float32(*sampleRatePtr) | |
fmt.Println(filename) | |
var silenceTimeMs int = int(silenceTime * 1e3) | |
var speechPadTimeMs int = int(speechPadTime * 1e3) | |
sd, err := speech.NewDetector(speech.DetectorConfig{ | |
ModelPath: modelPath, | |
SampleRate: sampleRate, | |
Threshold: 0.5, | |
MinSilenceDurationMs: silenceTimeMs, | |
SpeechPadMs: speechPadTimeMs, | |
}) | |
if err != nil { | |
log.Fatalf("failed to create speech detector: %s", err) | |
} | |
f, err := os.Open(filename) | |
if err != nil { | |
log.Fatalf("failed to open sample audio file: %s", err) | |
} | |
defer f.Close() | |
dec := wav.NewDecoder(f) | |
if ok := dec.IsValidFile(); !ok { | |
log.Fatalf("invalid WAV file") | |
} | |
buf, err := dec.FullPCMBuffer() | |
if err != nil { | |
log.Fatalf("failed to get PCM buffer") | |
} | |
pcmBuf := buf.AsFloat32Buffer() | |
segments, err := sd.Detect(pcmBuf.Data) | |
if err != nil { | |
log.Fatalf("Detect failed: %s", err) | |
} | |
for _, s := range segments { | |
fmt.Printf("speech starts at %0.2fs\n", s.SpeechStartAt) | |
if s.SpeechEndAt > 0 { | |
fmt.Printf("speech ends at %0.2fs\n", s.SpeechEndAt) | |
} | |
} | |
err = sd.Destroy() | |
if err != nil { | |
log.Fatalf("failed to destroy detector: %s", err) | |
} | |
} | |