HoneyTian committed on
Commit
8e74df8
1 Parent(s): 5d7199c
Files changed (2) hide show
  1. main.py +2 -2
  2. silero_vad/main.go +3 -1
main.py CHANGED
@@ -66,8 +66,8 @@ def run_silero_vad(filename: str, silence_time: float = 0.3, longest_activate: f
66
 
67
  main_logger.info("do silero vad: {}".format(filename))
68
 
69
- cmd = "vad_bins/silero_vad {}".format(
70
- filename
71
  )
72
  raw_vad_result = Command.popen(cmd)
73
 
 
66
 
67
  main_logger.info("do silero vad: {}".format(filename))
68
 
69
+ cmd = "vad_bins/silero_vad --filename {} --silence_time {}".format(
70
+ filename, silence_time
71
  )
72
  raw_vad_result = Command.popen(cmd)
73
 
silero_vad/main.go CHANGED
@@ -15,6 +15,7 @@ func main() {
15
  silenceTimePtr := flag.Float64("silence_time", 0.1, "in the end of each speech chunk wait for min_silence_duration_ms before separating it")
16
  speechPadTimePtr := flag.Float64("speech_pad_time", 0.03, "final speech chunks are padded by speech_pad_ms each side")
17
  sampleRatePtr := flag.Uint64("sample_rate", 8000, "sample rate")
 
18
  flag.Parse()
19
 
20
  var modelPath string = *modelPathPtr
@@ -22,6 +23,7 @@ func main() {
22
  var silenceTime float32 = float32(*silenceTimePtr)
23
  var speechPadTime float32 = float32(*speechPadTimePtr)
24
  var sampleRate int = int(*sampleRatePtr)
 
25
  fmt.Println(filename)
26
 
27
  var silenceTimeMs int = int(silenceTime * 1e3)
@@ -30,7 +32,7 @@ func main() {
30
  sd, err := speech.NewDetector(speech.DetectorConfig{
31
  ModelPath: modelPath,
32
  SampleRate: sampleRate,
33
- Threshold: 0.5,
34
  MinSilenceDurationMs: silenceTimeMs,
35
  SpeechPadMs: speechPadTimeMs,
36
  })
 
15
  silenceTimePtr := flag.Float64("silence_time", 0.1, "in the end of each speech chunk wait for min_silence_duration_ms before separating it")
16
  speechPadTimePtr := flag.Float64("speech_pad_time", 0.03, "final speech chunks are padded by speech_pad_ms each side")
17
  sampleRatePtr := flag.Uint64("sample_rate", 8000, "sample rate")
18
+ thresholdPtr := flag.Float64("threshold", 0.5, "Speech threshold. Silero VAD outputs speech probabilities for each audio chunk, probabilities ABOVE this value are considered as SPEECH. It is better to tune this parameter for each dataset separately, but \"lazy\" 0.5 is pretty good for most datasets.")
19
  flag.Parse()
20
 
21
  var modelPath string = *modelPathPtr
 
23
  var silenceTime float32 = float32(*silenceTimePtr)
24
  var speechPadTime float32 = float32(*speechPadTimePtr)
25
  var sampleRate int = int(*sampleRatePtr)
26
+ var threshold float32 = float32(*thresholdPtr)
27
  fmt.Println(filename)
28
 
29
  var silenceTimeMs int = int(silenceTime * 1e3)
 
32
  sd, err := speech.NewDetector(speech.DetectorConfig{
33
  ModelPath: modelPath,
34
  SampleRate: sampleRate,
35
+ Threshold: threshold,
36
  MinSilenceDurationMs: silenceTimeMs,
37
  SpeechPadMs: speechPadTimeMs,
38
  })