Spaces:
Sleeping
Sleeping
update
Browse files
- main.py +2 -2
- silero_vad/main.go +3 -1
main.py
CHANGED
@@ -66,8 +66,8 @@ def run_silero_vad(filename: str, silence_time: float = 0.3, longest_activate: f
|
|
66 |
|
67 |
main_logger.info("do silero vad: {}".format(filename))
|
68 |
|
69 |
-
cmd = "vad_bins/silero_vad {}".format(
|
70 |
-
filename
|
71 |
)
|
72 |
raw_vad_result = Command.popen(cmd)
|
73 |
|
|
|
66 |
|
67 |
main_logger.info("do silero vad: {}".format(filename))
|
68 |
|
69 |
+
cmd = "vad_bins/silero_vad --filename {} --silence_time {}".format(
|
70 |
+
filename, silence_time
|
71 |
)
|
72 |
raw_vad_result = Command.popen(cmd)
|
73 |
|
silero_vad/main.go
CHANGED
@@ -15,6 +15,7 @@ func main() {
|
|
15 |
silenceTimePtr := flag.Float64("silence_time", 0.1, "in the end of each speech chunk wait for min_silence_duration_ms before separating it")
|
16 |
speechPadTimePtr := flag.Float64("speech_pad_time", 0.03, "final speech chunks are padded by speech_pad_ms each side")
|
17 |
sampleRatePtr := flag.Uint64("sample_rate", 8000, "sample rate")
|
|
|
18 |
flag.Parse()
|
19 |
|
20 |
var modelPath string = *modelPathPtr
|
@@ -22,6 +23,7 @@ func main() {
|
|
22 |
var silenceTime float32 = float32(*silenceTimePtr)
|
23 |
var speechPadTime float32 = float32(*speechPadTimePtr)
|
24 |
var sampleRate int = int(*sampleRatePtr)
|
|
|
25 |
fmt.Println(filename)
|
26 |
|
27 |
var silenceTimeMs int = int(silenceTime * 1e3)
|
@@ -30,7 +32,7 @@ func main() {
|
|
30 |
sd, err := speech.NewDetector(speech.DetectorConfig{
|
31 |
ModelPath: modelPath,
|
32 |
SampleRate: sampleRate,
|
33 |
-
Threshold:
|
34 |
MinSilenceDurationMs: silenceTimeMs,
|
35 |
SpeechPadMs: speechPadTimeMs,
|
36 |
})
|
|
|
15 |
silenceTimePtr := flag.Float64("silence_time", 0.1, "in the end of each speech chunk wait for min_silence_duration_ms before separating it")
|
16 |
speechPadTimePtr := flag.Float64("speech_pad_time", 0.03, "final speech chunks are padded by speech_pad_ms each side")
|
17 |
sampleRatePtr := flag.Uint64("sample_rate", 8000, "sample rate")
|
18 |
+
thresholdPtr := flag.Float64("threshold", 0.5, "Speech threshold. Silero VAD outputs speech probabilities for each audio chunk, probabilities ABOVE this value are considered as SPEECH. It is better to tune this parameter for each dataset separately, but \"lazy\" 0.5 is pretty good for most datasets.")
|
19 |
flag.Parse()
|
20 |
|
21 |
var modelPath string = *modelPathPtr
|
|
|
23 |
var silenceTime float32 = float32(*silenceTimePtr)
|
24 |
var speechPadTime float32 = float32(*speechPadTimePtr)
|
25 |
var sampleRate int = int(*sampleRatePtr)
|
26 |
+
var threshold float32 = float32(*thresholdPtr)
|
27 |
fmt.Println(filename)
|
28 |
|
29 |
var silenceTimeMs int = int(silenceTime * 1e3)
|
|
|
32 |
sd, err := speech.NewDetector(speech.DetectorConfig{
|
33 |
ModelPath: modelPath,
|
34 |
SampleRate: sampleRate,
|
35 |
+
Threshold: threshold,
|
36 |
MinSilenceDurationMs: silenceTimeMs,
|
37 |
SpeechPadMs: speechPadTimeMs,
|
38 |
})
|