HoneyTian committed on
Commit
45a808d
1 Parent(s): 0e450e7
Files changed (1) hide show
  1. main.py +14 -6
main.py CHANGED
@@ -37,12 +37,13 @@ def process_uploaded_file(
37
  filename: str,
38
  silence_time: float = 0.3,
39
  longest_activate: float = 3.0,
 
40
  threshold: float = 0.5,
41
  ) -> Tuple[str, str]:
42
  if vad_engine == "nx_vad":
43
  return run_nx_vad(filename, silence_time, longest_activate)
44
  elif vad_engine == "silero_vad":
45
- return run_silero_vad(filename, silence_time, threshold)
46
  else:
47
  return f"vad engine invalid: {vad_engine}", ""
48
 
@@ -67,13 +68,17 @@ def run_nx_vad(filename: str, silence_time: float = 0.3, longest_activate: float
67
  return vad_timestamps, raw_vad_result
68
 
69
 
70
- def run_silero_vad(filename: str, silence_time: float = 0.3, threshold: float = 0.5) -> Tuple[str, str]:
 
 
 
 
71
  filename = Path(filename).as_posix()
72
 
73
  main_logger.info("do silero vad: {}".format(filename))
74
 
75
- cmd = "vad_bins/silero_vad --filename {} --silence_time {} --threshold {}".format(
76
- filename, silence_time, threshold
77
  )
78
  raw_vad_result = Command.popen(cmd)
79
 
@@ -123,8 +128,9 @@ def main():
123
 
124
  with gr.Row():
125
  uploaded_vad_engine = gr.Dropdown(choices=["nx_vad", "silero_vad"], value="nx_vad", label="vad_engine")
126
- uploaded_silence_time = gr.Slider(minimum=0.0, maximum=1.0, value=0.3, step=0.01, label="silence time")
127
- uploaded_longest_activate = gr.Slider(minimum=0.0, maximum=20.0, value=3.0, step=0.1, label="longest activate")
 
128
  uploaded_threshold = gr.Slider(minimum=0.0, maximum=1.0, value=0.5, step=0.1, label="threshold")
129
  upload_button = gr.Button("Run VAD", variant="primary")
130
 
@@ -139,6 +145,7 @@ def main():
139
  uploaded_file,
140
  uploaded_silence_time,
141
  uploaded_longest_activate,
 
142
  uploaded_threshold,
143
  ],
144
  outputs=[
@@ -155,6 +162,7 @@ def main():
155
  uploaded_file,
156
  uploaded_silence_time,
157
  uploaded_longest_activate,
 
158
  uploaded_threshold,
159
  ],
160
  outputs=[
 
37
  filename: str,
38
  silence_time: float = 0.3,
39
  longest_activate: float = 3.0,
40
+ speech_pad_time: float = 0.03,
41
  threshold: float = 0.5,
42
  ) -> Tuple[str, str]:
43
  if vad_engine == "nx_vad":
44
  return run_nx_vad(filename, silence_time, longest_activate)
45
  elif vad_engine == "silero_vad":
46
+ return run_silero_vad(filename, silence_time, speech_pad_time, threshold)
47
  else:
48
  return f"vad engine invalid: {vad_engine}", ""
49
 
 
68
  return vad_timestamps, raw_vad_result
69
 
70
 
71
+ def run_silero_vad(filename: str,
72
+ silence_time: float = 0.3,
73
+ speech_pad_time: float = 0.03,
74
+ threshold: float = 0.5
75
+ ) -> Tuple[str, str]:
76
  filename = Path(filename).as_posix()
77
 
78
  main_logger.info("do silero vad: {}".format(filename))
79
 
80
+ cmd = "vad_bins/silero_vad --filename {} --silence_time {} --speech_pad_time {} --threshold {}".format(
81
+ filename, silence_time, speech_pad_time, threshold
82
  )
83
  raw_vad_result = Command.popen(cmd)
84
 
 
128
 
129
  with gr.Row():
130
  uploaded_vad_engine = gr.Dropdown(choices=["nx_vad", "silero_vad"], value="nx_vad", label="vad_engine")
131
+ uploaded_silence_time = gr.Slider(minimum=0.0, maximum=1.0, value=0.3, step=0.01, label="silence_time")
132
+ uploaded_longest_activate = gr.Slider(minimum=0.0, maximum=20.0, value=3.0, step=0.1, label="longest_activate")
133
+ uploaded_speech_pad_time = gr.Slider(minimum=0.0, maximum=20.0, value=3.0, step=0.1, label="speech_pad_time")
134
  uploaded_threshold = gr.Slider(minimum=0.0, maximum=1.0, value=0.5, step=0.1, label="threshold")
135
  upload_button = gr.Button("Run VAD", variant="primary")
136
 
 
145
  uploaded_file,
146
  uploaded_silence_time,
147
  uploaded_longest_activate,
148
+ uploaded_speech_pad_time,
149
  uploaded_threshold,
150
  ],
151
  outputs=[
 
162
  uploaded_file,
163
  uploaded_silence_time,
164
  uploaded_longest_activate,
165
+ uploaded_speech_pad_time,
166
  uploaded_threshold,
167
  ],
168
  outputs=[