mizoru committed
Commit
a00d114
1 Parent(s): b0ab37c

mroe fixes

Files changed (3)
  1. app.py +14 -3
  2. requirements.txt +2 -1
  3. vad_utils.py +1 -1
app.py CHANGED
@@ -4,14 +4,25 @@ from vad_utils import get_speech_probs, make_visualization, probs2speech_timestamps
 import torch
 
 probs = None
+audio_length_samples = None
 def process_audio(audio_input):
     global probs
+    global audio_length_samples
     wav = read_audio(audio_input, sampling_rate=16_000)
+    audio_length_samples = len(wav)
     probs = get_speech_probs(wav, sampling_rate=16_000)
     return make_visualization(probs, 512 / 16_000)
 
 def process_parameters(threshold, min_speech_duration_ms, min_silence_duration_ms, window_size_samples, speech_pad_ms):
-    return probs2speech_timestamps(probs, threshold, min_speech_duration_ms, min_silence_duration_ms, window_size_samples, speech_pad_ms)
+    print(probs)
+    timestamps = probs2speech_timestamps(probs, audio_length_samples,
+                                         threshold=threshold,
+                                         min_speech_duration_ms=min_speech_duration_ms,
+                                         min_silence_duration_ms=min_silence_duration_ms,
+                                         window_size_samples=window_size_samples,
+                                         speech_pad_ms=speech_pad_ms)
+    print(timestamps)
+    return timestamps
 
 def main():
 
@@ -20,8 +31,8 @@ def main():
     with gr.Blocks() as demo:
         with gr.Row():
             audio_input = gr.Audio(type="filepath")
-            button1 = gr.Button("Process Audio")
-            figure = gr.Image()
+            button1 = gr.Button("Compute Probabilities")
+            figure = gr.Plot()
 
         button1.click(process_audio, inputs=[audio_input], outputs=figure)
 
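The hunks above wire up only the "Compute Probabilities" button; the controls that feed process_parameters are not shown in this commit. A minimal sketch of how they could be added to the same gr.Blocks layout; the slider ranges and defaults, the second button, and the gr.JSON output are illustrative assumptions, not code from this repository:

```python
# Hypothetical wiring for process_parameters; not part of this commit.
import gradio as gr
from app import process_audio, process_parameters  # functions defined in app.py above

with gr.Blocks() as demo:
    with gr.Row():
        audio_input = gr.Audio(type="filepath")
        button1 = gr.Button("Compute Probabilities")
        figure = gr.Plot()
    with gr.Row():
        # Slider ranges and defaults below are assumptions, not values from the commit.
        threshold = gr.Slider(0.0, 1.0, value=0.5, label="threshold")
        min_speech_duration_ms = gr.Slider(0, 2000, value=250, label="min_speech_duration_ms")
        min_silence_duration_ms = gr.Slider(0, 2000, value=100, label="min_silence_duration_ms")
        window_size_samples = gr.Slider(256, 1536, value=512, step=256, label="window_size_samples")
        speech_pad_ms = gr.Slider(0, 500, value=30, label="speech_pad_ms")
    button2 = gr.Button("Get Timestamps")
    timestamps = gr.JSON()

    button1.click(process_audio, inputs=[audio_input], outputs=figure)
    button2.click(process_parameters,
                  inputs=[threshold, min_speech_duration_ms, min_silence_duration_ms,
                          window_size_samples, speech_pad_ms],
                  outputs=timestamps)

demo.launch()
```

Passing the tuning values as keyword arguments, as the new process_parameters does, also keeps the call correct even if probs2speech_timestamps reorders its optional parameters.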
 
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 torchaudio
 onnxruntime
-gradio
+gradio
+pandas
vad_utils.py CHANGED
@@ -157,7 +157,7 @@ def probs2speech_timestamps(speech_probs, audio_length_samples,
 
 def make_visualization(probs, step):
     import pandas as pd
-    pd.DataFrame({'probs': probs},
+    return pd.DataFrame({'probs': probs},
                  index=[x * step for x in range(len(probs))]).plot(figsize=(16, 8),
                  kind='area', ylim=[0, 1.05], xlim=[0, len(probs) * step],
                  xlabel='seconds',
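
With the added return, make_visualization now hands back the matplotlib Axes produced by pandas' DataFrame.plot, which is what the gr.Plot output in app.py renders (and why pandas joins requirements.txt). A minimal standalone sketch of the same call, with synthetic probabilities invented purely for illustration:

```python
# Sanity check of make_visualization outside Gradio; a sketch, not part of the commit.
# The probabilities below are synthetic and exist only for illustration.
import matplotlib
matplotlib.use("Agg")  # render off-screen, no display needed
import numpy as np
from vad_utils import make_visualization

probs = np.abs(np.sin(np.linspace(0, 10, 320))).tolist()  # fake per-window speech probabilities
step = 512 / 16_000                                       # seconds per 512-sample window at 16 kHz

ax = make_visualization(probs, step)          # returns the pandas/matplotlib Axes after this commit
ax.get_figure().savefig("speech_probs.png")   # the same object gr.Plot renders in the app
```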