mizoru committed on
Commit
8e14b4c
1 Parent(s): 865b8d5

Introduce gr.State for probs and audio_length_samples, plus other changes

Browse files
Files changed (1) hide show
  1. app.py +11 -9
app.py CHANGED
@@ -5,23 +5,24 @@ import torch
5
  import pandas as pd
6
  import gdown
7
 
8
- probs = None
9
  audio_length_samples = None
10
  def process_audio(audio_input):
11
- global probs
12
- global audio_length_samples
13
  wav = read_audio(audio_input, sampling_rate=16_000)
14
  audio_length_samples = len(wav)
15
  probs = get_speech_probs(wav, sampling_rate=16_000)
16
- return make_visualization(probs, 512 / 16_000)
 
 
17
 
18
- def process_parameters(threshold, min_speech_duration_ms, min_silence_duration_ms, window_size_samples, speech_pad_ms):
19
  timestamps = probs2speech_timestamps(probs, audio_length_samples,
20
  threshold = threshold,
21
  min_speech_duration_ms = min_speech_duration_ms,
22
  min_silence_duration_ms=min_silence_duration_ms,
23
  window_size_samples=window_size_samples,
24
- speech_pad_ms=speech_pad_ms)
 
 
 
25
  df = pd.DataFrame(timestamps)
26
  df["note"] = ""
27
  df.to_csv("timestamps.txt", sep = '\t', header=False, index=False)
@@ -34,8 +35,9 @@ def download_gdrive(id):
34
  return output_file
35
 
36
  def main():
37
-
38
  with gr.Blocks() as demo:
 
 
39
  with gr.Row():
40
  info = """Input the Google Drive file id from the shared link.
41
  It comes after https://drive.google.com/file/d/ <id here.
@@ -50,7 +52,7 @@ def main():
50
 
51
  download_button.click(download_gdrive, inputs=[gdrive_str], outputs=audio_input)
52
 
53
- button1.click(process_audio, inputs=[audio_input], outputs=figure)
54
 
55
  with gr.Row():
56
  threshold = gr.Number(label="Threshold", value=0.6, minimum=0.0, maximum=1.0)
@@ -63,7 +65,7 @@ def main():
63
  with gr.Row():
64
  output_df = gr.DataFrame()
65
 
66
- button2.click(process_parameters, inputs=[threshold, min_speech_duration_ms, min_silence_duration_ms, window_size_samples, speech_pad_ms],
67
  outputs=[output_file, output_df])
68
 
69
  demo.launch()
 
5
  import pandas as pd
6
  import gdown
7
 
 
8
  audio_length_samples = None
9
  def process_audio(audio_input):
 
 
10
  wav = read_audio(audio_input, sampling_rate=16_000)
11
  audio_length_samples = len(wav)
12
  probs = get_speech_probs(wav, sampling_rate=16_000)
13
+ return make_visualization(probs, 512 / 16_000), probs, audio_length_samples
14
+
15
+ def process_parameters(probs, audio_length_samples, threshold, min_speech_duration_ms, min_silence_duration_ms, window_size_samples, speech_pad_ms):
16
 
 
17
  timestamps = probs2speech_timestamps(probs, audio_length_samples,
18
  threshold = threshold,
19
  min_speech_duration_ms = min_speech_duration_ms,
20
  min_silence_duration_ms=min_silence_duration_ms,
21
  window_size_samples=window_size_samples,
22
+ speech_pad_ms=speech_pad_ms,
23
+ return_seconds=True,
24
+ rounding=3)
25
+
26
  df = pd.DataFrame(timestamps)
27
  df["note"] = ""
28
  df.to_csv("timestamps.txt", sep = '\t', header=False, index=False)
 
35
  return output_file
36
 
37
  def main():
 
38
  with gr.Blocks() as demo:
39
+ probs = gr.State()
40
+ audio_length_samples = gr.State()
41
  with gr.Row():
42
  info = """Input the Google Drive file id from the shared link.
43
  It comes after https://drive.google.com/file/d/ <id here.
 
52
 
53
  download_button.click(download_gdrive, inputs=[gdrive_str], outputs=audio_input)
54
 
55
+ button1.click(process_audio, inputs=[audio_input], outputs=[figure, probs, audio_length_samples])
56
 
57
  with gr.Row():
58
  threshold = gr.Number(label="Threshold", value=0.6, minimum=0.0, maximum=1.0)
 
65
  with gr.Row():
66
  output_df = gr.DataFrame()
67
 
68
+ button2.click(process_parameters, inputs=[probs, audio_length_samples, threshold, min_speech_duration_ms, min_silence_duration_ms, window_size_samples, speech_pad_ms],
69
  outputs=[output_file, output_df])
70
 
71
  demo.launch()