Neprox committed on
Commit
40da39c
·
1 Parent(s): 17123b4

Add slider

Browse files
Files changed (1) hide show
  1. app.py +10 -8
app.py CHANGED
@@ -6,14 +6,13 @@ from datasets import Dataset, Audio
6
  from moviepy.editor import AudioFileClip
7
 
8
  pipe = pipeline(model="Neprox/model")
9
- MAX_SEGMENTS = 10 # 5 minutes
10
 
11
  def download_from_youtube(url):
12
  streams = YouTube(url).streams.filter(only_audio=True, file_extension='mp4')
13
  fpath = streams.first().download()
14
  return fpath
15
 
16
- def divide_into_30s_segments(audio_fpath):
17
  if not os.path.exists("segmented_audios"):
18
  os.makedirs("segmented_audios")
19
 
@@ -21,14 +20,16 @@ def divide_into_30s_segments(audio_fpath):
21
  n_full_segments = int(sound.duration / 30)
22
  len_last_segment = sound.duration % 30
23
 
24
- if n_full_segments > MAX_SEGMENTS:
25
- n_full_segments = MAX_SEGMENTS
 
26
  len_last_segment = 0
27
 
28
  segment_paths = []
29
  segment_start_times = []
30
 
31
- for i in range(n_full_segments + 1):
 
32
  start = i * 30
33
 
34
  # Skip last segment if it is smaller than two seconds
@@ -49,10 +50,10 @@ def divide_into_30s_segments(audio_fpath):
49
  return segment_paths, segment_start_times
50
 
51
 
52
- def transcribe(audio, url):
53
  if url:
54
  fpath = download_from_youtube(url)
55
- segment_paths, segment_start_times = divide_into_30s_segments(fpath)
56
 
57
  audio_dataset = Dataset.from_dict({"audio": segment_paths}).cast_column("audio", Audio())
58
  print(audio_dataset)
@@ -70,7 +71,8 @@ iface = gr.Interface(
70
  fn=transcribe,
71
  inputs=[
72
  gr.Audio(source="microphone", type="filepath"),
73
- gr.Text(max_lines=1, placeholder="Enter YouTube Link with Swedish speech to be transcribed")
 
74
  ],
75
  outputs="text",
76
  title="Whisper Small Swedish",
 
6
  from moviepy.editor import AudioFileClip
7
 
8
  pipe = pipeline(model="Neprox/model")
 
9
 
10
  def download_from_youtube(url):
11
  streams = YouTube(url).streams.filter(only_audio=True, file_extension='mp4')
12
  fpath = streams.first().download()
13
  return fpath
14
 
15
+ def divide_into_30s_segments(audio_fpath, seconds_max):
16
  if not os.path.exists("segmented_audios"):
17
  os.makedirs("segmented_audios")
18
 
 
20
  n_full_segments = int(sound.duration / 30)
21
  len_last_segment = sound.duration % 30
22
 
23
+ max_segments = int(seconds_max / 30)
24
+ if n_full_segments > max_segments:
25
+ n_full_segments = max_segments
26
  len_last_segment = 0
27
 
28
  segment_paths = []
29
  segment_start_times = []
30
 
31
+ segments_available = n_full_segments + 1
32
+ for i in range(min(segments_available, max_segments)):
33
  start = i * 30
34
 
35
  # Skip last segment if it is smaller than two seconds
 
50
  return segment_paths, segment_start_times
51
 
52
 
53
+ def transcribe(audio, url, seconds_max):
54
  if url:
55
  fpath = download_from_youtube(url)
56
+ segment_paths, segment_start_times = divide_into_30s_segments(fpath, seconds_max)
57
 
58
  audio_dataset = Dataset.from_dict({"audio": segment_paths}).cast_column("audio", Audio())
59
  print(audio_dataset)
 
71
  fn=transcribe,
72
  inputs=[
73
  gr.Audio(source="microphone", type="filepath"),
74
+ gr.Text(max_lines=1, placeholder="Enter YouTube Link with Swedish speech to be transcribed", label="YouTube URL")
75
+ gr.Slider(minimum=30, maximum=300, value=30, step=30, label="Number of seconds to transcribe")
76
  ],
77
  outputs="text",
78
  title="Whisper Small Swedish",