MohamedRashad committed on
Commit
243a26e
1 Parent(s): 3571303

chore: Update app.py to support task selection in Arabic Whisper Code-Switching Edition

Browse files
Files changed (1) hide show
  1. app.py +9 -4
app.py CHANGED
@@ -28,11 +28,12 @@ pipe = pipeline(
28
  )
29
 
30
  @spaces.GPU(duration=120)
31
- def transcribe(inputs):
32
  if inputs is None:
33
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
34
 
35
- text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": "transcribe", "language": "arabic"}, return_timestamps=True)["text"]
 
36
  return text
37
 
38
 
@@ -71,7 +72,7 @@ def chunks_to_subtitle(chunks):
71
  return subtitle
72
 
73
  @spaces.GPU(duration=120)
74
- def yt_transcribe(yt_url):
75
  html_embed_str = _return_yt_html_embed(yt_url)
76
 
77
  with tempfile.TemporaryDirectory() as tmpdirname:
@@ -83,7 +84,8 @@ def yt_transcribe(yt_url):
83
  inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
84
  inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
85
 
86
- output = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": "transcribe", "language": "arabic"}, return_timestamps=True)
 
87
  subtitle = chunks_to_subtitle(output["chunks"])
88
 
89
  return html_embed_str, subtitle
@@ -95,6 +97,7 @@ mf_transcribe = gr.Interface(
95
  fn=transcribe,
96
  inputs=[
97
  gr.Audio(sources="microphone", type="filepath"),
 
98
  ],
99
  outputs="text",
100
  title="Arabic Whisper Code-Switching Edition: Transcribe Microphone",
@@ -110,6 +113,7 @@ file_transcribe = gr.Interface(
110
  fn=transcribe,
111
  inputs=[
112
  gr.Audio(sources="upload", type="filepath", label="Audio file"),
 
113
  ],
114
  outputs="text",
115
  title="Arabic Whisper Code-Switching Edition: Transcribe Audio",
@@ -125,6 +129,7 @@ yt_transcribe_demo = gr.Interface(
125
  fn=yt_transcribe,
126
  inputs=[
127
  gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
 
128
  ],
129
  outputs=["html", "text"],
130
  title="Arabic Whisper Code-Switching Edition: Transcribe YouTube Video",
 
28
  )
29
 
30
  @spaces.GPU(duration=120)
31
+ def transcribe(inputs, task):
32
  if inputs is None:
33
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
34
 
35
+ generate_kwargs = {"task": task, "language": "arabic" if task == "transcribe" else "english"}
36
+ text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs=generate_kwargs, return_timestamps=True)["text"]
37
  return text
38
 
39
 
 
72
  return subtitle
73
 
74
  @spaces.GPU(duration=120)
75
+ def yt_transcribe(yt_url, task):
76
  html_embed_str = _return_yt_html_embed(yt_url)
77
 
78
  with tempfile.TemporaryDirectory() as tmpdirname:
 
84
  inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
85
  inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
86
 
87
+ generate_kwargs = {"task": task, "language": "arabic" if task == "transcribe" else "english"}
88
+ output = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs=generate_kwargs, return_timestamps=True)
89
  subtitle = chunks_to_subtitle(output["chunks"])
90
 
91
  return html_embed_str, subtitle
 
97
  fn=transcribe,
98
  inputs=[
99
  gr.Audio(sources="microphone", type="filepath"),
100
+ gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
101
  ],
102
  outputs="text",
103
  title="Arabic Whisper Code-Switching Edition: Transcribe Microphone",
 
113
  fn=transcribe,
114
  inputs=[
115
  gr.Audio(sources="upload", type="filepath", label="Audio file"),
116
+ gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
117
  ],
118
  outputs="text",
119
  title="Arabic Whisper Code-Switching Edition: Transcribe Audio",
 
129
  fn=yt_transcribe,
130
  inputs=[
131
  gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
132
+ gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
133
  ],
134
  outputs=["html", "text"],
135
  title="Arabic Whisper Code-Switching Edition: Transcribe YouTube Video",