reach-vb HF staff committed on
Commit
f2b1f57
1 Parent(s): 5290d3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -8,7 +8,7 @@ from transformers.pipelines.audio_utils import ffmpeg_read
8
  import tempfile
9
  import os
10
 
11
- MODEL_NAME = "openai/whisper-large-v3"
12
  BATCH_SIZE = 8
13
  FILE_LIMIT_MB = 1000
14
  YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
@@ -83,7 +83,7 @@ def yt_transcribe(yt_url, task, max_filesize=75.0):
83
  inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
84
  inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
85
 
86
- text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
87
 
88
  return html_embed_str, text
89
 
@@ -94,12 +94,12 @@ mf_transcribe = gr.Interface(
94
  fn=transcribe,
95
  inputs=[
96
  gr.inputs.Audio(source="microphone", type="filepath", optional=True),
97
- gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
98
  ],
99
  outputs="text",
100
  layout="horizontal",
101
  theme="huggingface",
102
- title="Whisper Large V3: Transcribe Audio",
103
  description=(
104
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
105
  f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
@@ -112,12 +112,12 @@ file_transcribe = gr.Interface(
112
  fn=transcribe,
113
  inputs=[
114
  gr.inputs.Audio(source="upload", type="filepath", optional=True, label="Audio file"),
115
- gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
116
  ],
117
  outputs="text",
118
  layout="horizontal",
119
  theme="huggingface",
120
- title="Whisper Large V3: Transcribe Audio",
121
  description=(
122
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
123
  f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
@@ -130,12 +130,12 @@ yt_transcribe = gr.Interface(
130
  fn=yt_transcribe,
131
  inputs=[
132
  gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
133
- gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe")
134
  ],
135
  outputs=["html", "text"],
136
  layout="horizontal",
137
  theme="huggingface",
138
- title="Whisper Large V3: Transcribe YouTube",
139
  description=(
140
  "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
141
  f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe video files of"
 
8
  import tempfile
9
  import os
10
 
11
+ MODEL_NAME = "distil-whisper/distil-small.en"
12
  BATCH_SIZE = 8
13
  FILE_LIMIT_MB = 1000
14
  YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
 
83
  inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
84
  inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
85
 
86
+ text = pipe(inputs, batch_size=BATCH_SIZE, return_timestamps=True)["text"]
87
 
88
  return html_embed_str, text
89
 
 
94
  fn=transcribe,
95
  inputs=[
96
  gr.inputs.Audio(source="microphone", type="filepath", optional=True),
97
+ #gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
98
  ],
99
  outputs="text",
100
  layout="horizontal",
101
  theme="huggingface",
102
+ title="Distil-Whisper small: Transcribe Audio",
103
  description=(
104
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
105
  f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
 
112
  fn=transcribe,
113
  inputs=[
114
  gr.inputs.Audio(source="upload", type="filepath", optional=True, label="Audio file"),
115
+ # gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
116
  ],
117
  outputs="text",
118
  layout="horizontal",
119
  theme="huggingface",
120
+ title="Distil-Whisper small: Transcribe Audio",
121
  description=(
122
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
123
  f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
 
130
  fn=yt_transcribe,
131
  inputs=[
132
  gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
133
+ # gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe")
134
  ],
135
  outputs=["html", "text"],
136
  layout="horizontal",
137
  theme="huggingface",
138
+ title="Distil-Whisper small: Transcribe YouTube",
139
  description=(
140
  "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
141
  f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe video files of"