Spaces:

GIanlucaRub
/

whisper-it

Runtime error

App Files Files Community

GIanlucaRub committed on Dec 7, 2022

Commit

023302c

1 Parent(s): 799a761

Update app.py

Browse files

Added support for youtube video

Files changed (1) hide show

app.py +67 -11

app.py CHANGED Viewed

@@ -1,18 +1,74 @@
-from transformers import pipeline
 import gradio as gr
-pipe = pipeline(model="GIanlucaRub/whisper-tiny-it-4")  # change to "your-username/the-name-you-picked"
-def transcribe(audio):
     text = pipe(audio)["text"]
     return text
-iface = gr.Interface(
-    fn=transcribe,
-    inputs=gr.Audio(source="microphone", type="filepath"),
-    outputs="text",
-    title="Whisper Tiny Italian",
-    description="Realtime demo for Italian speech recognition using a fine-tuned Whisper small model.",
-)
-iface.launch()

 import gradio as gr
+from pytube import YouTube
+from transformers import pipeline
+import os
+# Transformers pipeline loaded from the "GIanlucaRub/whisper-tiny-it-6" model;
+# every transcription handler in this file calls it. Change the model id
+# ("your-username/the-name-you-picked") to use a different model.
+pipe = pipeline(model="GIanlucaRub/whisper-tiny-it-6")
+def transcribe_yt(link):
+  yt = YouTube(link)
+  audio = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp3")
+  text = pipe(audio)["text"]
+  os.remove(audio)
+  return text
+def transcribe_audio(audio):
     text = pipe(audio)["text"]
     return text
+def populate_metadata(link):
+  yt = YouTube(link)
+  return yt.thumbnail_url, yt.title
+title="Youtube Whisperer"
+description="Speech to text transcription of Youtube videos using OpenAI's Whisper"
+block = gr.Blocks()
+with block:
+    gr.HTML(
+        """
+            <div style="text-align: center; max-width: 500px; margin: 0 auto;">
+              <div>
+                <h1>Youtube Whisperer</h1>
+              </div>
+              <p style="margin-bottom: 10px; font-size: 94%">
+                Speech to text transcription of Youtube videos using OpenAI's Whisper
+              </p>
+            </div>
+        """
+    )
+    with gr.Group():
+        with gr.Box():
+          text = gr.Textbox(
+              label="Transcription",
+              placeholder="Transcription Output",
+              lines=5)
+          microphone=gr.Audio(source="microphone", type="filepath")
+          with gr.Row().style(mobile_collapse=False, equal_height=True):
+              btn_microphone = gr.Button("Transcribe microphone audio")
+          audio_uploaded=gr.Audio(source="upload", type="filepath")
+          with gr.Row().style(mobile_collapse=False, equal_height=True):
+              btn_audio_uploaded = gr.Button("Transcribe audio uploaded")
+          link = gr.Textbox(label="YouTube Link")
+          with gr.Row().style(mobile_collapse=False, equal_height=True):
+              btn_youtube = gr.Button("Transcribe Youtube video")
+          with gr.Row().style(mobile_collapse=False, equal_height=True):
+            title = gr.Label(label="Video Title", placeholder="Title")
+            img = gr.Image(label="Thumbnail")
+          # Events
+          btn_youtube.click(transcribe_yt, inputs=[link], outputs=[text])
+          btn_microphone.click(transcribe_audio, inputs=[microphone], outputs=[text])
+          btn_audio_uploaded.click(transcribe_audio, inputs=[audio_uploaded], outputs=[text])
+          link.change(populate_metadata, inputs=[link], outputs=[img, title])
+block.launch(debug=True)