Spaces:

khalidey
/

ID2223-Lab2-Whisper

Runtime error

App Files Files Community

khalidey committed on Dec 8, 2022

Commit

20baefb

•

1 Parent(s): b152137

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -0

app.py CHANGED Viewed

@@ -1,5 +1,7 @@
 from transformers import pipeline
 import gradio as gr
 pipe1 = pipeline(model="khalidey/ID2223_Lab2_Whisper_SV")  # change to "your-username/the-name-you-picked"
 pipe2 = pipeline('text-generation', model='birgermoell/swedish-gpt')
@@ -9,6 +11,21 @@ def transcribe(audio):
     generated_text = pipe2(text, max_length=50, num_return_sequences=2)[0]['generated_text']
     return text, generated_text
 with gr.Blocks() as demo:
     gr.Markdown("Whisper Small Swedish + Swedish GPT")
     gr.Markdown("Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model & text generation with Swedish GPT.")
@@ -26,6 +43,12 @@ with gr.Blocks() as demo:
             gr.Textbox(label="Recognized speech from recordings"),
             gr.Textbox(label="Swedish-gpt generated speech from recordings")
         ]
     upload_button.click(
         fn=transcribe,
         inputs=upload_file,
@@ -36,5 +59,10 @@ with gr.Blocks() as demo:
         inputs=record_file,
         outputs=record_outputs,
     )
 demo.launch()

 from transformers import pipeline
 import gradio as gr
+from pytube import YouTube
 pipe1 = pipeline(model="khalidey/ID2223_Lab2_Whisper_SV")  # change to "your-username/the-name-you-picked"
 pipe2 = pipeline('text-generation', model='birgermoell/swedish-gpt')
     generated_text = pipe2(text, max_length=50, num_return_sequences=2)[0]['generated_text']
     return text, generated_text
+def youtube_link(url):
+    # Obtains the audio of the youtube video and returns the path of the mp4 file
+    streams = YouTube(url).streams.filter(only_audio=True, file_extension='mp4')
+    path = streams.first().download()
+    return path
+def youtube_transcribe(url):
+    path = youtube_link(url)
+    audio_dataset = Dataset.from_dict({"audio": path}).cast_column("audio", Audio(sampling_rate=16000))
+    text = pipe1(audio_dataset["audio"])["text"]
 with gr.Blocks() as demo:
     gr.Markdown("Whisper Small Swedish + Swedish GPT")
     gr.Markdown("Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model & text generation with Swedish GPT.")
             gr.Textbox(label="Recognized speech from recordings"),
             gr.Textbox(label="Swedish-gpt generated speech from recordings")
         ]
+    with gr.TabItem("Transcribe from Youtube URL"):
+        url = gr.Text(max_lines=1, label="Transcribe from YouTube URL")
+        youtube_button = gr.Button("Submit for recognition")
+        youtube_outputs = [
+            gr.Textbox(label="Recognized speech from URL"),
+        ]
     upload_button.click(
         fn=transcribe,
         inputs=upload_file,
         inputs=record_file,
         outputs=record_outputs,
     )
+    youtube_button.click(
+    fn=youtube_transcribe,
+    inputs=url,
+    outputs=youtube_outputs,
+    )
 demo.launch()