# Author: khalidey
# Commit: "Update app.py" (9aa5fc9)
from transformers import pipeline
import gradio as gr
from pytube import YouTube
from datasets import Dataset, Audio
import os
from moviepy.editor import AudioFileClip
# Speech recognition model (the fine-tuned Whisper checkpoint used by the demo).
# No task is passed, so transformers resolves it from the model repository.
pipe1 = pipeline(model="khalidey/ID2223_Lab2_Whisper_SV")

# Swedish GPT model used to continue the recognized text.
pipe2 = pipeline(task="text-generation", model="birgermoell/swedish-gpt")
def transcribe(audio):
    """Transcribe an audio file and continue the transcript with Swedish GPT.

    Args:
        audio: Path of the audio file as handed over by the Gradio audio
            component, or ``None`` when nothing was uploaded or recorded.

    Returns:
        A ``(text, generated_text)`` tuple: the recognized speech, and the
        output of the text-generation pipeline when prompted with that text.

    Raises:
        gr.Error: If no audio was provided.
    """
    if audio is None:
        # Gradio passes None when the user submits an empty audio component;
        # report that in the UI instead of failing inside the pipeline.
        raise gr.Error("No audio provided. Please upload or record audio first.")

    text = pipe1(audio)["text"]

    # Only one continuation is displayed, so only one is generated.
    # max_new_tokens bounds the continuation itself; max_length also counts the
    # prompt tokens, which leaves no room to generate for longer transcripts.
    generated = pipe2(text, max_new_tokens=50, num_return_sequences=1)
    return text, generated[0]["generated_text"]
def youtube_link(url):
    """Download the audio track of a YouTube video.

    Args:
        url: URL of the YouTube video.

    Returns:
        Path of the downloaded audio-only mp4 file, as returned by pytube's
        ``download()``.

    Raises:
        ValueError: If the video offers no audio-only mp4 stream.
    """
    streams = YouTube(url).streams.filter(only_audio=True, file_extension="mp4")
    stream = streams.first()
    if stream is None:
        # first() returns None for an empty query; fail with a clear message
        # rather than an AttributeError on None.download().
        raise ValueError(f"No audio-only mp4 stream found for {url!r}")
    return stream.download()
def convert_to_wav(path, output_path="audio.wav"):
    """Convert an audio file to WAV, dropping the last two seconds.

    Args:
        path: Path of the source audio file (e.g. the downloaded mp4).
        output_path: Where to write the WAV file. Defaults to ``"audio.wav"``
            in the current working directory.

    Returns:
        ``output_path``, the path of the written WAV file.
    """
    # Seconds cut from the end of the clip.
    # NOTE(review): presumably a workaround for read errors near the end of
    # the stream -- confirm before changing.
    trailing_trim = 2

    audio = AudioFileClip(path)
    try:
        if audio.duration is not None and audio.duration <= trailing_trim:
            # Trimming would leave a clip of zero or negative length; keep the
            # whole clip instead.
            audio_frame = audio
        else:
            # A negative end time is measured back from the end of the clip.
            audio_frame = audio.subclip(0, -trailing_trim)
        audio_frame.write_audiofile(output_path)
    finally:
        # Release the underlying reader even when the conversion fails.
        audio.close()
    return output_path
def youtube_transcribe(url):
    """Transcribe the speech in a YouTube video.

    Downloads the video's audio track, converts it to WAV, loads it through a
    ``datasets`` audio column cast to 16 kHz and runs the speech recognition
    pipeline on it.

    Args:
        url: URL of the YouTube video.

    Returns:
        The recognized text.

    Raises:
        gr.Error: If ``url`` is empty.
    """
    if not url or not url.strip():
        # Report a missing URL in the UI instead of failing inside pytube.
        raise gr.Error("Please enter a YouTube URL.")

    path = youtube_link(url.strip())
    try:
        path_wav = convert_to_wav(path)
    finally:
        # The download is only needed for the conversion; remove it so that
        # repeated requests do not pile up files on disk.
        try:
            os.remove(path)
        except OSError:
            # Best-effort cleanup: a leftover file must not fail the request.
            pass

    audio_dataset = Dataset.from_dict({"audio": [path_wav]}).cast_column(
        "audio", Audio(sampling_rate=16000)
    )
    text = pipe1(audio_dataset["audio"])
    return text[0]["text"]
# Build the demo UI: a title, a description and three tabs (file upload,
# microphone recording, YouTube URL), each with its own submit button and
# output textboxes.
with gr.Blocks() as demo:
    gr.Markdown("Whisper Small Swedish + Swedish GPT")
    gr.Markdown("Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model & text generation with Swedish GPT.")

    with gr.TabItem("Upload from disk"):
        upload_file = gr.Audio(
            source="upload",
            type="filepath",
            label="Upload from disk",
        )
        upload_button = gr.Button("Submit for recognition")
        upload_transcript = gr.Textbox(label="Recognized speech from uploaded file")
        upload_generated = gr.Textbox(label="Swedish-gpt generated speech from uploaded file")
        upload_outputs = [upload_transcript, upload_generated]

    with gr.TabItem("Record from microphone"):
        record_file = gr.Audio(
            source="microphone",
            type="filepath",
            label="Record from microphone",
        )
        record_button = gr.Button("Submit for recognition")
        record_transcript = gr.Textbox(label="Recognized speech from recordings")
        record_generated = gr.Textbox(label="Swedish-gpt generated speech from recordings")
        record_outputs = [record_transcript, record_generated]

    with gr.TabItem("Transcribe from Youtube URL"):
        url = gr.Text(max_lines=1, label="Transcribe from YouTube URL")
        youtube_button = gr.Button("Submit for recognition")
        youtube_transcript = gr.Textbox(label="Recognized speech from URL")
        youtube_outputs = [youtube_transcript]

    # Both audio tabs use the same handler; only the wired components differ.
    for button, source, sinks in (
        (upload_button, upload_file, upload_outputs),
        (record_button, record_file, record_outputs),
    ):
        button.click(fn=transcribe, inputs=source, outputs=sinks)

    youtube_button.click(fn=youtube_transcribe, inputs=url, outputs=youtube_outputs)

demo.launch()