Akseluhr's picture
Update app.py
145198e
raw
history blame
1.36 kB
from transformers import pipeline
import gradio as gr
from pytube import YouTube
import os
# Load the fine-tuned Swedish Whisper model from the author's model repo.
# This runs once at import time so that every transcription request reuses
# the same pipeline object instead of reloading the model.
pipe = pipeline(model="Akseluhr/whisper-small-sv-SE-auhr-v2")
def get_audio(url):
    """Download the audio track of a YouTube video and return its local path.

    The first audio-only stream reported by pytube is written to the current
    working directory and its file extension is changed to ``.mp3``. This is
    a rename only: the audio data itself is not re-encoded.

    Args:
        url: URL of the YouTube video.

    Returns:
        Path of the downloaded file, carrying an ``.mp3`` extension.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(url)
    # first() yields None when the filter matches nothing; fail with a clear
    # message instead of an AttributeError on the .download() call below.
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError(f"No audio-only stream available for {url!r}")
    print(stream)
    downloaded = stream.download(output_path=".")  # Write the stream to disk
    base, _ext = os.path.splitext(downloaded)
    mp3_path = base + ".mp3"
    # os.replace overwrites an existing target on every platform, so asking
    # for the same video twice does not fail on Windows as os.rename would.
    os.replace(downloaded, mp3_path)
    return mp3_path
def transcribe(rec=None, file=None, url=""):
    """Transcribe speech from a recording, an uploaded file, or a YouTube URL.

    The sources are checked in priority order ``rec``, ``file``, ``url``; the
    first one that is provided is transcribed and the others are ignored.

    Args:
        rec: Path to a microphone recording, or None if nothing was recorded.
        file: Path to an uploaded audio file, or None if nothing was uploaded.
        url: YouTube video URL. Empty, whitespace-only, or None means that no
            URL was given.

    Returns:
        The transcribed text, or a short prompt asking for input when no
        source was provided.
    """
    if rec is not None:
        audio = rec
    elif file is not None:
        audio = file
    # Truthiness check on the stripped value: the previous ``url is not ""``
    # compared object identity, which is implementation-dependent for strings
    # and treated None or whitespace-only input as a real URL.
    elif url and url.strip():
        audio = get_audio(url.strip())
    else:
        return "Provide a recording or a file."
    return pipe(audio)["text"]
# Build the web UI. The three input components are passed positionally to
# transcribe, so their order must stay (rec, file, url).
iface = gr.Interface(
    title="Whisper Small Swedish",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper model.",
    fn=transcribe,
    inputs=[
        # Microphone recording, handed over as a file path.
        gr.Audio(type="filepath", source="microphone", optional=True),
        # Uploaded audio file, handed over as a file path.
        gr.Audio(type="filepath", source="upload", optional=True),
        # Free-text field for a YouTube link.
        gr.Textbox(label="URL", placeholder="Enter the Youtube video URL", optional=True),
    ],
    outputs="text",
)

# Launch the interface.
iface.launch()