Spaces:
Running
Running
from transformers import pipeline | |
import gradio as gr | |
from pytube import YouTube | |
import os | |
# Identifier of the fine-tuned Dutch Whisper checkpoint passed to `pipeline`.
_MODEL_ID = "dussen/whisper-small-nl-hc"

# Build the pipeline once at import time; every handler below reuses it.
pipe = pipeline(model=_MODEL_ID)
print(pipe)
def download_audio(url, output_path='downloads'):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        url: Address of the YouTube video.
        output_path: Directory the audio file is downloaded into.

    Returns:
        The transcription produced by the module-level ``pipe``. If any step
        fails, an ``"Error: ..."`` message is returned instead so the failure
        is visible in the text output rather than showing up as an empty
        result.
    """
    mp3_path = os.path.join(output_path, "video.mp3")
    try:
        yt = YouTube(url)
        # Take the first audio-only MP4 stream pytube lists; this is not
        # necessarily the one with the highest bitrate.
        audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').first()
        if audio_stream is None:
            raise ValueError("no audio-only MP4 stream is available for this video")

        print("Downloading video...")
        # download() returns the path it actually wrote, so the location does
        # not have to be rebuilt from default_filename.
        downloaded_path = audio_stream.download(output_path)
        print(f"Downloaded audio to {output_path}")

        # os.replace overwrites a leftover video.mp3 from an earlier failed
        # run on every platform (os.rename raises on Windows in that case).
        os.replace(downloaded_path, mp3_path)
        print("Downloaded video")

        print("Transcribing audio...")
        text = pipe(mp3_path)["text"]
        print(f"Transcribed audio: {text}")
        return text
    except Exception as e:
        # UI handler boundary: report the problem instead of crashing.
        print(f"Error: {e}")
        return f"Error: {e}"
    finally:
        # Remove the audio file whether or not transcription succeeded.
        try:
            os.remove(mp3_path)
        except FileNotFoundError:
            pass
def audio_to_text(audio):
    """Transcribe a recorded audio file.

    Args:
        audio: Path of the recording to transcribe. May be ``None`` or empty
            when the form is submitted without a recording.

    Returns:
        The transcription produced by the module-level ``pipe``, or an empty
        string when there is no audio to transcribe.
    """
    # Nothing recorded: return early instead of letting the pipeline fail on
    # a missing input.
    if not audio:
        return ""

    text = pipe(audio)["text"]
    print(text)
    return text
def radio_to_text(radio_url, *, max_seconds=30.0, max_bytes=8 * 1024 * 1024):
    """Record a short excerpt of a radio stream and transcribe it.

    A live stream never ends on its own, so the recording stops as soon as
    either ``max_seconds`` have elapsed or ``max_bytes`` have been written,
    whichever comes first. The excerpt is written to ``stream.mp3`` in the
    current working directory.

    Args:
        radio_url: http(s) address of the audio stream.
        max_seconds: Upper bound on the recording time, in seconds.
        max_bytes: Upper bound on the number of bytes recorded.

    Returns:
        The transcription of the recorded excerpt produced by the
        module-level ``pipe``.

    Raises:
        ValueError: If ``radio_url`` is not an http(s) URL, or if the stream
            delivered no data.
    """
    # Function-scope, standard-library imports keep this block self-contained.
    import time
    import urllib.request
    from urllib.parse import urlsplit

    stream_url = (radio_url or "").strip()
    # The URL is typed in by the user: only allow web streams so urlopen is
    # never pointed at file:// or other local schemes.
    if urlsplit(stream_url).scheme not in ("http", "https"):
        raise ValueError(f"Expected an http(s) stream URL, got: {radio_url!r}")

    deadline = time.monotonic() + max_seconds
    received = 0
    with urllib.request.urlopen(stream_url, timeout=10) as response, \
            open('stream.mp3', 'wb') as f:
        while received < max_bytes and time.monotonic() < deadline:
            block = response.read(min(4096, max_bytes - received))
            if not block:
                # The server closed the connection (finite file or dropped stream).
                break
            f.write(block)
            received += len(block)

    if not received:
        raise ValueError(f"No audio data received from {stream_url!r}")

    text = pipe("stream.mp3")["text"]
    print(text)
    return text
# Tab that takes a YouTube URL as text and shows the text returned by
# download_audio.
iface_video_url = gr.Interface(
    title="Whisper Small Dutch - Use a YouTube URL",
    description="Demo for dutch speech recognition using a fine-tuned Whisper small model.",
    fn=download_audio,
    inputs="text",
    outputs="text",
)
# Microphone input; type="filepath" means audio_to_text receives a file path
# rather than raw samples.
_microphone_input = gr.Audio(sources=["microphone"], type="filepath")

# Tab that records from the microphone and shows the text returned by
# audio_to_text.
iface_audio = gr.Interface(
    title="Whisper Small Dutch - Use your microphone",
    description="Realtime demo for dutch speech recognition using a fine-tuned Whisper small model.",
    fn=audio_to_text,
    inputs=_microphone_input,
    outputs="text",
)
# Tab that takes a radio stream URL as text and shows the text returned by
# radio_to_text.
iface_radio = gr.Interface(
    title="Whisper Small Dutch - Use a radio URL",
    description="Demo for dutch speech recognition using a fine-tuned Whisper small model.",
    fn=radio_to_text,
    inputs="text",
    outputs="text",
)
# Bundle the three demos into one tabbed app; tab names pair with the
# interfaces by position.
app = gr.TabbedInterface(
    interface_list=[iface_audio, iface_video_url, iface_radio],
    tab_names=["Audio to text", "Video to text", "Radio to text"],
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    app.launch()