"""Gradio demo: Hungarian speech recognition with a fine-tuned Whisper model.

The app accepts either a video URL (the video is downloaded and its audio
track extracted) or a microphone recording, and returns the transcription.
"""

from transformers import pipeline
import gradio as gr
import requests
from moviepy.editor import *
import os
import tempfile

# Pin the gradio version at startup.
# NOTE(review): gradio has already been imported above, so this reinstall may
# not affect the module loaded in the current process -- confirm intent.
os.system("pip uninstall -y gradio")
os.system("pip install gradio==4.8.0")

# Seconds to wait for the remote server (connect / between reads) before the
# download is abandoned instead of blocking the worker forever.
_DOWNLOAD_TIMEOUT_S = 30
# Size of the pieces streamed to disk, so the whole video is never in memory.
_DOWNLOAD_CHUNK_BYTES = 1024 * 1024

pipe = pipeline(model="esnagy/whisper-small-hu")


def transcribe_audio(audio_file):
    """Run the speech-recognition pipeline on an audio file.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        The value stored under the ``"text"`` key of the pipeline output.
    """
    print("Transcribing audio: ", audio_file)
    return pipe(audio_file)["text"]


def _download_file(url, destination):
    """Stream the content at *url* into the file *destination*.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status.
    """
    # NOTE(review): the URL is user-supplied and fetched server-side; consider
    # restricting allowed hosts/schemes if this is exposed publicly.
    with requests.get(url, stream=True, timeout=_DOWNLOAD_TIMEOUT_S) as response:
        # Fail early rather than saving an HTML error page as "video".
        response.raise_for_status()
        with open(destination, "wb") as out:
            for chunk in response.iter_content(chunk_size=_DOWNLOAD_CHUNK_BYTES):
                out.write(chunk)


def transcribe_video(video_url):
    """Download a video, extract its audio track, and transcribe it.

    Args:
        video_url: URL of the video to download.

    Returns:
        The transcription of the video's audio track.

    Raises:
        requests.RequestException: If the download fails.
        ValueError: If the downloaded video has no audio track.
    """
    # A private temporary directory per call keeps concurrent requests from
    # overwriting each other's files and guarantees cleanup on any exit path.
    with tempfile.TemporaryDirectory() as workdir:
        video_filename = os.path.join(workdir, "temp_video.mp4")
        audio_file = os.path.join(workdir, "temp_audio.wav")

        _download_file(video_url, video_filename)

        # Load the video using moviepy
        video = VideoFileClip(video_filename)
        try:
            audio = video.audio
            if audio is None:
                raise ValueError("The downloaded video has no audio track.")
            audio.write_audiofile(audio_file, codec="pcm_s16le")
        finally:
            # Release the reader so the files can be removed.
            video.close()

        # Transcribe while the temporary directory still exists.
        return transcribe_audio(audio_file)


def transcribe(video_url="", audio=None):
    """Gradio handler: transcribe a video URL if given, else the recording.

    Args:
        video_url: URL of a video to transcribe; blank means "use *audio*".
        audio: Path of the recorded audio file, or ``None`` if nothing was
            recorded.

    Returns:
        The transcribed text.

    Raises:
        gr.Error: If neither a video URL nor a recording was provided.
    """
    print("[transcribe] Transcribing...")
    print("[transcribe] video_url: ", video_url)
    print("[transcribe] audio: ", audio)

    # Tolerate None as well as blank/whitespace-only URLs.
    if (video_url or "").strip() != "":
        return transcribe_video(video_url)
    if audio is None:
        # Surface a readable message in the UI instead of a pipeline crash.
        raise gr.Error("Please enter a video URL or record audio to transcribe.")
    return transcribe_audio(audio)


iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Textbox(label="Enter video URL", placeholder="Or leave empty to use microphone"),
        gr.Audio(sources=["microphone"], type="filepath"),
    ],
    outputs="text",
    title="Whisper Small Hungarian",
    description="Realtime demo for Hungarian speech recognition using a fine-tuned Whisper small model. Enter a video URL or record your voice to transcribe.",
)

iface.launch()