"""Gradio app: download a YouTube video's audio and transcribe it with Whisper.

The app exposes a single form (URL + Whisper model path) and returns a CSV
file with one row per transcribed segment.
"""
import contextlib
import os
import tempfile
import uuid
from typing import Generator, Optional

import gradio as gr
import pandas as pd
import yt_dlp as youtube_dl
from faster_whisper import WhisperModel

# Model used when the caller (or the UI textbox) does not supply one.
DEFAULT_MODEL_PATH = "distil-large-v2"

# Column order of the segment table; also used when there are no segments so
# the resulting CSV still carries a header row.
SEGMENT_COLUMNS = ["chunk_id", "chunk_length", "text", "start_time", "end_time"]


class YouTubeTranscriber:
    """Download audio from a URL with yt-dlp and transcribe it with Whisper."""

    def __init__(self, model_path: str):
        """Load the Whisper model identified by ``model_path``."""
        self.model = WhisperModel(model_path)

    def download_audio(self, url: str, preferred_quality: str = "192") -> Optional[str]:
        """Download the audio track of ``url`` as an MP3 in the temp directory.

        Args:
            url: Address of the video to fetch.
            preferred_quality: Value passed to the FFmpeg audio extractor as
                ``preferredquality``.

        Returns:
            Path of the saved MP3 file, or ``None`` if yt-dlp reported a
            download error.
        """
        file_name = f"{uuid.uuid4()}.mp3"
        # Unique name inside the platform temp directory (``/tmp`` on Linux
        # unless TMPDIR says otherwise).
        output_path = os.path.join(tempfile.gettempdir(), file_name)

        ydl_opts = {
            'format': 'bestaudio/best',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': preferred_quality,
            }],
            'outtmpl': output_path,  # Specify the output path and file name
        }

        try:
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                # Fetch metadata first so the title can be logged before the
                # (potentially slow) download starts.
                info_dict = ydl.extract_info(url, download=False)
                video_title = info_dict.get('title', 'Unknown title')
                print(f"Downloading audio for: {video_title}")

                ydl.download([url])
                print(f"Audio file saved as: {output_path}")
                return output_path
        except youtube_dl.utils.DownloadError as e:
            print(f"Error downloading audio: {e}")
            return None

    def transcribe_audio(self, path: str) -> Generator:
        """Start transcription of the audio file at ``path``.

        Returns:
            The segments iterable produced by the model. The second value
            returned by ``model.transcribe`` is discarded.
        """
        print(f"Reading {path}")
        segments, _ = self.model.transcribe(path)
        return segments

    def process_segments(self, segments: Generator) -> pd.DataFrame:
        """Collect transcription segments into a table.

        Args:
            segments: Iterable of objects exposing ``start``, ``end`` and
                ``text`` attributes.

        Returns:
            A DataFrame with the columns listed in ``SEGMENT_COLUMNS``; it has
            zero rows (but still all columns) when ``segments`` is empty.
        """
        rows = [
            {
                'chunk_id': f"chunk_{i}",
                'chunk_length': segment.end - segment.start,
                'text': segment.text,
                'start_time': segment.start,
                'end_time': segment.end,
            }
            for i, segment in enumerate(segments)
        ]
        # Explicit columns keep the schema stable even when ``rows`` is empty.
        return pd.DataFrame(rows, columns=SEGMENT_COLUMNS)


# Function to be called by the Gradio interface
def transcribe_youtube_video(url: str, model_path: str = "distil-large-v2") -> str:
    """Download, transcribe, and export a video's speech as a CSV file.

    Args:
        url: Address of the video to transcribe.
        model_path: Whisper model name or path. A blank value (what the UI
            sends when the textbox is left empty) falls back to
            ``DEFAULT_MODEL_PATH``.

    Returns:
        Path of the CSV file written to the temp directory.

    Raises:
        gr.Error: If the audio could not be downloaded. Raising (instead of
            returning a message string) lets Gradio display the message; the
            output component is a ``gr.File`` and would otherwise treat the
            message as a file path.
    """
    model_path = (model_path or "").strip() or DEFAULT_MODEL_PATH

    yt_transcriber = YouTubeTranscriber(model_path)
    audio_path = yt_transcriber.download_audio(url)
    if not audio_path:
        raise gr.Error("Failed to download audio.")

    try:
        segments = yt_transcriber.transcribe_audio(audio_path)
        # ``segments`` is consumed here, so the audio file must stay on disk
        # until this call has finished.
        df = yt_transcriber.process_segments(segments)
    finally:
        # The MP3 is only an intermediate artifact; do not let it pile up in
        # the temp directory. Failing to delete it must not mask the result.
        with contextlib.suppress(OSError):
            os.remove(audio_path)

    output_csv = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.csv")
    df.to_csv(output_csv, index=False)
    return output_csv


interface = gr.Interface(
    fn=transcribe_youtube_video,
    inputs=[
        gr.Textbox(lines=1, placeholder="Enter YouTube URL here...", label="YouTube URL"),
        # Pre-fill the default so an untouched field does not submit "".
        gr.Textbox(lines=1, value=DEFAULT_MODEL_PATH, label="Whisper Model Path"),
    ],
    outputs=gr.File(label="Transcribed Segments CSV"),  # Use gr.File directly
    title="YouTube Audio Transcriber",
    description="Enter a YouTube URL to download the audio and transcribe it using Whisper.",
)

# Launch the interface
interface.launch()