"""Gradio app that downloads a video's audio track and transcribes it to SRT.

The audio is fetched with yt-dlp into a local ``temp_download`` folder,
transcribed with the Whisper "medium" model, and returned as SubRip (SRT) text.
"""

import logging
import os
from collections import deque
from functools import lru_cache

import gradio as gr
import torch
import whisper
import yt_dlp as yt

LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'

# Set up logging
logging.basicConfig(filename='transcription_logs.txt', level=logging.INFO,
                    format=LOG_FORMAT)
logger = logging.getLogger(__name__)

# Create a temporary download folder if it doesn't exist
temp_download_dir = os.path.join(os.getcwd(), "temp_download")
os.makedirs(temp_download_dir, exist_ok=True)


class _RecentLogHandler(logging.Handler):
    """Keep the most recent formatted log lines in memory for the UI."""

    def __init__(self, capacity: int = 200) -> None:
        super().__init__()
        # Bounded so a long-running server does not accumulate logs forever.
        self._lines = deque(maxlen=capacity)

    def emit(self, record: logging.LogRecord) -> None:
        try:
            self._lines.append(self.format(record))
        except Exception:
            self.handleError(record)

    def text(self) -> str:
        """Return the retained log lines joined by newlines."""
        return "\n".join(self._lines)


def download_audio(url: str) -> str:
    """Download the best available audio stream for *url*.

    The file is stored in ``temp_download_dir``. Spaces in the file name are
    replaced with underscores; if that sanitized name is already taken, a
    numeric suffix is appended to keep the name unique.

    Args:
        url: Address of the video to fetch.

    Returns:
        Path of the downloaded (and possibly renamed) audio file.

    Raises:
        FileNotFoundError: If the expected file is missing after the download.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': os.path.join(temp_download_dir, '%(title)s.%(ext)s'),
        # A watch URL that also carries a playlist id should fetch one video.
        'noplaylist': True,
    }
    with yt.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=True)
        downloaded_file = ydl.prepare_filename(info_dict)

    if not os.path.exists(downloaded_file):
        raise FileNotFoundError("Failed to download and rename audio file.")

    # Generate a new file name by replacing spaces with underscores
    sanitized_name = os.path.basename(downloaded_file).replace(" ", "_")
    new_filename = os.path.join(temp_download_dir, sanitized_name)

    # Nothing to rename: without this check the file would collide with
    # itself below and always end up with a pointless "_1" suffix.
    if os.path.abspath(new_filename) == os.path.abspath(downloaded_file):
        return downloaded_file

    # Create a unique name if the sanitized name is already in use
    base, extension = os.path.splitext(new_filename)
    counter = 1
    while os.path.exists(new_filename):
        new_filename = f"{base}_{counter}{extension}"
        counter += 1

    os.rename(downloaded_file, new_filename)
    return new_filename


@lru_cache(maxsize=1)
def _load_model():
    """Load the Whisper "medium" model once and reuse it across requests."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return whisper.load_model("medium", device=device)


def _format_srt_timestamp(seconds: float) -> str:
    """Format a time offset in seconds as an SRT ``HH:MM:SS,mmm`` timestamp."""
    total_ms = max(0, round(seconds * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def transcribe_to_srt(file_path: str) -> str:
    """Transcribe the audio file at *file_path* and return SRT-formatted text.

    Each transcription segment becomes one numbered cue (starting at 1) with
    a ``start --> end`` timestamp line followed by the segment text.
    """
    result = _load_model().transcribe(file_path)

    cues = []
    for index, segment in enumerate(result["segments"], start=1):
        start = _format_srt_timestamp(segment["start"])
        end = _format_srt_timestamp(segment["end"])
        text = segment["text"].strip()
        cues.append(f"{index}\n{start} --> {end}\n{text}\n\n")
    return "".join(cues)


def transcribe_video(url: str) -> str:
    """Download the audio behind *url* and return its transcript as SRT text.

    Failures are logged and reported as an error message string rather than
    raised, so the UI always has something to display.
    """
    if not url or not url.strip():
        return "Please enter a video URL."
    url = url.strip()

    try:
        logger.info("Transcribing video from URL: %s", url)
        audio_file = download_audio(url)
        logger.info("Downloaded audio file: %s", audio_file)
        srt_content = transcribe_to_srt(audio_file)
        logger.info("Transcription completed successfully!")
        # NOTE: the audio file is intentionally kept in temp_download_dir
        # after transcription; clean that folder up externally if needed.
        return srt_content
    except Exception as e:
        logger.exception("An error occurred: %s", e)
        return f"An error occurred: {e}"


def _transcribe_with_logs(url: str):
    """UI callback: return the transcript together with the recent log lines."""
    srt_content = transcribe_video(url)
    return srt_content, log_handler.text()


# Display the logs in the interface
log_viewer = gr.Textbox(value="Logs will appear here...", label="Logs",
                        lines=10, interactive=False)
log_handler = _RecentLogHandler()
log_handler.setLevel(logging.INFO)
log_handler.setFormatter(logging.Formatter(LOG_FORMAT))
logging.getLogger().addHandler(log_handler)

# No live=True: transcription should run on submit, not on every input change.
iface = gr.Interface(fn=_transcribe_with_logs,
                     inputs="text",
                     outputs=["text", log_viewer],
                     title="YouTube/TikTok Video to SRT Transcription")

if __name__ == "__main__":
    iface.launch()