"""Gradio app that fetches a YouTube video's transcript through a proxy."""

import random
import re
import textwrap

import gradio as gr
import requests
from fp.fp import FreeProxy
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound

# Maximum number of characters per transcript chunk.
CHUNK_WIDTH = 4000

# Substrings that directly precede the video ID, checked in this order.
_URL_MARKERS = ("watch?v=", "youtu.be/", "/shorts/", "/embed/", "/live/")

# Watch URLs where ``v`` is not the first query parameter,
# e.g. ``watch?feature=share&v=<id>``.
_WATCH_QUERY_ID = re.compile(r"watch\?[^#\s]*&v=([^?&#/\s]+)")

# Characters that terminate a video ID inside a URL.
_ID_DELIMITERS = re.compile(r"[?&#/\s]")


def configure_session():
    """Build a ``requests.Session`` routed through a proxy from ``FreeProxy``.

    Returns:
        A session whose ``proxies`` mapping uses the same proxy for both
        ``http`` and ``https``, or ``None`` if the proxy lookup or the
        session setup fails for any reason (best effort: never raises).
    """
    try:
        proxy = FreeProxy(rand=True, timeout=1).get()
        session = requests.Session()
        session.proxies = {"http": proxy, "https": proxy}
        session.headers.update(
            {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
        )
        return session
    except Exception:
        # Deliberately best effort; the caller falls back to a direct request.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        return None


def _extract_video_id(video_url):
    """Return the video ID found in ``video_url``, or ``None`` if there is none."""
    if not isinstance(video_url, str):
        return None
    url = video_url.strip()

    tail = None
    for marker in _URL_MARKERS:
        if marker in url:
            # Text after the last occurrence of the marker.
            tail = url.rpartition(marker)[2]
            break
    if tail is None:
        match = _WATCH_QUERY_ID.search(url)
        if match is None:
            return None
        tail = match.group(1)

    # Drop trailing query parameters, fragments and path segments.
    video_id = _ID_DELIMITERS.split(tail, maxsplit=1)[0]
    return video_id or None


def fetch_transcript(video_url):
    """Fetch a video's transcript and split it into chunks.

    Args:
        video_url: YouTube URL in ``watch?v=``, ``youtu.be/``, ``/shorts/``,
            ``/embed/`` or ``/live/`` form.

    Returns:
        A ``(status, chunks)`` tuple. On success ``status`` is
        ``"Word count: N"`` and ``chunks`` is a list of strings of at most
        ``CHUNK_WIDTH`` characters each. On failure ``status`` describes the
        problem and ``chunks`` is an empty list. Errors are reported through
        ``status`` rather than raised.
    """
    if not video_url:
        return "Missing video URL", []

    video_id = _extract_video_id(video_url)
    if video_id is None:
        return "Invalid YouTube URL format", []

    # A new session (and therefore a new proxy lookup) for each request.
    session = configure_session()
    # Captured before the ``try`` so the error handler can always read it.
    proxies = session.proxies if session is not None else None

    try:
        if proxies:
            transcript = YouTubeTranscriptApi.get_transcript(
                video_id, proxies=proxies
            )
        else:
            transcript = YouTubeTranscriptApi.get_transcript(video_id)

        full_text = " ".join(entry["text"] for entry in transcript)
        chunks = textwrap.wrap(
            full_text, width=CHUNK_WIDTH, break_long_words=False
        )
        return f"Word count: {len(full_text.split())}", chunks
    except (TranscriptsDisabled, NoTranscriptFound) as e:
        return str(e), []
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        proxy_used = proxies["http"] if proxies else "None"
        return f"Error: {e} (Proxy: {proxy_used})", []
    finally:
        if session is not None:
            session.close()


iface = gr.Interface(
    fn=fetch_transcript,
    inputs=gr.Textbox(
        label="YouTube URL", placeholder="Paste YouTube URL here..."
    ),
    outputs=[
        gr.Textbox(label="Status"),
        gr.Textbox(label="Transcript Chunks", lines=10),
    ],
    title="YouTube Transcript Fetcher with Proxy Rotation",
    description="Fetches transcripts using rotating proxies to avoid rate limits",
)

iface.launch()