|
import gradio as gr |
|
from youtube_transcript_api import YouTubeTranscriptApi |
|
from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound |
|
import textwrap |
|
import requests |
|
from fp.fp import FreeProxy |
|
import random |
|
|
|
|
|
def configure_session():
    """Build a ``requests.Session`` routed through a randomly chosen free proxy.

    The proxy URL is obtained from ``FreeProxy(rand=True, timeout=1).get()``
    and installed for both the ``http`` and ``https`` schemes. The session
    also sends a desktop-browser ``User-Agent`` header.

    Returns:
        The configured session, or ``None`` when the session could not be
        set up (for example, no proxy could be obtained). Callers treat
        ``None`` as "connect directly".
    """
    try:
        proxy = FreeProxy(rand=True, timeout=1).get()
        if not proxy:
            # Nothing usable came back; signal "no proxy" to the caller.
            return None

        session = requests.Session()
        session.proxies = {
            "http": proxy,
            "https": proxy,
        }
        session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
        })
        return session
    except Exception:
        # Deliberately best-effort: any failure while finding a proxy or
        # building the session degrades to a direct connection. Catching
        # Exception (not a bare ``except:``) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return None
|
|
|
def _extract_video_id(video_url):
    """Return the YouTube video ID found in *video_url*, or ``None``.

    Recognised shapes:
      * ``.../watch?v=ID`` with ``v`` in any query-parameter position
      * ``youtu.be/ID``
      * ``.../shorts/ID``, ``.../embed/ID``, ``.../live/ID``

    Surrounding whitespace, a trailing ``#fragment``, extra query
    parameters and trailing path segments are ignored.
    """
    if not isinstance(video_url, str):
        return None

    # Drop surrounding whitespace and any "#fragment" before parsing.
    url = video_url.strip().split("#", 1)[0]
    base, _, query = url.partition("?")

    # Watch pages carry the ID in the "v" query parameter.
    if base.rstrip("/").endswith("watch"):
        for param in query.split("&"):
            key, _, value = param.partition("=")
            if key == "v" and value:
                return value
        return None

    # Every other supported form carries the ID as the path segment that
    # follows a known marker.
    for marker in ("youtu.be/", "/shorts/", "/embed/", "/live/"):
        if marker in base:
            candidate = base.split(marker)[-1].split("/")[0].split("&")[0]
            return candidate or None

    return None


def fetch_transcript(video_url):
    """Fetch the transcript of a YouTube video and split it into chunks.

    Args:
        video_url: A YouTube URL (watch, youtu.be, shorts, embed or live).

    Returns:
        A ``(status, chunks)`` tuple. On success ``status`` is
        ``"Word count: N"`` and ``chunks`` is the transcript text wrapped
        into pieces of at most 4000 characters without breaking words.
        On failure ``status`` describes the problem and ``chunks`` is ``[]``.
    """
    # Validate the input before doing any network work, and outside the
    # try block so validation failures can never reach the error handlers.
    if not video_url:
        return "Missing video URL", []

    video_id = _extract_video_id(video_url)
    if video_id is None:
        return "Invalid YouTube URL format", []

    # Bound before the try so the generic handler below can always read it,
    # even when configure_session() itself raises.
    session = None
    try:
        session = configure_session()
        if session:
            transcript = YouTubeTranscriptApi.get_transcript(
                video_id, proxies=session.proxies
            )
        else:
            # No proxy available: fall back to a direct request.
            transcript = YouTubeTranscriptApi.get_transcript(video_id)

        full_text = " ".join(entry["text"] for entry in transcript)
        chunks = textwrap.wrap(full_text, width=4000, break_long_words=False)
        return f"Word count: {len(full_text.split())}", chunks

    except (TranscriptsDisabled, NoTranscriptFound) as e:
        # Expected "no transcript" outcomes: surface the library's message.
        return str(e), []
    except Exception as e:
        # Report which proxy (if any) was in use to make failures diagnosable.
        proxy_used = (
            session.proxies['http']
            if session and hasattr(session, 'proxies')
            else 'None'
        )
        return f"Error: {str(e)} (Proxy: {proxy_used})", []
|
|
|
# Gradio UI: one URL textbox in; a status line and the transcript chunks out.
iface = gr.Interface(
    title="YouTube Transcript Fetcher with Proxy Rotation",
    description="Fetches transcripts using rotating proxies to avoid rate limits",
    fn=fetch_transcript,
    inputs=gr.Textbox(
        label="YouTube URL",
        placeholder="Paste YouTube URL here...",
    ),
    outputs=[
        # First element of fetch_transcript's return value.
        gr.Textbox(label="Status"),
        # Second element of fetch_transcript's return value.
        gr.Textbox(label="Transcript Chunks", lines=10),
    ],
)

# Start the web app when the script is executed.
iface.launch()