File size: 2,409 Bytes
c565645
 
 
 
1f87fff
 
f97eb77
1f87fff
f97eb77
 
1f87fff
 
f97eb77
bbe2d12
 
1f87fff
f97eb77
 
 
 
 
 
1f87fff
 
c565645
 
 
 
 
1f87fff
 
 
 
 
 
 
 
 
 
 
f97eb77
 
 
 
 
 
1f87fff
 
c565645
 
 
 
 
 
f97eb77
 
c565645
 
 
1f87fff
 
 
 
 
f97eb77
1f87fff
c565645
 
1f87fff
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound
import textwrap
import requests
from fp.fp import FreeProxy
import random

# Configure session with proxies
def configure_session():
    """Build a ``requests.Session`` routed through a randomly chosen free proxy.

    The proxy URL obtained from ``FreeProxy`` is installed for both the
    ``http`` and ``https`` schemes, and a desktop browser ``User-Agent``
    header is set on the session.

    Returns:
        The configured ``requests.Session``, or ``None`` when the session
        could not be set up (for example, the proxy lookup failed). Callers
        are expected to fall back to a direct connection in that case.
    """
    try:
        proxy = FreeProxy(rand=True, timeout=1).get()
        session = requests.Session()
        session.proxies = {
            "http": proxy,
            "https": proxy,
        }
        session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
        })
        return session
    except Exception:
        # Best effort: any ordinary failure means "no proxy available".
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return None

def _extract_video_id(video_url):
    """Return the video ID embedded in a YouTube URL, or ``None`` if absent.

    Recognised URL shapes, checked in this order: ``watch?v=<id>``,
    ``youtu.be/<id>``, ``/shorts/<id>``, ``/embed/<id>`` and ``/live/<id>``.
    Anything following the ID (extra query parameters, a fragment, or a
    trailing path segment) is discarded.
    """
    if not isinstance(video_url, str):
        return None

    markers = ("watch?v=", "youtu.be/", "/shorts/", "/embed/", "/live/")
    for marker in markers:
        if marker in video_url:
            candidate = video_url.split(marker)[-1]
            # Cut at the first character that cannot belong to the ID.
            for delimiter in "&?#/":
                candidate = candidate.split(delimiter, 1)[0]
            candidate = candidate.strip()
            # An empty ID (e.g. "https://youtu.be/") is not a usable URL.
            return candidate or None
    return None


def fetch_transcript(video_url):
    """Fetch the transcript of a YouTube video and split it into chunks.

    Args:
        video_url: URL of the video, in any shape accepted by
            ``_extract_video_id``.

    Returns:
        A ``(status, chunks)`` tuple. On success ``status`` is
        ``"Word count: <n>"`` and ``chunks`` is a list of strings, each at
        most 4000 characters long and split on whitespace. On any failure
        ``status`` holds a human-readable message and ``chunks`` is ``[]``.
        No exception from the transcript lookup escapes this function.
    """
    if not video_url:
        return "Missing video URL", []

    video_id = _extract_video_id(video_url)
    if video_id is None:
        return "Invalid YouTube URL format", []

    # Bound before the try block so the generic handler below can always
    # report which proxy (if any) was in use.
    session = None
    try:
        # A fresh proxied session is created for every request.
        session = configure_session()
        if session:
            transcript = YouTubeTranscriptApi.get_transcript(video_id, proxies=session.proxies)
        else:
            # No proxy could be obtained: fall back to a direct connection.
            transcript = YouTubeTranscriptApi.get_transcript(video_id)

        full_text = " ".join(entry["text"] for entry in transcript)
        chunks = textwrap.wrap(full_text, width=4000, break_long_words=False)
        return f"Word count: {len(full_text.split())}", chunks

    except (TranscriptsDisabled, NoTranscriptFound) as e:
        return str(e), []
    except Exception as e:
        proxy_used = session.proxies['http'] if session and hasattr(session, 'proxies') else 'None'
        return f"Error: {str(e)} (Proxy: {proxy_used})", []

# Gradio UI: a single URL textbox feeds fetch_transcript, whose two return
# values are shown in the "Status" and "Transcript Chunks" textboxes.
iface = gr.Interface(
    title="YouTube Transcript Fetcher with Proxy Rotation",
    description="Fetches transcripts using rotating proxies to avoid rate limits",
    fn=fetch_transcript,
    inputs=gr.Textbox(placeholder="Paste YouTube URL here...", label="YouTube URL"),
    outputs=[
        gr.Textbox(label="Status"),
        gr.Textbox(lines=10, label="Transcript Chunks"),
    ],
)

# Start serving the interface as soon as this module is executed.
iface.launch()