# NOTE: The "Spaces:" / "Runtime error" lines originally here were status text
# captured from the Hugging Face Spaces page, not part of the program source.
#!/usr/bin/env python
#
# YouTube to X (Twitter) Thread Generator
# This Gradio app automates the process of turning a YouTube video
# into a multi-part X thread with corresponding video clips.
#
# --- 1. Installation ---
# Ensure you have all necessary packages installed:
# pip install gradio supadata google-generativeai pydantic yt-dlp moviepy tweepy pandas
# --- 2. Imports ---
import gradio as gr
import os
import re
import threading
import time
import glob
from supadata import Supadata                              # transcript fetching
import google.generativeai as genai                        # Gemini LLM client
from pydantic import BaseModel, Field                      # structured LLM output schema
from datetime import timedelta
import yt_dlp                                              # YouTube video download
from moviepy.video.io.VideoFileClip import VideoFileClip   # video clip slicing
import tweepy                                              # X/Twitter API
import pandas as pd
import traceback
# --- 3. Video Cleanup System ---
def cleanup_old_videos():
    """Delete generated video files in the working directory older than 15 minutes.

    Best-effort: any failure on an individual file is logged and skipped so a
    single locked/missing file cannot abort the sweep.
    """
    stale_after = 900  # 15 minutes, in seconds
    try:
        current_time = time.time()
        # Glob patterns covering everything this app writes to disk.
        for pattern in ("*.mp4", "*.webm", "*.mkv", "downloaded_video.*", "clip_*"):
            for file_path in glob.glob(pattern):
                try:
                    if current_time - os.path.getmtime(file_path) > stale_after:
                        os.remove(file_path)
                        print(f"ποΈ Cleaned up old video file: {file_path}")
                except Exception as e:
                    print(f"Failed to remove {file_path}: {e}")
    except Exception as e:
        print(f"Cleanup error: {e}")
def start_cleanup_scheduler():
    """Launch a background daemon thread that purges stale videos every 15 minutes.

    The thread is a daemon, so it never blocks interpreter shutdown.
    """
    def _sweep_forever():
        # Sleep first, then sweep — files younger than one interval survive.
        while True:
            time.sleep(900)  # 15-minute interval, in seconds
            cleanup_old_videos()

    worker = threading.Thread(target=_sweep_forever, daemon=True)
    worker.start()
    print("π§Ή Video cleanup scheduler started (runs every 15 minutes)")
# --- 4. Pydantic Model for Structured LLM Output ---
class StructuredXPosts(BaseModel):
    """Defines the expected JSON structure from the AI model.

    This model's JSON schema is embedded into the LLM prompt, and the model's
    JSON response is validated against it with ``model_validate_json``.
    Parallel lists: ``post_contents[i]`` pairs with ``timestamps[i]``.
    """
    post_contents: list[str] = Field(description="A list of content for X posts.")
    timestamps: list[str] = Field(description="Timestamps in 'HH:MM:SS-HH:MM:SS' format for each post.")
| # --- 5. Helper Functions --- | |
| def get_youtube_id(url: str) -> str | None: | |
| """Extracts the YouTube video ID from various URL formats.""" | |
| regex = r"(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})" | |
| match = re.search(regex, url) | |
| return match.group(1) if match else None | |
| def ms_to_hhmmss(ms: int) -> str: | |
| """Converts milliseconds to HH:MM:SS format.""" | |
| sec = ms // 1000 | |
| return str(timedelta(seconds=sec)) | |
def time_to_seconds(t: str) -> float:
    """Convert a 'HH:MM:SS', 'MM:SS', or bare seconds string to total seconds."""
    fields = [float(piece) for piece in t.strip().split(":")]
    if len(fields) == 3:
        hours, minutes, seconds = fields
        return hours * 3600 + minutes * 60 + seconds
    if len(fields) == 2:
        minutes, seconds = fields
        return minutes * 60 + seconds
    # Single value (or anything longer than 3 fields): first field, as before.
    return fields[0]
# --- 6. AI Prompt Template ---
# Only [VIDEO_TYPE] and [SUBJECT_TYPE] are substituted at run time (see
# create_video_thread); the other bracketed ALL-CAPS tokens are structural
# guidance for the LLM and are deliberately left in place.
HEAD_PROMPT_TEMPLATE = """
Below is a transcript of a [VIDEO_TYPE] video.
I want to create a X thread with this format. The first post will be the opener with a video clip of the [SUBJECT_TYPE].
Opener Post Format:
[MAIN_HOOK_STATEMENT]:
[KEY_POINT_1]
[KEY_POINT_2]
[KEY_POINT_3]
[CONTEXT_OR_SETUP]
[INTRIGUING_HOOK_LINE] π§΅
Follow-up Posts Format:
Each follow-up post should:
Start with an engaging hook related to the subject.
Present 2-4 key points or insights from the transcript.
Maintain narrative flow toward the conclusion.
Closing Post Format:
[KEY_TAKEAWAYS_OR_ADVICE]:
[ACTIONABLE_POINT_1]
[ACTIONABLE_POINT_2]
[ACTIONABLE_POINT_3]
[MEMORABLE_CLOSING_LINE]
CRITICAL INSTRUCTIONS:
1. Do not include any markdown formatting in the posts. But include line breaks for better readability.
2. Do not include any hashtags in the posts.
3. Only the first post should have the π§΅ emoji.
4. Each post must be less than 280 characters.
5. Provide timestamps for video extraction from the transcript for each post. The timestamp range should be 30 seconds to 1 minute.
"""
# --- 7. Main Processing Function ---
def create_video_thread(
    youtube_url: str,
    num_posts: int,
    video_type: str,
    subject_type: str,
    post_to_x: bool,
    twitter_api_key: str,
    twitter_api_secret: str,
    twitter_access_token: str,
    twitter_access_secret: str,
    progress=gr.Progress(track_tqdm=True)
):
    """
    The main workflow function that powers the Gradio app.
    Orchestrates transcript fetching, AI content generation, video clipping, and posting.

    Args:
        youtube_url: Full URL of the source YouTube video.
        num_posts: Desired number of posts in the generated thread.
        video_type: Free-text video description (e.g. 'podcast') inserted into the prompt.
        subject_type: Free-text subject (e.g. 'CEO') inserted into the prompt.
        post_to_x: When True, the thread is posted to X using the four credentials below.
        twitter_api_key / twitter_api_secret / twitter_access_token / twitter_access_secret:
            OAuth 1.0a user-context credentials; required only when post_to_x is True.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        Tuple of (status message, DataFrame of posts/timestamps, list of clip
        file paths, gr.update for the tweet-links Markdown panel). On failure,
        returns an error message with empty outputs instead of raising.
    """
    # SECURITY FIX: read API keys from the environment instead of shipping them
    # only in source. The literal fallbacks preserve previous behavior for
    # existing deployments, but these keys are exposed and should be rotated
    # and the fallbacks removed.
    supadata_api_key = os.environ.get("SUPADATA_API_KEY", "sd_f5d8d8c915ea3cd8d96ed0a12840635d")
    gemini_api_key = os.environ.get("GEMINI_API_KEY", "AIzaSyCoGuPenJnmvOYasBLFhH4_TtCVUZj1kdQ")
    try:
        # --- Stage 0: Validation & Setup ---
        progress(0, desc="π Starting...")
        if not all([youtube_url, num_posts, video_type, subject_type]):
            raise gr.Error("Please fill in all required fields: URL, Number of Posts, Video Type, and Subject Type.")
        if post_to_x and not all([twitter_api_key, twitter_api_secret, twitter_access_token, twitter_access_secret]):
            raise gr.Error("To post to X, all four X API keys are required.")
        yt_video_id = get_youtube_id(youtube_url)
        if not yt_video_id:
            raise gr.Error("Invalid YouTube URL. Could not extract video ID.")
        # --- Stage 1: Get Transcript ---
        progress(0.1, desc="π Fetching video transcript...")
        supadata = Supadata(api_key=supadata_api_key)
        transcript = supadata.youtube.transcript(video_id=yt_video_id, lang="en")
        if not transcript.content:
            raise gr.Error("Could not fetch transcript. The video might not have one, or it could be private.")
        # Flatten each transcript chunk into "text [start - end]" lines so the
        # LLM can cite timestamps.
        transcript_arr = [
            "{} [{} - {}]".format(
                chunk.text.strip().replace("\n", " "),
                ms_to_hhmmss(int(chunk.offset)),
                ms_to_hhmmss(int(chunk.offset) + int(chunk.duration))
            )
            for chunk in transcript.content
        ]
        # --- Stage 2: Generate Posts with LLM ---
        progress(0.25, desc="π€ Generating X thread with AI...")
        genai.configure(api_key=gemini_api_key)
        head_prompt = HEAD_PROMPT_TEMPLATE.replace("[VIDEO_TYPE]", video_type).replace("[SUBJECT_TYPE]", subject_type)
        full_prompt = f"""{head_prompt}\nInstructions: You should create {num_posts} such posts.\n\nTranscript:\n{transcript_arr}\n\nPlease provide your response as a JSON object that strictly adheres to the following schema: {StructuredXPosts.model_json_schema()}"""
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(
            full_prompt,
            generation_config=genai.types.GenerationConfig(response_mime_type="application/json")
        )
        structured_data = StructuredXPosts.model_validate_json(response.text)
        all_post_contents = structured_data.post_contents
        all_timestamps = structured_data.timestamps
        if not all_post_contents or not all_timestamps:
            raise gr.Error("AI failed to generate posts. The transcript might be too short or the topic unclear.")
        # ROBUSTNESS FIX: the model occasionally returns lists of unequal
        # length; trim both to the common length so indexing below is safe.
        pair_count = min(len(all_post_contents), len(all_timestamps))
        all_post_contents = all_post_contents[:pair_count]
        all_timestamps = all_timestamps[:pair_count]
        # --- Stage 3: Download Video ---
        progress(0.5, desc="π₯ Downloading original YouTube video (this may take a moment)...")
        video_url_full = f"https://www.youtube.com/watch?v={yt_video_id}"
        output_path_template = "downloaded_video.%(ext)s"
        ydl_opts = {
            'format': 'bestvideo[height<=720]+bestaudio/best[height<=720]',
            'outtmpl': output_path_template,
            'merge_output_format': 'mp4',  # streams are merged into a single mp4
            'quiet': True,
        }
        downloaded_filepath = ""
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            result = ydl.extract_info(video_url_full, download=True)
            # prepare_filename reflects the original extension; after merging
            # the file on disk is always .mp4.
            base, _ = os.path.splitext(ydl.prepare_filename(result))
            downloaded_filepath = base + '.mp4'
        if not os.path.exists(downloaded_filepath):
            raise gr.Error(f"Failed to download video file. Expected at: {downloaded_filepath}")
        # --- Stage 4: Clip Videos ---
        progress(0.7, desc="βοΈ Slicing video into clips...")
        video = VideoFileClip(downloaded_filepath)
        output_clips = []
        # BUGFIX: remember which post index each successful clip belongs to.
        # Previously, posts were paired with clips by truncating the post list
        # to len(output_clips), which misaligned every post after a skipped
        # clip (post i would get post i+1's clip).
        kept_indices = []
        try:
            for i, r in enumerate(progress.tqdm(all_timestamps, desc="Clipping")):
                try:
                    start_str, end_str = r.split("-")
                    start_sec = time_to_seconds(start_str.strip())
                    end_sec = time_to_seconds(end_str.strip())
                    # Skip empty/inverted ranges and ranges past the video end.
                    if start_sec >= end_sec or end_sec > video.duration:
                        continue
                    subclip = video.subclip(start_sec, end_sec)
                    clip_output_path = f"clip_{yt_video_id}_{i+1}.mp4"
                    subclip.write_videofile(clip_output_path, codec="libx264", audio_codec="aac", verbose=False, logger=None)
                    output_clips.append(clip_output_path)
                    kept_indices.append(i)
                except Exception as e:
                    print(f"Skipping clip for timestamp '{r}' due to error: {e}")
                    continue
        finally:
            # FIX: always release the reader, even if clipping fails midway.
            video.close()
        df = pd.DataFrame({
            "Post Content": [all_post_contents[i] for i in kept_indices],
            "Timestamp": [all_timestamps[i] for i in kept_indices]
        })
        # --- Stage 5: Post to X (Optional) ---
        tweet_links_md = "### Tweet URLs\n*Posting to X was not selected.*"
        if post_to_x:
            progress(0.9, desc="ποΈ Posting thread to X...")
            # v2 client for tweet creation; v1.1 API is still required for
            # chunked video media uploads.
            client = tweepy.Client(
                consumer_key=twitter_api_key,
                consumer_secret=twitter_api_secret,
                access_token=twitter_access_token,
                access_token_secret=twitter_access_secret
            )
            auth = tweepy.OAuth1UserHandler(
                consumer_key=twitter_api_key,
                consumer_secret=twitter_api_secret,
                access_token=twitter_access_token,
                access_token_secret=twitter_access_secret
            )
            api = tweepy.API(auth)
            previous_tweet_id = None
            tweet_links = []
            user_info = client.get_me(user_fields=["username"]).data
            username = user_info.username
            for i in progress.tqdm(range(len(output_clips)), desc="Tweeting"):
                media = api.media_upload(filename=output_clips[i], media_category='tweet_video', chunked=True)
                # Each tweet replies to the previous one to form the thread.
                tweet = client.create_tweet(
                    text=df["Post Content"].iloc[i],
                    media_ids=[media.media_id],
                    in_reply_to_tweet_id=previous_tweet_id
                )
                previous_tweet_id = tweet.data['id']
                tweet_links.append(f"https://x.com/{username}/status/{previous_tweet_id}")
            # Final reply credits the source video.
            client.create_tweet(text=f"Source video: {youtube_url}", in_reply_to_tweet_id=previous_tweet_id)
            tweet_links_md = "### β Successfully Posted Tweet URLs\n" + "\n".join([f"* [Tweet {i+1}]({url})" for i, url in enumerate(tweet_links)])
        progress(1, desc="π Done!")
        # Clean up the main downloaded video immediately
        if os.path.exists(downloaded_filepath):
            os.remove(downloaded_filepath)
        # Note: Clip files will be automatically cleaned up by the background scheduler
        return "Generation Complete!", df, output_clips, gr.update(value=tweet_links_md, visible=True)
    except Exception as e:
        traceback.print_exc()
        error_message = f"An error occurred: {e}"
        return error_message, pd.DataFrame(), [], gr.update(visible=False)
# --- 8. Gradio UI Layout ---
with gr.Blocks(theme=gr.themes.Soft(), title="YouTube to X Thread Generator") as app:
    gr.Markdown("# π YouTube to X Thread Generator")
    gr.Markdown("Turn any YouTube video into an engaging, multi-part X (Twitter) thread with video clips.")
    with gr.Row():
        # Left column: all user inputs.
        with gr.Column(scale=2):
            gr.Markdown("### 1. Input Video & Content Details")
            youtube_url = gr.Textbox(label="YouTube Video URL", placeholder="e.g., https://www.youtube.com/watch?v=VISDGlpX0WI")
            num_posts = gr.Slider(minimum=3, maximum=15, value=8, step=1, label="Number of Posts in the Thread")
            with gr.Row():
                video_type = gr.Textbox(label="Video Type", placeholder="e.g., 'podcast', 'documentary'")
                subject_type = gr.Textbox(label="Subject Type", placeholder="e.g., 'CEO', 'historical event'")
            # X credentials are collapsed by default; only needed when the
            # user opts in to posting the thread directly.
            with gr.Accordion("π X/Twitter API Keys (Optional)", open=False):
                gr.Markdown("*Enter your X/Twitter keys below ONLY if you want to post the thread directly.*")
                twitter_api_key = gr.Textbox(label="X API Key", type="password")
                twitter_api_secret = gr.Textbox(label="X API Key Secret", type="password")
                twitter_access_token = gr.Textbox(label="X Access Token", type="password")
                twitter_access_secret = gr.Textbox(label="X Access Token Secret", type="password")
            with gr.Row(elem_id="action_buttons"):
                post_to_x_checkbox = gr.Checkbox(label="β Post Thread directly to X?", value=False)
                submit_btn = gr.Button("Generate Thread", variant="primary")
        # Right column: generated outputs (status, posts table, clip gallery,
        # and the tweet-links panel that becomes visible after posting).
        with gr.Column(scale=3):
            gr.Markdown("### 2. Generated Content & Clips")
            status_output = gr.Textbox(label="Status", interactive=False, show_copy_button=True)
            posts_output = gr.DataFrame(headers=["Post Content", "Timestamp"], label="Generated Posts", interactive=False, wrap=True)
            clips_output = gr.Gallery(label="Generated Video Clips", show_label=False, elem_id="gallery", columns=[3], rows=[2], object_fit="contain", height="auto")
            tweet_urls_output = gr.Markdown("### Tweet URLs\n*No tweets posted yet.*", visible=False)
    # Wire the submit button to the main workflow; input order must match
    # create_video_thread's parameter order.
    submit_btn.click(
        fn=create_video_thread,
        inputs=[
            youtube_url, num_posts, video_type, subject_type,
            post_to_x_checkbox,
            twitter_api_key, twitter_api_secret, twitter_access_token, twitter_access_secret
        ],
        outputs=[status_output, posts_output, clips_output, tweet_urls_output]
    )
if __name__ == "__main__":
    # Start the automatic video cleanup scheduler (background daemon thread).
    start_cleanup_scheduler()
    # Launch the app.
    # NOTE(review): share=True publishes a public Gradio link and debug=True
    # surfaces verbose errors — both are risky outside local development.
    app.launch(debug=True, share=True)