import gradio as gr
import cv2
import whisper
import spacy
from PIL import Image
from diffusers import StableDiffusionImg2ImgPipeline  # stable model to be updated if used
import torch
import logging
import os
import io

# Disable WANDB logging and configure logging level
logging.disable(logging.WARNING)
os.environ["WANDB_DISABLED"] = "true"

# Load models
whisper_model = whisper.load_model("base")
spacy.prefer_gpu()
spacy_nlp = spacy.load("en_core_web_sm")

# Initialize the model (img2img so the keyframes can condition generation;
# fp16 only when a GPU is available)
stable_diffusion_pipeline = StableDiffusionImg2ImgPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-2",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
).to("cuda" if torch.cuda.is_available() else "cpu")


def extract_keyframes(video_path, frame_interval=30, num_frames=5):
    try:
        cap = cv2.VideoCapture(video_path)
        frames = []
        success, frame = cap.read()
        count = 0
        # Keep every frame_interval-th frame until num_frames keyframes are collected
        while success and len(frames) < num_frames:
            if count % frame_interval == 0:
                frames.append(frame)
            success, frame = cap.read()
            count += 1
        cap.release()
        return frames
    except Exception as e:
        logging.error("Error extracting keyframes:", exc_info=e)
        return None


# Inline smoke tests below expect a local "video.mp4" file
def test_extract_keyframes():
    video_path = "video.mp4"
    frames = extract_keyframes(video_path)
    assert frames is not None, "Keyframe extraction failed"
    assert len(frames) > 0, "No keyframes extracted"
    print("Keyframe extraction test passed")

test_extract_keyframes()


def transcribe_audio(video_path):
    try:
        result = whisper_model.transcribe(video_path)
        return result['text']
    except Exception as e:
        logging.error("Error transcribing audio:", exc_info=e)
        return None


def test_transcribe_audio():
    video_path = "video.mp4"
    transcription = transcribe_audio(video_path)
    assert transcription is not None, "Transcription failed"
    assert len(transcription) > 0, "Empty transcription"
    print("Transcription test passed")

test_transcribe_audio()


def extract_keywords(text):
    try:
        if not text or not text.strip():
            logging.warning("Empty or whitespace-only text: No keywords extracted")
            return []
        doc = spacy_nlp(text)
        keywords = [chunk.text for chunk in doc.noun_chunks]
        if not keywords:
            logging.warning("No keywords extracted from the text")
        return keywords
    except Exception as e:
        logging.error("Error extracting keywords:", exc_info=e)
        return []


def test_extract_keywords():
    text = "This is a test text for keyword extraction."
    keywords = extract_keywords(text)
    assert keywords is not None, "Keyword extraction failed"
    assert len(keywords) > 0, "No keywords extracted"
    print("Keyword extraction test passed")

test_extract_keywords()


def generate_thumbnails(frames, keywords, num_thumbnails=3):
    try:
        thumbnails = []
        prompt = "A visually striking image of " + ", ".join(keywords)
        for frame in frames:
            # OpenCV frames are BGR numpy arrays; the img2img pipeline expects an RGB PIL image
            init_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).resize((512, 512))
            for _ in range(num_thumbnails):
                generated_image = stable_diffusion_pipeline(prompt, image=init_image).images[0]
                thumbnails.append(generated_image)
        return thumbnails
    except Exception as e:
        logging.exception("Error generating thumbnails:", exc_info=e)
        return None


def process_video(video):
    try:
        # Determine the video path based on the type of input
        video_path = video.name if hasattr(video, 'name') else video

        # Extract Keyframes
        frames = extract_keyframes(video_path)
        if frames is None:
            return handle_error("Error extracting keyframes. Please check the video file.")

        # Transcribe Audio
        transcription = transcribe_audio(video_path)
        if transcription is None:
            return handle_error("Error transcribing audio. Please check the audio quality.")

        # Extract Keywords
        keywords = extract_keywords(transcription)
        if not keywords:
            return handle_error("Error extracting keywords. Please check the transcription.")

        # Use the first keyword as title, the full transcription as text,
        # and a generic text placement description
        title = keywords[0] if keywords else "Thumbnail"
        text = transcription
        text_placement = "white lettering centered at the bottom, modern and dynamic"

        # Generate Thumbnails
        thumbnail_images = generate_thumbnails(frames, keywords)
        if not thumbnail_images:
            return handle_error("Error generating thumbnails. Please try again later.")

        return thumbnail_images, "Thumbnails generated successfully."
    except Exception as e:
        logging.exception("Unexpected error:", exc_info=e)
        return handle_error("An unexpected error occurred. Please try again later.")


def handle_error(error_message):
    # Return a placeholder image (red square) and the error message
    placeholder = Image.new('RGB', (512, 512), color=(255, 0, 0))
    return [placeholder], error_message


# Gradio interface
interface = gr.Interface(
    fn=process_video,
    inputs=gr.Video(label="Upload Video"),
    outputs=[
        gr.Gallery(label="Generated Thumbnails"),
        gr.Textbox(label="Status", lines=2, placeholder="Status message will appear here...")
    ],
    title="YouTube Thumbnail Generator",
    description="Upload a video and generate multiple thumbnails using the video content and transcription.",
    live=True
)

interface.launch()