Spaces:

maria355
/

AI-Video-Script-and-Storyboard-Generator

Sleeping

App Files Files Community

AI-Video-Script-and-Storyboard-Generator / app.py

maria355

Update app.py

17840c3 verified 5 months ago

raw

history blame contribute delete

22 kB

	import streamlit as st
	import google.generativeai as genai
	from huggingface_hub import InferenceClient
	import requests
	from PIL import Image
	import io
	import json
	import time
	import zipfile
	import tempfile
	import os
	from gtts import gTTS

	# Configure page
	st.set_page_config(
	    page_title="AI Video Script & Storyboard Generator",
	    page_icon="🎬",
	    layout="wide",
	)

	# Initialize session state: create each slot only when it is not already
	# present, so values stored earlier in the session are left alone.
	for _slot, _initial in (
	    ('generated_script', None),
	    ('storyboard_images', []),
	    ('gif_preview', None),
	):
	    if _slot not in st.session_state:
	        st.session_state[_slot] = _initial

	# API Configuration
	def load_api_keys():
	    """Load the Gemini and Hugging Face credentials.

	    Each key is looked up in ``st.secrets`` first and then in the process
	    environment. When either key is missing an error is shown and the script
	    run is halted with ``st.stop()``.

	    Returns:
	        A ``(gemini_api_key, hf_token)`` tuple.
	    """

	    def _lookup(name):
	        # Accessing st.secrets can itself raise (e.g. when no secrets file is
	        # configured); treat that as "not set" so the environment variable
	        # fallback below is still consulted.
	        try:
	            value = st.secrets.get(name)
	        except Exception:
	            value = None
	        return value or os.getenv(name)

	    gemini_api_key = _lookup("GEMINI_API_KEY")
	    hf_token = _lookup("HF_TOKEN")

	    if not gemini_api_key or not hf_token:
	        st.error("❌ API Keys not found. Please configure GEMINI_API_KEY and HF_TOKEN")
	        st.stop()

	    return gemini_api_key, hf_token

	# Load API keys and hand them to the two AI backends.
	gemini_api_key, hf_token = load_api_keys()
	genai.configure(api_key=gemini_api_key)
	client = InferenceClient(token=hf_token)

	# Main title
	st.title("🎬 AI Video Script & Storyboard Generator")
	st.markdown("Create professional video scripts and visual storyboards with AI assistance")

	# Input section
	st.header("📝 Video Specifications")

	# Choices offered by the select boxes below.
	_LENGTH_OPTIONS = ["30 seconds", "1 minute", "2 minutes", "3 minutes", "5 minutes", "Custom"]
	_STYLE_OPTIONS = ["Explainer", "Cinematic", "Tutorial", "Vlog", "Animation", "Documentary", "Commercial"]
	_TONE_OPTIONS = ["Professional", "Funny", "Serious", "Dramatic", "Inspirational", "Casual", "Educational"]
	_PLATFORM_OPTIONS = ["YouTube", "TikTok", "Instagram Reels", "LinkedIn", "Presentation", "General"]
	_ART_STYLE_OPTIONS = ["Realistic", "Cartoon", "Cinematic", "Minimalistic", "Sketch", "Digital Art"]

	col1, col2 = st.columns(2)

	# Left column: what the video is about, how long it runs, and its style.
	with col1:
	    video_topic = st.text_area(
	        "Video Topic",
	        placeholder="Enter your video topic or detailed description...",
	        height=100,
	    )

	    video_length = st.selectbox("Video Length", _LENGTH_OPTIONS)

	    # "Custom" is replaced by the entered number of seconds so that
	    # video_length always reads "<n> <unit>".
	    if video_length == "Custom":
	        custom_length = st.number_input("Custom length (seconds)", min_value=10, max_value=600, value=60)
	        video_length = f"{custom_length} seconds"

	    style = st.selectbox("Video Style", _STYLE_OPTIONS)

	# Right column: tone, target platform, and storyboard art style.
	with col2:
	    tone = st.selectbox("Tone/Emotion", _TONE_OPTIONS)
	    platform = st.selectbox("Target Platform", _PLATFORM_OPTIONS)
	    art_style = st.selectbox("Storyboard Art Style", _ART_STYLE_OPTIONS)

	# Functions for AI generation
	def generate_script_with_gemini(topic, length, style, tone, platform):
	    """Generate a structured video script with the Gemini API.

	    Args:
	        topic: Subject of the video.
	        length: Requested duration as text, e.g. "30 seconds".
	        style: Video style name inserted into the prompt.
	        tone: Tone/emotion name inserted into the prompt.
	        platform: Target platform name inserted into the prompt.

	    Returns:
	        The parsed JSON object (a dict with a "scenes" list) from the model's
	        reply. On any failure the error is shown with ``st.error`` and the
	        result of ``generate_fallback_script`` is returned instead.
	    """
	    try:
	        model = genai.GenerativeModel('gemini-1.5-flash')

	        prompt = f"""
	Create a detailed video script for the following specifications:

	Topic: {topic}
	Length: {length}
	Style: {style}
	Tone: {tone}
	Platform: {platform}

	Format the output as JSON with the following structure:
	{{
	"title": "Video Title",
	"total_duration": "{length}",
	"scenes": [
	{{
	"scene_number": 1,
	"duration": "10 seconds",
	"description": "Visual description for storyboard",
	"dialogue": "Script/narration text",
	"camera_angle": "Wide shot/Close-up/etc",
	"visual_elements": "Key visual elements to include"
	}}
	]
	}}

	Make sure the scenes add up to the total duration and are engaging for {platform}.
	Include specific visual descriptions that can be used to generate storyboard images.
	Return only valid JSON, no additional text.
	"""

	        response = model.generate_content(prompt)
	        response_text = response.text.strip()

	        # Clean JSON response: keep only the outermost {...} span. This drops
	        # Markdown code fences (with any language tag) as well as stray text
	        # before or after the object, without assuming fixed fence lengths.
	        start = response_text.find("{")
	        end = response_text.rfind("}")
	        if start != -1 and end > start:
	            response_text = response_text[start:end + 1]

	        script_data = json.loads(response_text)

	        # Callers index script_data['scenes']; reject any other shape here so
	        # the fallback script is used instead of failing later.
	        if not isinstance(script_data, dict) or not isinstance(script_data.get("scenes"), list):
	            raise ValueError("response JSON does not contain a 'scenes' list")

	        return script_data

	    except Exception as e:
	        st.error(f"Error generating script: {str(e)}")
	        return generate_fallback_script(topic, length, style, tone, platform)

	def generate_fallback_script(topic, length, style, tone, platform, art_style=None):
	    """Build a simple template script without calling any AI service.

	    Args:
	        topic: Subject of the video; inserted into every scene's text.
	        length: Duration text such as "30 seconds" or "2 minutes". Anything
	            that cannot be parsed is treated as 60 seconds.
	        style: Video style name (lower-cased inside scene text).
	        tone: Tone name (lower-cased inside the dialogue text).
	        platform: Accepted for signature parity with the Gemini generator;
	            not used here.
	        art_style: Storyboard art style mentioned in scene descriptions. When
	            omitted, the module-level ``art_style`` selection is used if it
	            exists, otherwise "Realistic".

	    Returns:
	        A dict with "title", "total_duration" and "scenes" (3 to 8 entries
	        whose durations add up to the parsed total), or None when an
	        unexpected error occurs (the error is shown with ``st.error``).
	    """
	    try:
	        if art_style is None:
	            # Earlier versions read the widget value straight from module
	            # scope; keep that as the default source for existing callers.
	            art_style = globals().get("art_style") or "Realistic"

	        # Parse length
	        try:
	            amount = float(length.split()[0])
	        except (ValueError, IndexError):
	            amount = None

	        unit_text = length.lower()
	        if amount is None:
	            total_seconds = 60
	        elif "second" in unit_text:
	            total_seconds = int(amount)
	        elif "minute" in unit_text:
	            total_seconds = int(amount * 60)
	        else:
	            total_seconds = 60
	        total_seconds = max(total_seconds, 0)

	        # Create scenes
	        num_scenes = max(3, min(8, total_seconds // 10))  # 3-8 scenes
	        # Spread the remainder over the first scenes so that the individual
	        # durations sum exactly to total_seconds.
	        base_duration, remainder = divmod(total_seconds, num_scenes)

	        camera_angles = ["Wide shot", "Medium shot", "Close-up", "Over shoulder"]
	        middle_types = ["main content", "detail"]

	        scenes = []
	        for i in range(num_scenes):
	            # First scene opens, last scene concludes, everything in between
	            # alternates between main content and detail.
	            if i == 0:
	                scene_type = "opening"
	            elif i == num_scenes - 1:
	                scene_type = "conclusion"
	            else:
	                scene_type = middle_types[(i - 1) % len(middle_types)]

	            scene_duration = base_duration + (1 if i < remainder else 0)

	            scenes.append({
	                "scene_number": i + 1,
	                "duration": f"{scene_duration} seconds",
	                "description": f"A {style.lower()} {scene_type} scene about {topic}, showing professional visuals in {art_style.lower()} style",
	                "dialogue": f"Engaging {tone.lower()} narration about {topic} for scene {i+1}",
	                "camera_angle": camera_angles[i % len(camera_angles)],
	                "visual_elements": f"Professional visuals related to {topic}, {style.lower()} cinematography",
	            })

	        return {
	            "title": f"{topic} - {style} Video",
	            "total_duration": length,
	            "scenes": scenes,
	        }
	    except Exception as e:
	        st.error(f"Error creating fallback script: {str(e)}")
	        return None

	def generate_storyboard_image_stable(scene_description, art_style, max_retries=3):
	    """Generate one storyboard frame, falling back to a placeholder image.

	    Args:
	        scene_description: Text describing the scene to draw.
	        art_style: Key into the style table below; unknown styles use a
	            generic "professional, high quality" enhancement.
	        max_retries: Maximum number of generation attempts. Attempts cycle
	            through three prompt phrasings, pausing 2 seconds between tries.

	    Returns:
	        The object returned by ``client.text_to_image`` as soon as one is
	        truthy and has a ``size`` attribute; otherwise the result of
	        ``create_placeholder_image``.
	    """
	    style_prompts = {
	        "Realistic": "photorealistic, professional, high quality, detailed",
	        "Cartoon": "cartoon style, animated, colorful, illustration, Disney-like",
	        "Cinematic": "cinematic, dramatic lighting, film still, movie scene",
	        "Minimalistic": "minimalist, clean, simple, modern design",
	        "Sketch": "pencil sketch, hand-drawn, artistic, line art",
	        "Digital Art": "digital art, concept art, vibrant colors, detailed",
	    }

	    # Create enhanced prompt
	    base_prompt = f"{scene_description}"
	    style_enhancement = style_prompts.get(art_style, "professional, high quality")
	    enhanced_prompt = f"{base_prompt}, {style_enhancement}, storyboard frame"

	    # Try different approaches
	    approaches = [
	        enhanced_prompt,
	        f"storyboard illustration: {base_prompt}, {style_enhancement}",
	        f"{base_prompt}, simple illustration, clean design",
	    ]

	    # Honour max_retries (previously accepted but ignored). With the default
	    # of 3 each phrasing is tried exactly once, as before.
	    attempts = max(0, int(max_retries))
	    for attempt in range(attempts):
	        prompt = approaches[attempt % len(approaches)]
	        try:
	            image = client.text_to_image(
	                prompt,
	                model="runwayml/stable-diffusion-v1-5",
	            )
	            if image and hasattr(image, 'size'):
	                return image
	        except Exception:
	            # A failed request is handled like an unusable result: move on
	            # to the next attempt, or to the placeholder once out of tries.
	            image = None

	        if attempt < attempts - 1:
	            time.sleep(2)  # Wait before retry

	    # Create a placeholder image as last resort
	    return create_placeholder_image(f"Scene: {scene_description[:50]}...")

	def create_placeholder_image(text):
	    """Create a grey 512x384 placeholder image with ``text`` drawn on it.

	    The text is laid out four words per line starting at y=150, and drawing
	    stops once the next line would start below y=300.

	    Args:
	        text: Caption to draw.

	    Returns:
	        The new image, or None if anything fails (including Pillow being
	        unavailable, since the import happens inside the guarded block).
	    """
	    try:
	        from PIL import Image, ImageDraw, ImageFont

	        # Create a simple placeholder
	        img = Image.new('RGB', (512, 384), color=(200, 200, 200))
	        draw = ImageDraw.Draw(img)

	        # Try to use default font; draw.text falls back to its own default
	        # when font is None.
	        try:
	            font = ImageFont.load_default()
	        except Exception:
	            font = None

	        # Split on any whitespace so repeated spaces or newlines in the text
	        # do not produce empty "words" or multi-line fragments.
	        words = text.split()
	        line_height = 30
	        y_pos = 150

	        for i in range(0, len(words), 4):  # 4 words per line
	            line = ' '.join(words[i:i + 4])
	            draw.text((50, y_pos), line, fill=(50, 50, 50), font=font)
	            y_pos += line_height
	            if y_pos > 300:  # Don't overflow
	                break

	        return img
	    except Exception:
	        return None

	def create_gif_preview(images, script_data):
	    """Assemble the storyboard frames into a looping animated GIF.

	    Args:
	        images: Iterable of image objects; ``None`` entries are skipped, as
	            are entries that fail to resize.
	        script_data: Not used by this function.

	    Returns:
	        An ``io.BytesIO`` rewound to the start and holding the GIF (400x300
	        frames, 2.5 seconds each, looping forever), or None when there is no
	        usable frame or an error occurs (shown with ``st.error``).
	    """
	    try:
	        frames = []
	        for candidate in images:
	            if candidate is None:
	                continue
	            try:
	                frames.append(candidate.resize((400, 300), Image.Resampling.LANCZOS))
	            except Exception:
	                # Skip frames that cannot be resized.
	                continue

	        if not frames:
	            return None

	        first_frame, *remaining_frames = frames

	        output = io.BytesIO()
	        first_frame.save(
	            output,
	            format='GIF',
	            save_all=True,
	            append_images=remaining_frames,
	            duration=2500,  # milliseconds per frame
	            loop=0,
	        )
	        output.seek(0)
	        return output

	    except Exception as e:
	        st.error(f"Error creating GIF: {str(e)}")
	        return None

	def text_to_speech(text, language='en'):
	    """Synthesize speech for ``text`` with gTTS.

	    Args:
	        text: Text to speak. Text longer than 500 characters is cut to the
	            first 500 characters followed by "...".
	        language: Language code passed to gTTS as ``lang``.

	    Returns:
	        An ``io.BytesIO`` rewound to the start and holding the audio written
	        by gTTS, or None when an error occurs (shown with ``st.error``).
	    """
	    try:
	        # Limit text length to avoid issues
	        spoken = text if len(text) <= 500 else text[:500] + "..."

	        speech = gTTS(text=spoken, lang=language, slow=False)
	        output = io.BytesIO()
	        speech.write_to_fp(output)
	        output.seek(0)
	        return output
	    except Exception as e:
	        st.error(f"Error generating speech: {str(e)}")
	        return None

	def create_download_zip(images, script_data):
	    """Bundle the script and storyboard frames into an in-memory ZIP.

	    The archive holds ``script.json`` (the script dumped with indent=2),
	    ``script.txt`` (a readable rendering) and one ``scene_NN.png`` per truthy
	    image, numbered by position in ``images``.

	    Args:
	        images: Sequence of image objects exposing ``save(fp, format=...)``;
	            falsy entries and entries that fail to save are skipped.
	        script_data: Script dict with optional "title", "total_duration" and
	            "scenes" keys.

	    Returns:
	        An ``io.BytesIO`` rewound to the start, or None when an error occurs
	        (shown with ``st.error``).
	    """
	    try:
	        archive_bytes = io.BytesIO()

	        with zipfile.ZipFile(archive_bytes, 'w', zipfile.ZIP_DEFLATED) as archive:
	            # Machine-readable copy of the script.
	            archive.writestr("script.json", json.dumps(script_data, indent=2))

	            # Human-readable copy, assembled piece by piece and joined once.
	            pieces = [
	                f"Title: {script_data.get('title', '')}\n",
	                f"Duration: {script_data.get('total_duration', '')}\n\n",
	            ]
	            for number, scene in enumerate(script_data.get('scenes', []), 1):
	                pieces.extend((
	                    f"=== SCENE {number} ===\n",
	                    f"Duration: {scene.get('duration', '')}\n",
	                    f"Camera: {scene.get('camera_angle', '')}\n",
	                    f"Description: {scene.get('description', '')}\n",
	                    f"Dialogue: {scene.get('dialogue', '')}\n",
	                    f"Visual Elements: {scene.get('visual_elements', '')}\n\n",
	                ))
	            archive.writestr("script.txt", "".join(pieces))

	            # Storyboard frames, each encoded as PNG.
	            for index, picture in enumerate(images):
	                if not picture:
	                    continue
	                encoded = io.BytesIO()
	                try:
	                    picture.save(encoded, format='PNG')
	                    archive.writestr(f"scene_{index+1:02d}.png", encoded.getvalue())
	                except Exception:
	                    continue

	        archive_bytes.seek(0)
	        return archive_bytes
	    except Exception as e:
	        st.error(f"Error creating ZIP file: {str(e)}")
	        return None

	# Main generation button: on click, build the script and then one storyboard
	# image per scene, storing both in session state for the display section.
	if st.button("🚀 Generate Video Script & Storyboard", type="primary"):
	    if not video_topic.strip():
	        st.error("Please enter a video topic")
	    else:
	        # Generate script
	        with st.spinner("🤖 Generating script with AI..."):
	            script_data = generate_script_with_gemini(video_topic, video_length, style, tone, platform)

	        # Only a non-empty dict is usable downstream; anything else is
	        # reported as a failed generation.
	        if isinstance(script_data, dict) and script_data:
	            st.session_state.generated_script = script_data
	            # A GIF built from a previous storyboard no longer matches the
	            # new script, so discard it.
	            st.session_state.gif_preview = None
	            st.success("✅ Script generated successfully!")

	            # Generate storyboard images
	            st.info("🎨 Generating storyboard images (this may take a few minutes)...")
	            images = []

	            # Create progress tracking
	            progress_container = st.container()
	            with progress_container:
	                progress_bar = st.progress(0)
	                status_text = st.empty()

	            # Tolerate a script without a "scenes" list instead of raising.
	            scenes = script_data.get('scenes') or []
	            total_scenes = len(scenes)

	            for i, scene in enumerate(scenes):
	                status_text.text(f"Generating image {i+1}/{total_scenes}: Scene {i+1}")

	                try:
	                    image = generate_storyboard_image_stable(
	                        scene['description'],
	                        art_style
	                    )
	                    images.append(image)

	                    if image:
	                        st.success(f"✅ Scene {i+1} generated successfully")
	                    else:
	                        st.warning(f"⚠️ Scene {i+1} failed, using placeholder")

	                except Exception as e:
	                    st.error(f"❌ Error generating scene {i+1}: {str(e)}")
	                    images.append(None)

	                progress_bar.progress((i + 1) / total_scenes)

	                # Rate limiting
	                if i < total_scenes - 1:  # Don't wait after last image
	                    time.sleep(3)  # Wait 3 seconds between requests

	            status_text.text("✅ Storyboard generation complete!")

	            st.session_state.storyboard_images = images
	            st.success(f"✅ Generated {len([img for img in images if img is not None])} out of {len(images)} storyboard images!")

	        else:
	            st.error("Failed to generate script. Please try again.")

	# Display results: shown on every rerun once a script is stored in session
	# state. Three tabs cover the script text, the storyboard, and downloads.
	if st.session_state.generated_script:
	    script_data = st.session_state.generated_script
	    # Read the scene list once, tolerating a script without "scenes".
	    scenes = script_data.get('scenes') or []

	    st.header("📜 Generated Script")
	    st.subheader(f"🎬 {script_data.get('title', 'Video Title')}")
	    st.write(f"Duration: {script_data.get('total_duration', 'N/A')}")

	    # Display script in tabs
	    tab1, tab2, tab3 = st.tabs(["📝 Script Details", "🖼️ Storyboard", "📥 Export"])

	    with tab1:
	        for i, scene in enumerate(scenes, 1):
	            with st.expander(f"Scene {i} - {scene.get('duration', 'N/A')}", expanded=False):
	                col1, col2 = st.columns(2)

	                with col1:
	                    st.write("Visual Description:")
	                    st.write(scene.get('description', 'N/A'))
	                    st.write("Camera Angle:")
	                    st.write(scene.get('camera_angle', 'N/A'))

	                with col2:
	                    st.write("Dialogue/Narration:")
	                    st.write(scene.get('dialogue', 'N/A'))
	                    st.write("Visual Elements:")
	                    st.write(scene.get('visual_elements', 'N/A'))

	                # Text-to-speech (the key keeps each scene's button distinct)
	                dialogue = scene.get('dialogue', '')
	                if dialogue and st.button(f"🔊 Play Audio - Scene {i}", key=f"audio_{i}"):
	                    with st.spinner("Generating audio..."):
	                        audio_buffer = text_to_speech(dialogue)
	                        if audio_buffer:
	                            st.audio(audio_buffer.getvalue(), format='audio/mp3')

	    with tab2:
	        if st.session_state.storyboard_images:
	            st.subheader("🎨 Storyboard Images")

	            # Show images in a grid
	            cols_per_row = 2
	            for i in range(0, len(st.session_state.storyboard_images), cols_per_row):
	                cols = st.columns(cols_per_row)

	                for j, col in enumerate(cols):
	                    idx = i + j
	                    if idx < len(st.session_state.storyboard_images):
	                        image = st.session_state.storyboard_images[idx]
	                        scene = scenes[idx] if idx < len(scenes) else {}

	                        with col:
	                            if image:
	                                st.image(image, caption=f"Scene {idx+1}", use_container_width=True)
	                            else:
	                                st.write(f"❌ Scene {idx+1} - Image failed to generate")

	                            st.write(f"Duration: {scene.get('duration', 'N/A')}")

	                            # Show at most 100 characters, adding an ellipsis
	                            # only when text was actually cut off.
	                            description = str(scene.get('description') or 'N/A')
	                            if len(description) > 100:
	                                description = description[:100] + "..."
	                            st.write(f"Description: {description}")

	            # GIF Preview section
	            st.subheader("📱 Animated Preview")
	            col1, col2 = st.columns([1, 2])

	            with col1:
	                if st.button("🎬 Create GIF Preview"):
	                    with st.spinner("Creating animated preview..."):
	                        gif_buffer = create_gif_preview(
	                            st.session_state.storyboard_images,
	                            script_data
	                        )
	                        if gif_buffer:
	                            st.session_state.gif_preview = gif_buffer
	                            st.success("GIF preview created!")
	                        else:
	                            st.error("Failed to create GIF preview")

	            with col2:
	                if st.session_state.gif_preview:
	                    st.image(st.session_state.gif_preview.getvalue(), caption="Storyboard Preview")
	        else:
	            st.info("Generate storyboard images first using the button above.")

	    with tab3:
	        st.subheader("📥 Download Options")

	        col1, col2, col3 = st.columns(3)

	        with col1:
	            # Script download
	            script_json = json.dumps(script_data, indent=2)
	            st.download_button(
	                label="📄 Download Script (JSON)",
	                data=script_json,
	                file_name=f"script_{int(time.time())}.json",
	                mime="application/json"
	            )

	        with col2:
	            # ZIP download (script plus every available storyboard image)
	            if st.session_state.storyboard_images:
	                zip_data = create_download_zip(st.session_state.storyboard_images, script_data)
	                if zip_data:
	                    st.download_button(
	                        label="📦 Download Complete Package",
	                        data=zip_data.getvalue(),
	                        file_name=f"storyboard_package_{int(time.time())}.zip",
	                        mime="application/zip"
	                    )

	        with col3:
	            # GIF download (only once a preview has been created)
	            if st.session_state.gif_preview:
	                st.download_button(
	                    label="🎬 Download GIF Preview",
	                    data=st.session_state.gif_preview.getvalue(),
	                    file_name=f"storyboard_preview_{int(time.time())}.gif",
	                    mime="image/gif"
	                )

	# Sidebar: usage notes, feature list, an image-generation self-test, and the
	# required settings.
	with st.sidebar:
	    st.markdown("### 📚 How to Use")
	    usage_text = """
	1. Enter Details: Describe your video topic and preferences
	2. Generate: Click the generate button and wait
	3. Review: Check the script and storyboard images
	4. Export: Download your files
	"""
	    st.markdown(usage_text)

	    st.markdown("### 🔧 Features")
	    features_text = """
	- ✅ AI-powered script generation
	- ✅ Visual storyboard creation
	- ✅ Text-to-speech narration
	- ✅ GIF preview generation
	- ✅ Complete package download
	"""
	    st.markdown(features_text)

	    st.markdown("### 🐛 Debug Info")
	    # Runs one generation with a fixed prompt and style and shows the result.
	    if st.button("🧪 Test Image Generation"):
	        with st.spinner("Testing image generation..."):
	            test_image = generate_storyboard_image_stable(
	                "A simple test scene with a person",
	                "Cartoon",
	            )
	            if not test_image:
	                st.error("❌ Image generation failed!")
	            else:
	                st.image(test_image, caption="Test Image", width=200)
	                st.success("✅ Image generation working!")

	    st.markdown("### ⚙️ Settings")
	    settings_text = """
	Required API Keys:
	- GEMINI_API_KEY
	- HF_TOKEN (Hugging Face)

	Models Used:
	- Script: Gemini 1.5 Flash
	- Images: Stable Diffusion v1.5
	- Speech: Google TTS
	"""
	    st.markdown(settings_text)

	# Footer: a horizontal rule followed by the credits and a usage tip.
	for _footer_line in (
	    "---",
	    "🤖 Powered by: Gemini AI • Hugging Face • Google TTS",
	    "💡 Tips: Be specific in your descriptions for better results!",
	):
	    st.markdown(_footer_line)