# -*- coding: utf-8 -*-
import os
import sys
import re
import random
# Suppress Gradio's console progress output.
os.environ['GRADIO_SUPPRESS_PROGRESS'] = 'true'
# Quiet Gradio's own logging down to errors only.
import logging
logging.getLogger('gradio').setLevel(logging.ERROR)
import cv2
import numpy as np
import gradio as gr
from gradio import Progress
import shutil
import subprocess
from PIL import Image, ImageDraw, ImageFont, ImageOps
from datetime import datetime
from threading import Lock
import base64
import json
import io
# --- Dependency Check ---
try:
from controlnet_aux import (
CannyDetector, MLSDdetector, HEDdetector,
LineartDetector, OpenposeDetector, NormalBaeDetector
)
from gradio_client import Client
from rembg import remove
import librosa
except ImportError as e:
print("="*80)
print(f"ERROR: Missing dependency -> {e}")
print("Please install all required packages by running:")
print("pip install -r requirements.txt")
print("="*80)
sys.exit(1)
# --- AI Model Dependency Check ---
try:
import whisper
except ImportError:
print("="*80)
print("WARNING: 'openai-whisper' not installed. The Transcription tab will be disabled.")
print("To enable it, run: pip install -U openai-whisper")
print("="*80)
whisper = None
# --- Global Variables & Setup ---
TEMP_DIR = "temp_gradio"
os.makedirs(TEMP_DIR, exist_ok=True)
model_load_lock = Lock()
loaded_detectors = {}
whisper_model = None
whisper_model_name = None
# --- ACTION REQUIRED ---
# Please download a sound file, name it 'finish_sound.mp3',
# and place it in the same directory as this app.py file.
# A good one can be found here: https://cdn.pixabay.com/download/audio/2022/03/15/audio_2289a72212.mp3?filename=whistle-108713.mp3
# --- Default Presets for Transfer Tab (Flat Dictionary) ---
DEFAULT_LINK_PRESETS = {
# Text To Image
"FLUX.1-schnell (black-forest-labs)": "https://huggingface.co/spaces/black-forest-labs/FLUX.1-schnell",
"FLUX.1-schnell (Rooc)": "https://huggingface.co/spaces/Rooc/FLUX.1-schnell",
"FLUX.1-schnell (evalstate)": "https://huggingface.co/spaces/evalstate/flux1_schnell",
"FLUX.1-schnell (hysts-mcp)": "https://huggingface.co/spaces/hysts-mcp/FLUX.1-schnell",
"FLUX.1-schnell (cbensimon)": "https://huggingface.co/spaces/cbensimon/FLUX-1-schnell-mcp",
"FLUX.1-dev": "https://huggingface.co/spaces/black-forest-labs/FLUX.1-dev",
"FLUX.1-dev-quantized": "https://huggingface.co/spaces/multimodalart/FLUX.1-dev-quantized",
"FLUX.1-dev_NotASI": "https://huggingface.co/spaces/NotASI/FLUX.1-dev",
"FLUX.1-dev_hysts": "https://huggingface.co/spaces/hysts-mcp/FLUX.1-dev",
"HiDream-I1-Dev": "https://huggingface.co/spaces/HiDream-ai/HiDream-I1-Dev",
"UnfilteredAI-NSFW-gen-v2": "https://huggingface.co/spaces/armen425221356/UnfilteredAI-NSFW-gen-v2_self_parms",
"InfiniteYou-FLUX": "https://huggingface.co/spaces/ByteDance/InfiniteYou-FLUX",
"Stable Diffusion 3.5 Large (arad1367)": "https://huggingface.co/spaces/arad1367/Stable_Diffusion_3_5_Large_Customized",
"Stable Diffusion 3.5 Large Turbo (doevent)": "https://huggingface.co/spaces/doevent/stable-diffusion-3.5-large-turbo",
# Virtual Try-On & Character
"OutfitAnyone": "https://huggingface.co/spaces/HumanAIGC/OutfitAnyone",
"Kolors Virtual Try-On": "https://huggingface.co/spaces/Kwai-Kolors/Kolors-Virtual-Try-On",
"Miragic Virtual Try-On": "https://huggingface.co/spaces/Miragic-AI/Miragic-Virtual-Try-On",
"OutfitAnyway": "https://huggingface.co/spaces/selfit-camera/OutfitAnyway",
"IDM-VTON": "https://huggingface.co/spaces/yisol/IDM-VTON",
"InstantCharacter": "https://huggingface.co/spaces/InstantX/InstantCharacter",
"InstantID": "https://huggingface.co/spaces/InstantX/InstantID",
# AI Lip-Sync & Talking Avatars
"LivePortrait": "https://huggingface.co/spaces/Han-123/LivePortrait",
"LivePortrait (CPU)": "https://huggingface.co/spaces/K00B404/LivePortrait_cpu",
"D-ID Live Portrait AI": "https://www.d-id.com/liveportrait-4/",
"Synthesia Avatars": "https://www.synthesia.io/features/avatars",
"Papercup": "https://www.papercup.com/",
"Hedra": "https://www.hedra.com",
"LemonSlice": "https://lemonslice.com",
"Vozo AI": "https://www.vozo.ai/lip-sync",
"Gooey AI Lipsync": "https://gooey.ai/Lipsync",
"Sync.so": "https://sync.so",
"LipDub AI": "https://www.lipdub.ai",
"Magic Hour": "https://magichour.ai",
"Lifelike AI": "https://www.lifelikeai.io",
"DeepMotion": "https://www.deepmotion.com",
"Elai.io": "https://elai.io",
"Rephrase.ai": "https://www.rephrase.ai",
"Colossyan": "https://www.colossyan.com",
"HeyGen (Movio)": "https://www.heygen.com",
"Murf Studio": "https://murf.ai",
# Image Editing & Upscaling
"FLUX Fill/Outpaint": "https://huggingface.co/spaces/multimodalart/flux-fill-outpaint",
"ReSize Image Outpainting": "https://huggingface.co/spaces/VIDraft/ReSize-Image-Outpainting",
"IC-Light (Relighting)": "https://huggingface.co/spaces/lllyasviel/IC-Light",
"IC-Light v2-vary": "https://huggingface.co/spaces/lllyasviel/iclight-v2-vary",
"Kontext Relight": "https://huggingface.co/spaces/kontext-community/kontext-relight",
"SUPIR Upscaler": "https://huggingface.co/spaces/Fabrice-TIERCELIN/SUPIR",
# Video Generation & FramePacks
"Framepacks (atunc29)": "https://huggingface.co/spaces/atunc29/Framepacks",
"Framepack i2v (ginigen)": "https://huggingface.co/spaces/ginigen/framepack-i2v",
"Framepack i2v (beowcow)": "https://huggingface.co/spaces/beowcow/framepack-i2v",
"Framepack i2v (lisonallen)": "https://huggingface.co/spaces/lisonallen/framepack-i2v",
"FramePack F1 (Latyrine)": "https://huggingface.co/spaces/Latyrine/FramePack-F1",
"FramePack F1 (linoyts)": "https://huggingface.co/spaces/linoyts/FramePack-F1",
"FramePack Rotate (tori29umai)": "https://huggingface.co/spaces/tori29umai/FramePack_rotate_landscape",
"FramePack Rotate (bep40)": "https://huggingface.co/spaces/bep40/FramePack_rotate_landscape",
"FramePack Rotate (VIDraft)": "https://huggingface.co/spaces/VIDraft/FramePack_rotate_landscape",
"Framepack-H111 (rahul7star)": "https://huggingface.co/spaces/rahul7star/Framepack-H111",
"FLUX.1 Kontext Dev": "https://huggingface.co/spaces/black-forest-labs/FLUX.1-Kontext-Dev",
"Wan2-1-fast": "https://huggingface.co/spaces/multimodalart/wan2-1-fast",
"LTX-video-distilled": "https://huggingface.co/spaces/Lightricks/ltx-video-distilled",
"RunwayML": "https://app.runwayml.com/video-tools/teams/rinaabdine1/ai-tools/generate",
"Pika Labs": "https://pika.art/",
"Kling AI": "https://app.klingai.com/global/image-to-video/frame-mode",
# Video Interpolation & Slow Motion
"RIFE (remzloev)": "https://huggingface.co/spaces/remzloev/Rife",
"VFI Converter (Agung1453)": "https://huggingface.co/spaces/Agung1453/Video-Frame-Interpolation-Converter",
"ZeroGPU Upscaler/Interpolation": "https://huggingface.co/spaces/inoculatemedia/zerogpu-upscaler-interpolation",
"Frame Interpolation (meta-artem)": "https://huggingface.co/spaces/meta-artem/frame-interpolation",
"Video Frame Interpolation (guardiancc)": "https://huggingface.co/spaces/guardiancc/video_frame_interpolation",
"Video Frame Interpolation (freealise)": "https://huggingface.co/spaces/freealise/video_frame_interpolation",
"Framer (wwen1997)": "https://huggingface.co/spaces/wwen1997/Framer",
"Inter4k VideoInterpolator": "https://huggingface.co/spaces/vimleshc57/Inter4k_VideoInterpolator",
# AnimateDiff & Advanced Animation
"AnimateDiff Lightning (ByteDance)": "https://huggingface.co/spaces/ByteDance/AnimateDiff-Lightning",
"AnimateDiff Lightning (SahaniJi)": "https://huggingface.co/spaces/SahaniJi/AnimateDiff-Lightning",
"AnimateDiff (fatima14)": "https://huggingface.co/spaces/fatima14/AnimateDiff",
"AnimateDiff Video Gen (faizanR)": "https://huggingface.co/spaces/faizanR/animatediff-video-generator",
"Text-to-Animation Fast (MisterProton)": "https://huggingface.co/spaces/MisterProton/text-to-Animation-Fast-AnimateDiff",
"Text-to-Animation Fast (Rowdy013)": "https://huggingface.co/spaces/Rowdy013/text-to-Animation-Fast",
# StyleGAN & Portrait Motion
"StyleGAN-Human Interpolation (hysts)": "https://huggingface.co/spaces/hysts/StyleGAN-Human-Interpolation",
"StyleGAN-Human (Gradio-Blocks)": "https://huggingface.co/spaces/Gradio-Blocks/StyleGAN-Human",
# Film & Style Models
"MGM-Film-Diffusion (tonyassi)": "https://huggingface.co/spaces/tonyassi/MGM-Film-Diffusion",
"CineDiffusion (takarajordan)": "https://huggingface.co/spaces/takarajordan/CineDiffusion",
"FLUX Film Foto (MartsoBodziu1994)": "https://huggingface.co/spaces/MartsoBodziu1994/alvdansen-flux_film_foto",
"FLUX Style Shaping": "https://huggingface.co/spaces/multimodalart/flux-style-shaping",
"Film (Stijnijzelenberg)": "https://huggingface.co/spaces/Stijnijzelenberg/film",
"Film Eras (abbiewoodbridge)": "https://huggingface.co/spaces/abbiewoodbridge/Film_Eras",
"Film Genre Classifier (Rezuwan)": "https://huggingface.co/spaces/Rezuwan/film_genre_classifier",
"RunwayML (Faizbulbul)": "https://huggingface.co/spaces/Faizbulbul/Runwaymlfaiz",
# Text-to-3D
"Step1X-3D": "https://huggingface.co/spaces/stepfun-ai/Step1X-3D",
"TRELLIS TextTo3D (PUM4CH3N)": "https://huggingface.co/spaces/PUM4CH3N/TRELLIS_TextTo3D",
"TRELLIS TextTo3D (cavargas10)": "https://huggingface.co/spaces/cavargas10/TRELLIS-Texto3D",
"TRELLIS TextTo3D (dkatz2391)": "https://huggingface.co/spaces/dkatz2391/TRELLIS_TextTo3D_Try2",
"Sparc3D": "https://huggingface.co/spaces/ilcve21/Sparc3D",
"Hunyuan3D-2.1": "https://huggingface.co/spaces/tencent/Hunyuan3D-2.1",
# Image Captioning & Interrogation
"BLIP-2 (hysts)": "https://huggingface.co/spaces/hysts/BLIP2",
"BLIP-3o": "https://huggingface.co/spaces/BLIP3o/blip-3o",
"Blip-Dalle3 (DarwinAnim8or)": "https://huggingface.co/spaces/DarwinAnim8or/Blip-Dalle3",
"BLIP API (Jonu1)": "https://huggingface.co/spaces/Jonu1/blip-image-captioning-api",
"BLIP API (muxiddin19)": "https://huggingface.co/spaces/muxiddin19/blip-image-captioning-api",
# Diffusion & Sketching Tools
"DiffSketcher (SVGRender)": "https://huggingface.co/spaces/SVGRender/DiffSketcher",
"Diffusion WikiArt (kaupane)": "https://huggingface.co/spaces/kaupane/diffusion-wikiart",
"Diffusers Image Fill (OzzyGT)": "https://huggingface.co/spaces/OzzyGT/diffusers-image-fill",
"Diffusers Fast Inpaint (OzzyGT)": "https://huggingface.co/spaces/OzzyGT/diffusers-fast-inpaint",
# Audio & Voice Tools
"ThinkSound (FunAudioLLM)": "https://huggingface.co/spaces/FunAudioLLM/ThinkSound",
"TTS Unlimited (NihalGazi)": "https://huggingface.co/spaces/NihalGazi/Text-To-Speech-Unlimited",
"Voice Clon (tonyassi)": "https://huggingface.co/spaces/tonyassi/voice-clon",
# Scripting & Writing Tools
"SKRIPTZ (skylinkd)": "https://huggingface.co/spaces/skylinkd/SKRIPTZ",
# AI Frameworks & Platforms
"Hugging Face Hub": "https://huggingface.co",
"Hugging Face Transformers": "https://huggingface.co/docs/transformers/en/index",
"Hugging Face Inference API": "https://huggingface.co/inference-api/",
# Miscellaneous Video Tools
"SpatialTrackerV2 (Yuxihenry)": "https://huggingface.co/spaces/Yuxihenry/SpatialTrackerV2",
"MTVCraft (BAAI)": "https://huggingface.co/spaces/BAAI/MTVCraft",
# Miscellaneous Tools
"EBSynth (NihalGazi)": "https://huggingface.co/spaces/NihalGazi/EBSynth",
"MoodSpace (huzey)": "https://huggingface.co/spaces/huzey/MoodSpace",
"TR0N (Layer6)": "https://huggingface.co/spaces/Layer6/TR0N",
"TUTOR (nathannarrik)": "https://huggingface.co/spaces/nathannarrik/TUTOR",
"Sport Model 1 (CHEN11102)": "https://huggingface.co/spaces/CHEN11102/sportmodel1",
"VBench Leaderboard (Vchitect)": "https://huggingface.co/spaces/Vchitect/VBench_Leaderboard",
}
# --- Model Loading ---
DETECTOR_CONFIG = {
"Canny": {"class": CannyDetector, "args": {}},
"Lineart": {"class": LineartDetector, "args": {"pretrained_model_or_path": "lllyasviel/Annotators"}},
"MLSD": {"class": MLSDdetector, "args": {"pretrained_model_or_path": "lllyasviel/Annotators"}},
"OpenPose": {"class": OpenposeDetector, "args": {"pretrained_model_or_path": "lllyasviel/Annotators"}},
"NormalBAE": {"class": NormalBaeDetector, "args": {"pretrained_model_or_path": "lllyasviel/Annotators"}},
"SoftEdge (HED)": {"class": HEDdetector, "args": {"pretrained_model_or_path": "lllyasviel/Annotators"}},
}
def get_detector(name):
with model_load_lock:
if name not in loaded_detectors:
print(f"Loading {name} model...")
config = DETECTOR_CONFIG[name]
if "pretrained_model_or_path" in config["args"]:
detector_class = config["class"]
loaded_detectors[name] = detector_class.from_pretrained(**config["args"])
else:
loaded_detectors[name] = config["class"](**config["args"])
print(f"{name} model loaded.")
return loaded_detectors[name]
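# For reference, the detectors are used like this elsewhere in this file:
#   detector = get_detector("Canny")
#   edge_map = detector(pil_image, detect_resolution=512, image_resolution=1024)
# Loaded detectors are cached in loaded_detectors, so only the first call per annotator pays the
# model-loading cost; the lock keeps concurrent Gradio requests from loading the same model twice.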
def load_whisper_model(model_name="base"):
    global whisper_model, whisper_model_name
    if whisper:
        with model_load_lock:
            # The loaded Whisper object does not reliably expose which checkpoint it came from,
            # so the model name is tracked separately instead of being read off the model itself.
            if whisper_model is None or whisper_model_name != model_name:
                print(f"Loading Whisper model '{model_name}'... (This may download files on first run)")
                whisper_model = whisper.load_model(model_name)
                whisper_model_name = model_name
                print("Whisper model loaded.")
        return whisper_model
    return None
get_detector("Canny") # Pre-load Canny detector
# --- Utility Functions ---
def parse_color(color_str):
"""
Parses a color string from Gradio's ColorPicker.
It can handle hex strings ('#RRGGBB') or the problematic
rgba float format ('rgba(r,g,b,a)').
Returns a tuple (r, g, b) for PIL.
"""
if not isinstance(color_str, str):
return color_str # Should already be a tuple or other valid format
if color_str.startswith('rgba'):
parts = re.findall(r"[\d\.]+", color_str)
if len(parts) >= 3:
# Convert float parts to integers
return (int(float(parts[0])), int(float(parts[1])), int(float(parts[2])))
# If it's a hex string or a named color, PIL can handle it directly.
return color_str
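# Examples of what parse_color returns:
#   parse_color("#ff8800")              -> "#ff8800"       (hex passes through; PIL accepts it)
#   parse_color("rgba(255, 136, 0, 1)") -> (255, 136, 0)   (floats cast to int, alpha dropped)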
def rotate_image(image, rotation):
if rotation == "90 Degrees Clockwise":
return cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
elif rotation == "90 Degrees Counter-Clockwise":
return cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
elif rotation == "180 Degrees":
return cv2.rotate(image, cv2.ROTATE_180)
return image
def manipulate_image(image, operation):
if image is None:
raise gr.Error("Please upload an image first.")
if operation == "Invert Colors":
return cv2.bitwise_not(image)
elif operation == "Flip Horizontal":
return cv2.flip(image, 1)
elif operation == "Flip Vertical":
return cv2.flip(image, 0)
elif operation == "Rotate 90Β° Right":
return cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
elif operation == "Rotate 90Β° Left":
return cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
else:
return image
def manipulate_video(video_path, operation):
if not video_path:
raise gr.Error("Please upload a video first.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_video_path = os.path.join(TEMP_DIR, f"manipulated_video_{timestamp}.mp4")
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
raise gr.Error("Error opening video file.")
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
if fps == 0: fps = 30
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out_width, out_height = width, height
if operation in ["Rotate 90Β° Right", "Rotate 90Β° Left"]:
out_width, out_height = height, width
writer = cv2.VideoWriter(output_video_path, fourcc, fps, (out_width, out_height))
for _ in range(frame_count):
ret, frame = cap.read()
if not ret:
break
processed_frame = manipulate_image(frame, operation)
writer.write(processed_frame)
cap.release()
writer.release()
return output_video_path
def get_media_duration(media_path):
if not media_path: return 0.0
try:
cmd = ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", media_path]
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
return float(result.stdout.strip())
except Exception as e:
print(f"Could not get duration for {media_path}: {e}")
return 0.0
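# get_media_duration shells out to ffprobe, roughly:
#   ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 input.mp4
# which prints a single float (seconds) on stdout; any failure falls back to a duration of 0.0.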
def get_video_dimensions(video_path):
if not video_path: return 0, 0
try:
cap = cv2.VideoCapture(video_path)
if not cap.isOpened(): return 0, 0
width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
cap.release()
return width, height
except Exception: return 0, 0
def get_video_fps(video_path):
if not video_path: return 24.0
try:
cap = cv2.VideoCapture(video_path)
if not cap.isOpened(): return 24.0
fps = cap.get(cv2.CAP_PROP_FPS)
cap.release()
return fps if fps > 0 else 24.0
except Exception: return 24.0
def has_audio_stream(video_path):
"""Checks if a video file has at least one audio stream."""
if not video_path:
return False
try:
cmd = [
"ffprobe", "-v", "error", "-select_streams", "a",
"-show_entries", "stream=codec_type", "-of",
"default=noprint_wrappers=1:nokey=1", video_path
]
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
return result.stdout.strip() != ""
except (subprocess.CalledProcessError, FileNotFoundError):
return False
def run_ffmpeg_command(cmd, desc="Processing with FFMPEG..."):
try:
print(f"Running FFMPEG command: {' '.join(cmd)}")
process = subprocess.run(
cmd,
capture_output=True,
text=True,
encoding='utf-8',
check=False
)
if process.returncode != 0:
full_output = f"--- FFMPEG & GRADIO ERROR LOG ---\n\n" \
f"FFMPEG COMMAND:\n{' '.join(cmd)}\n\n" \
f"FFMPEG STDERR:\n{process.stderr}\n\n" \
f"FFMPEG STDOUT:\n{process.stdout}"
raise subprocess.CalledProcessError(process.returncode, cmd, output=full_output)
except subprocess.CalledProcessError as e:
raise gr.Error(f"FFMPEG failed!\n\nDetails:\n{e.output}")
except FileNotFoundError:
raise gr.Error("FFMPEG not found. Please ensure ffmpeg is installed and in your system's PATH.")
def batch_image_processor(files, processing_function, job_name, **kwargs):
if not files: raise gr.Error("Please upload at least one image.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
job_temp_dir = os.path.join(TEMP_DIR, f"{job_name}_{timestamp}"); os.makedirs(job_temp_dir, exist_ok=True)
output_paths = []
for file_obj in files:
try:
base, _ = os.path.splitext(os.path.basename(file_obj.name))
if job_name == "zoom_videos":
output_filename = f"{base}.mp4"
elif job_name == "bg_removed":
output_filename = f"{base}.png"
else:
output_filename = os.path.basename(file_obj.name)
output_path = os.path.join(job_temp_dir, output_filename)
processing_function(input_path=file_obj.name, output_path=output_path, **kwargs)
output_paths.append(output_path)
except Exception as e:
print(f"Skipping file {file_obj.name} due to error: {e}")
continue
if not output_paths:
shutil.rmtree(job_temp_dir)
raise gr.Error("No images could be processed from the batch.")
zip_base_name = os.path.join(TEMP_DIR, f"{job_name}_archive_{timestamp}")
zip_path = shutil.make_archive(zip_base_name, 'zip', job_temp_dir)
return output_paths, zip_path, job_temp_dir
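# batch_image_processor is the generic per-file driver used by several tabs: callers supply a
# processing_function(input_path, output_path, **kwargs); files that fail are skipped, the
# survivors are zipped, and (output_paths, zip_path, job_temp_dir) is returned.
# remove_background_batch below is a minimal example of a caller.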
def process_batch_images_with_detector(files, detector_name):
detector = get_detector(detector_name)
def apply_detector(input_path, output_path, **kwargs):
with Image.open(input_path).convert("RGB") as img:
processed = detector(img, detect_resolution=512, image_resolution=1024)
processed.save(output_path)
output_paths, zip_path, _ = batch_image_processor(files, apply_detector, f"controlnet_{detector_name}")
return output_paths, zip_path
def process_video_with_detector(video_path, detector_name):
if not video_path: raise gr.Error("Please upload a video first.")
detector = get_detector(detector_name)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
job_temp_dir = os.path.join(TEMP_DIR, f"job_{timestamp}")
input_frames_dir, output_frames_dir = os.path.join(job_temp_dir, "input_frames"), os.path.join(job_temp_dir, "output_frames")
os.makedirs(input_frames_dir, exist_ok=True); os.makedirs(output_frames_dir, exist_ok=True)
output_video_path = os.path.join(TEMP_DIR, f"{detector_name.lower()}_output_{timestamp}.mp4")
cap = cv2.VideoCapture(video_path)
frame_count, frame_rate = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), get_video_fps(video_path)
for i in range(frame_count):
success, frame = cap.read()
if not success: break
cv2.imwrite(os.path.join(input_frames_dir, f"frame_{i:05d}.png"), frame)
cap.release()
input_files = sorted(os.listdir(input_frames_dir))
for filename in input_files:
with Image.open(os.path.join(input_frames_dir, filename)).convert("RGB") as image:
result_pil = detector(image, detect_resolution=512, image_resolution=1024)
result_np = cv2.cvtColor(np.array(result_pil), cv2.COLOR_RGB2BGR)
cv2.imwrite(os.path.join(output_frames_dir, filename), result_np)
cmd = ["ffmpeg", "-framerate", str(frame_rate), "-i", os.path.join(output_frames_dir, "frame_%05d.png"), "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path]
run_ffmpeg_command(cmd, "Compiling Video")
shutil.rmtree(job_temp_dir)
return output_video_path
def extract_first_last_frame(video_path):
if not video_path: raise gr.Error("Please upload a video first.")
cap = cv2.VideoCapture(video_path)
if not cap.isOpened(): raise gr.Error("Failed to open video file.")
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
if frame_count < 1:
cap.release()
raise gr.Error("Video has no frames.")
if frame_count < 2:
success, frame_img = cap.read()
cap.release()
if not success: raise gr.Error("Could not read the only frame.")
frame_rgb = cv2.cvtColor(frame_img, cv2.COLOR_BGR2RGB)
return [frame_rgb, frame_rgb.copy()]
success, first_frame_img = cap.read()
if not success: raise gr.Error("Could not read the first frame.")
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count - 1)
success, last_frame_img = cap.read()
if not success: raise gr.Error("Could not read the last frame.")
cap.release()
return [cv2.cvtColor(first_frame_img, cv2.COLOR_BGR2RGB), cv2.cvtColor(last_frame_img, cv2.COLOR_BGR2RGB)]
def video_to_frames_extractor(video_path, skip_rate, rotation, do_resize, out_w, out_h, out_format, jpg_quality):
if not video_path: raise gr.Error("Please upload a video first.")
if do_resize and (out_w <= 0 or out_h <= 0): raise gr.Error("If resizing, width and height must be positive.")
cap = cv2.VideoCapture(video_path)
if not cap.isOpened(): raise gr.Error("Failed to open video file.")
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
if frame_count < 1: cap.release(); raise gr.Error("Video appears to have no frames.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
job_temp_dir = os.path.join(TEMP_DIR, f"v2f_{timestamp}"); os.makedirs(job_temp_dir, exist_ok=True)
frame_paths = []
saved_count = 0
for i in range(frame_count):
success, frame = cap.read()
if not success: break
if i % skip_rate != 0: continue
frame = rotate_image(frame, rotation)
if do_resize: frame = cv2.resize(frame, (out_w, out_h), interpolation=cv2.INTER_LANCZOS4)
frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
file_ext = out_format.lower()
frame_path = os.path.join(job_temp_dir, f"frame_{saved_count:05d}.{file_ext}")
if out_format == "JPG": frame_pil.save(frame_path, quality=jpg_quality)
else: frame_pil.save(frame_path)
frame_paths.append(frame_path)
saved_count += 1
cap.release()
if not frame_paths: shutil.rmtree(job_temp_dir); raise gr.Error("Could not extract any frames.")
zip_base_name = os.path.join(TEMP_DIR, f"frames_archive_{timestamp}")
zip_path = shutil.make_archive(zip_base_name, 'zip', job_temp_dir)
return frame_paths[:100], zip_path
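# Note: only the first 100 extracted frames are returned for the gallery preview to keep the UI
# responsive; the ZIP archive always contains every frame that was saved.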
def create_video_from_frames(files, fps, rotation, do_resize, out_w, out_h):
if not files: raise gr.Error("Please upload frame images first.")
if do_resize and (out_w <= 0 or out_h <= 0): raise gr.Error("If resizing, width and height must be positive.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
job_temp_dir = os.path.join(TEMP_DIR, f"f2v_{timestamp}"); os.makedirs(job_temp_dir, exist_ok=True)
filenames = []
for i, file in enumerate(files):
ext = os.path.splitext(file.name)[1]
temp_path = os.path.join(job_temp_dir, f"frame_{i:05d}{ext}")
shutil.copy(file.name, temp_path); filenames.append(temp_path)
output_video_path = os.path.join(TEMP_DIR, f"video_from_frames_{timestamp}.mp4")
first_frame_img = rotate_image(cv2.imread(filenames[0]), rotation)
h, w, _ = first_frame_img.shape
if do_resize: w, h = out_w, out_h
w -= w % 2; h -= h % 2
temp_processed_dir = os.path.join(job_temp_dir, "processed"); os.makedirs(temp_processed_dir, exist_ok=True)
for i, filename in enumerate(filenames):
frame = rotate_image(cv2.imread(filename), rotation)
frame = cv2.resize(frame, (w, h), interpolation=cv2.INTER_LANCZOS4)
cv2.imwrite(os.path.join(temp_processed_dir, f"pframe_{i:05d}.png"), frame)
cmd = ["ffmpeg", "-framerate", str(fps), "-i", os.path.join(temp_processed_dir, "pframe_%05d.png"), "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path]
run_ffmpeg_command(cmd, "Compiling Video")
shutil.rmtree(job_temp_dir)
return output_video_path
def image_to_looping_video(image_array, duration, audio_path=None):
if image_array is None: raise gr.Error("Please upload an image first.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
temp_image_path = os.path.join(TEMP_DIR, f"temp_image_{timestamp}.png")
output_video_path = os.path.join(TEMP_DIR, f"looping_video_{timestamp}.mp4")
img = Image.fromarray(image_array)
img.save(temp_image_path)
width, height = img.size
width -= width % 2; height -= height % 2
cmd = ["ffmpeg", "-loop", "1", "-i", temp_image_path]
if audio_path:
cmd.extend(["-i", audio_path, "-c:a", "aac", "-shortest"])
cmd.extend(["-c:v", "libx264", "-t", str(duration), "-pix_fmt", "yuv420p", "-vf", f"scale={width}:{height}", "-y", output_video_path])
run_ffmpeg_command(cmd, "Creating Looping Video...")
os.remove(temp_image_path)
return output_video_path
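# The zoom videos below rely on FFmpeg's zoompan filter: each image is first upscaled
# (scale=3840:-1) so the pan has pixel headroom, then zoompan advances the zoom by zoom_step per
# frame up to zoom_ratio, anchored at the coordinates chosen from zoom_coords, producing
# d=total_frames output frames at 1920x1080.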
def create_zoom_videos(files, duration, zoom_ratio, zoom_direction, combine_videos, audio_path=None):
if not files:
raise gr.Error("Please upload at least one image.")
fps = 30
total_frames = int(duration * fps)
zoom_step = (zoom_ratio - 1.0) / total_frames
zoom_coords = {
"Center": "x=iw/2-(iw/zoom)/2:y=ih/2-(ih/zoom)/2", "Top": "x=iw/2-(iw/zoom)/2:y=0", "Bottom": "x=iw/2-(iw/zoom)/2:y=ih-(ih/zoom)",
"Left": "x=0:y=ih/2-(ih/zoom)/2", "Right": "x=iw-(iw/zoom):y=ih/2-(ih/zoom)/2", "Top-Left": "x=0:y=0",
"Top-Right": "x=iw-(iw/zoom):y=0", "Bottom-Left": "x=0:y=ih-(ih/zoom)", "Bottom-Right": "x=iw-(iw/zoom):y=ih-(ih/zoom)",
}
def process_single_image(input_path, output_path, **kwargs):
audio_for_clip = kwargs.get('audio_for_clip')
zoom_filter = (f"scale=3840:-1,zoompan=z='min(zoom+{zoom_step},{zoom_ratio})':{zoom_coords[zoom_direction]}:d={total_frames}:s=1920x1080:fps={fps}")
cmd = ["ffmpeg", "-loop", "1", "-i", input_path]
if audio_for_clip:
cmd.extend(["-i", audio_for_clip, "-c:a", "aac", "-shortest"])
cmd.extend(["-vf", zoom_filter, "-c:v", "libx264", "-t", str(duration), "-pix_fmt", "yuv420p", "-b:v", "5M", "-y", output_path])
run_ffmpeg_command(cmd, f"Creating zoom video for {os.path.basename(input_path)}")
batch_kwargs = {}
if not combine_videos and audio_path:
batch_kwargs['audio_for_clip'] = audio_path
video_paths, zip_path, job_temp_dir = batch_image_processor(files, process_single_image, "zoom_videos", **batch_kwargs)
if not combine_videos:
return video_paths, None, zip_path
if not video_paths:
raise gr.Error("No videos were created to be combined.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
silent_combined_path = os.path.join(job_temp_dir, f"combined_silent_{timestamp}.mp4")
if len(video_paths) > 1:
file_list_path = os.path.join(job_temp_dir, "files.txt")
with open(file_list_path, 'w', encoding='utf-8') as f:
for path in video_paths:
f.write(f"file '{os.path.abspath(path)}'\n")
run_ffmpeg_command(["ffmpeg", "-f", "concat", "-safe", "0", "-i", file_list_path, "-c", "copy", "-y", silent_combined_path], "Combining Videos")
else:
shutil.copy(video_paths[0], silent_combined_path)
if audio_path:
final_video_path = os.path.join(TEMP_DIR, f"combined_audio_{timestamp}.mp4")
run_ffmpeg_command(["ffmpeg", "-i", silent_combined_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-shortest", "-y", final_video_path], "Adding audio...")
else:
final_video_path = os.path.join(TEMP_DIR, f"combined_final_{timestamp}.mp4")
shutil.move(silent_combined_path, final_video_path)
return None, final_video_path, zip_path
def change_video_speed(video_path, speed_multiplier):
if not video_path: raise gr.Error("Please upload a video first.")
if speed_multiplier <= 0: raise gr.Error("Speed multiplier must be positive.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_video_path = os.path.join(TEMP_DIR, f"speed_change_{timestamp}.mp4")
pts_value = 1 / speed_multiplier
cmd = ["ffmpeg", "-i", video_path, "-filter:v", f"setpts={pts_value}*PTS", "-an", "-y", output_video_path]
run_ffmpeg_command(cmd, "Changing Video Speed")
return output_video_path
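# Speed changes use setpts: a 2x speed-up becomes setpts=0.5*PTS (every timestamp halved). The
# audio track is deliberately dropped (-an); re-attach one afterwards with add_audio_to_video if needed.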
def reverse_video(video_path, audio_option):
if not video_path: raise gr.Error("Please upload a video first.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_video_path = os.path.join(TEMP_DIR, f"reversed_video_{timestamp}.mp4")
filters = ["reverse"]
if audio_option == "Reverse Audio": filters.append("areverse")
cmd = ["ffmpeg", "-i", video_path, "-vf", filters[0]]
if len(filters) > 1: cmd.extend(["-af", filters[1]])
if audio_option == "Remove Audio": cmd.append("-an")
cmd.extend(["-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path])
run_ffmpeg_command(cmd, "Reversing video...")
return output_video_path
def add_audio_to_video(video_path, audio_path):
if not video_path: raise gr.Error("Please upload a video.")
if not audio_path: raise gr.Error("Please upload an audio file.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_video_path = os.path.join(TEMP_DIR, f"video_with_audio_{timestamp}.mp4")
cmd = ["ffmpeg", "-i", video_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-shortest", "-y", output_video_path]
run_ffmpeg_command(cmd, "Adding Audio to Video")
return output_video_path
def extract_audio(video_path, audio_format="mp3"):
if not video_path: raise gr.Error("Please upload a video first.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_audio_path = os.path.join(TEMP_DIR, f"extracted_audio_{timestamp}.{audio_format}")
cmd = ["ffmpeg", "-i", video_path, "-vn"] # -vn strips video
if audio_format == "mp3": cmd.extend(["-c:a", "libmp3lame", "-q:a", "2"]) # VBR quality
elif audio_format == "aac": cmd.extend(["-c:a", "aac", "-b:a", "192k"])
elif audio_format == "wav": cmd.extend(["-c:a", "pcm_s16le"])
cmd.extend(["-y", output_audio_path])
run_ffmpeg_command(cmd, "Extracting audio...")
return output_audio_path
def create_gif_from_video(video_path, start_time, end_time):
if not video_path: raise gr.Error("Please upload a video first.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_gif_path, palette_path = os.path.join(TEMP_DIR, f"video_to_gif_{timestamp}.gif"), os.path.join(TEMP_DIR, f"palette_{timestamp}.png")
duration_filter = []
if start_time > 0 or end_time > 0:
if end_time > 0 and end_time <= start_time: raise gr.Error("End time must be after start time.")
if start_time > 0: duration_filter.extend(["-ss", str(start_time)])
if end_time > 0: duration_filter.extend(["-to", str(end_time)])
run_ffmpeg_command(["ffmpeg", "-i", video_path] + duration_filter + ["-vf", "fps=15,scale=480:-1:flags=lanczos,palettegen", "-y", palette_path])
run_ffmpeg_command(["ffmpeg", "-i", video_path] + duration_filter + ["-i", palette_path, "-filter_complex", "fps=15,scale=480:-1:flags=lanczos[x];[x][1:v]paletteuse", "-y", output_gif_path])
os.remove(palette_path)
return output_gif_path
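# GIF export is a two-pass job: palettegen first builds an optimized 256-colour palette from the
# selected range, then paletteuse maps the 15 fps, 480px-wide frames onto that palette, which gives
# a much smaller and cleaner GIF than a single-pass conversion.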
def get_frame_at_time(video_path, time_in_seconds=0):
if not video_path: return None
try:
command = ['ffmpeg', '-ss', str(time_in_seconds), '-i', video_path, '-vframes', '1', '-f', 'image2pipe', '-c:v', 'png', '-']
pipe = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
return Image.open(io.BytesIO(pipe.stdout)).convert("RGB")
except Exception as e:
print(f"Error extracting frame for crop preview: {e}")
cap = cv2.VideoCapture(video_path); cap.set(cv2.CAP_PROP_POS_MSEC, time_in_seconds * 1000)
success, frame = cap.read(); cap.release()
if success: return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
return None
def crop_video(video_path, x, y, w, h, do_resize, out_w, out_h):
if not video_path: raise gr.Error("Please upload a video first.")
w, h, x, y = int(w), int(h), int(x), int(y)
w -= w % 2; h -= h % 2
if w <= 0 or h <= 0: raise gr.Error("Crop dimensions must be positive.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_video_path = os.path.join(TEMP_DIR, f"cropped_video_{timestamp}.mp4")
vf_filters = [f"crop={w}:{h}:{x}:{y}"]
if do_resize:
if out_w <= 0 or out_h <= 0: raise gr.Error("Resize dimensions must be positive.")
out_w, out_h = int(out_w), int(out_h)
out_w -= out_w % 2; out_h -= out_h % 2
vf_filters.append(f"scale={out_w}:{out_h}")
cmd = ["ffmpeg", "-i", video_path, "-vf", ",".join(vf_filters), "-c:a", "copy", "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path]
run_ffmpeg_command(cmd, "Cropping video...")
return output_video_path
def trim_video(video_path, start_time, end_time):
if not video_path: raise gr.Error("Please upload a video first.")
if start_time < 0: start_time = 0
if end_time <= start_time: end_time = 0
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_video_path = os.path.join(TEMP_DIR, f"trimmed_video_{timestamp}.mp4")
cmd = ["ffmpeg", "-i", video_path, "-ss", str(start_time)]
if end_time > 0: cmd.extend(["-to", str(end_time)])
cmd.extend(["-c:v", "libx264", "-c:a", "copy", "-pix_fmt", "yuv420p", "-y", output_video_path])
run_ffmpeg_command(cmd, "Trimming Video")
return output_video_path
def apply_video_watermark(video_path, text, position, opacity, size_scale, color):
if not video_path: raise gr.Error("Please upload a video first.")
if not text: raise gr.Error("Watermark text cannot be empty.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_video_path = os.path.join(TEMP_DIR, f"watermarked_video_{timestamp}.mp4")
_ , video_h = get_video_dimensions(video_path)
if video_h == 0:
video_h = 720 # Fallback
escaped_text = text.replace("'", r"'\''").replace(":", r"\:").replace(",", r"\,")
pos_map = {"Top-Left": "x=20:y=20", "Top-Right": "x=w-tw-20:y=20", "Bottom-Left": "x=20:y=h-th-20", "Bottom-Right": "x=w-tw-20:y=h-th-20", "Center": "x=(w-tw)/2:y=(h-th)/2"}
font_opacity = opacity / 100.0
font_size = int(video_h / (50 - (size_scale * 3.5)))
drawtext_filter = (
f"drawtext="
f"text='{escaped_text}':"
f"{pos_map[position]}:"
f"fontsize={font_size}:"
f"fontcolor={color}@{font_opacity}"
)
cmd = [
"ffmpeg", "-i", video_path,
"-vf", drawtext_filter,
"-c:a", "copy",
"-c:v", "libx264",
"-pix_fmt", "yuv420p",
"-y", output_video_path
]
run_ffmpeg_command(cmd, "Applying text watermark...")
return output_video_path
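# Font sizing example: with size_scale = 4 on a 720p video the divisor is 50 - 4 * 3.5 = 36, so the
# font size is int(720 / 36) = 20 px; larger size_scale values shrink the divisor and enlarge the
# text. Opacity is mapped from the 0-100 slider onto drawtext's 0.0-1.0 alpha.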
def remove_video_background(video_path):
if not video_path: raise gr.Error("Please upload a video first.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
job_temp_dir = os.path.join(TEMP_DIR, f"bg_rem_job_{timestamp}"); input_frames_dir, output_frames_dir = os.path.join(job_temp_dir, "input_frames"), os.path.join(job_temp_dir, "output_frames")
os.makedirs(input_frames_dir, exist_ok=True); os.makedirs(output_frames_dir, exist_ok=True)
cap = cv2.VideoCapture(video_path); frame_count, fps = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), get_video_fps(video_path)
for i in range(frame_count):
success, frame = cap.read()
if not success: break
cv2.imwrite(os.path.join(input_frames_dir, f"frame_{i:05d}.png"), frame)
cap.release()
for filename in sorted(os.listdir(input_frames_dir)):
with Image.open(os.path.join(input_frames_dir, filename)) as img:
remove(img).save(os.path.join(output_frames_dir, filename))
output_video_path = os.path.join(TEMP_DIR, f"bg_removed_{timestamp}.webm")
cmd = ["ffmpeg", "-framerate", str(fps), "-i", os.path.join(output_frames_dir, "frame_%05d.png"), "-c:v", "libvpx-vp9", "-pix_fmt", "yuva420p", "-auto-alt-ref", "0", "-b:v", "1M", "-y", output_video_path]
run_ffmpeg_command(cmd, "Compiling transparent video...")
shutil.rmtree(job_temp_dir)
return output_video_path
def transcribe_media(media_path, model_name):
if media_path is None: raise gr.Error("Please upload a video or audio file first.")
model = load_whisper_model(model_name)
if model is None: raise gr.Error("Whisper model is not available.")
audio_path = media_path.name
base_name = os.path.splitext(os.path.basename(media_path.name))[0]
if audio_path.lower().endswith(('.mp4', '.mov', '.mkv', '.avi', '.webm')):
audio_path_temp = os.path.join(TEMP_DIR, f"{base_name}.mp3")
try:
run_ffmpeg_command(["ffmpeg", "-i", audio_path, "-q:a", "0", "-map", "a", "-y", audio_path_temp])
audio_path = audio_path_temp
except gr.Error as e:
if "does not contain any stream" in str(e): raise gr.Error("The uploaded video has no audio track.")
else: raise e
result = model.transcribe(audio_path, verbose=False)
def format_ts(s):
h, r = divmod(s, 3600); m, s = divmod(r, 60)
return f"{int(h):02}:{int(m):02}:{int(s):02},{int((s-int(s))*1000):03}"
srt_path = os.path.join(TEMP_DIR, f"{base_name}.srt")
vtt_path = os.path.join(TEMP_DIR, f"{base_name}.vtt")
with open(srt_path, "w", encoding="utf-8") as srt_f, open(vtt_path, "w", encoding="utf-8") as vtt_f:
vtt_f.write("WEBVTT\n\n")
for i, seg in enumerate(result["segments"]):
start, end, text = seg['start'], seg['end'], seg['text'].strip()
srt_f.write(f"{i + 1}\n{format_ts(start)} --> {format_ts(end)}\n{text}\n\n")
vtt_f.write(f"{format_ts(start).replace(',', '.')} --> {format_ts(end).replace(',', '.')}\n{text}\n\n")
return result["text"], [srt_path, vtt_path]
def transcribe_and_prep_burn(media_file, model_name):
if not media_file: raise gr.Error("Please upload a file first.")
is_video = media_file.name.lower().endswith(('.mp4', '.mov', '.mkv', '.avi', '.webm'))
text, files = transcribe_media(media_file, model_name)
if is_video: return text, files, media_file.name, gr.update(visible=True)
else: return text, files, None, gr.update(visible=False)
def burn_subtitles(video_path, srt_file_obj, font_size_scale, font_color):
if not video_path: raise gr.Error("Original video path not found. Please re-transcribe.")
if not srt_file_obj or not srt_file_obj[0].name: raise gr.Error("SRT file not found. Please re-transcribe.")
srt_path = srt_file_obj[0].name
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_video_path = os.path.join(TEMP_DIR, f"subtitled_video_{timestamp}.mp4")
_, video_h = get_video_dimensions(video_path)
if video_h == 0: video_h = 720
divisor = 32 - (font_size_scale * 2)
calculated_font_size = int(video_h / divisor)
color_bgr = font_color[5:7] + font_color[3:5] + font_color[1:3]
ffmpeg_color = f"&H00{color_bgr.upper()}"
escaped_srt_path = srt_path.replace('\\', '/').replace(':', '\\:')
vf_filter = f"subtitles='{escaped_srt_path}':force_style='Fontsize={calculated_font_size},PrimaryColour={ffmpeg_color},BorderStyle=1,Outline=1,Shadow=0.5,MarginV=15'"
cmd = ["ffmpeg", "-i", video_path, "-vf", vf_filter, "-c:a", "copy", "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path]
run_ffmpeg_command(cmd, "Burning subtitles into video...")
return output_video_path
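# Subtitle colour example: a picker value of "#FF8800" is reordered to BGR ("0088FF") and passed to
# libass as "&H000088FF", because force_style expects colours in &HAABBGGRR form.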
def remove_background_single(input_path, output_path, **kwargs):
with Image.open(input_path) as img:
remove(img).save(output_path)
def remove_background_batch(files):
output_paths, zip_path, _ = batch_image_processor(files, remove_background_single, "bg_removed")
return output_paths, zip_path
def resize_convert_single_image(input_path, output_path, **kwargs):
output_format = kwargs.get('output_format', 'JPG')
quality = kwargs.get('quality', 95)
enable_resize = kwargs.get('enable_resize', False)
max_w = kwargs.get('max_w', 1024)
max_h = kwargs.get('max_h', 1024)
resize_mode = kwargs.get('resize_mode', "Fit (preserve aspect ratio)")
with Image.open(input_path) as img:
if output_format in ['JPG', 'WEBP'] and img.mode in ['RGBA', 'P', 'LA']:
img = img.convert("RGB")
if enable_resize:
if resize_mode == "Fit (preserve aspect ratio)":
img.thumbnail((max_w, max_h), Image.Resampling.LANCZOS)
else: # Stretch
img = img.resize((max_w, max_h), Image.Resampling.LANCZOS)
save_kwargs = {}
pil_format = 'JPEG' if output_format == 'JPG' else output_format
if pil_format in ['JPEG', 'WEBP']:
save_kwargs['quality'] = quality
img.save(output_path, pil_format, **save_kwargs)
def batch_resize_convert_images(files, output_format, quality, enable_resize, max_w, max_h, resize_mode):
if not files: raise gr.Error("Please upload at least one image.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
job_name = "resized_converted"
job_temp_dir = os.path.join(TEMP_DIR, f"{job_name}_{timestamp}"); os.makedirs(job_temp_dir, exist_ok=True)
output_paths = []
processing_kwargs = {
'output_format': output_format, 'quality': quality, 'enable_resize': enable_resize,
'max_w': max_w, 'max_h': max_h, 'resize_mode': resize_mode
}
for file_obj in files:
try:
base, _ = os.path.splitext(os.path.basename(file_obj.name))
output_filename = f"{base}.{output_format.lower()}"
output_path = os.path.join(job_temp_dir, output_filename)
resize_convert_single_image(file_obj.name, output_path, **processing_kwargs)
output_paths.append(output_path)
except Exception as e: print(f"Skipping file {file_obj.name} due to error: {e}"); continue
if not output_paths: shutil.rmtree(job_temp_dir); raise gr.Error("No images could be processed.")
zip_base_name = os.path.join(TEMP_DIR, f"{job_name}_archive_{timestamp}")
zip_path = shutil.make_archive(zip_base_name, 'zip', job_temp_dir)
return output_paths[:100], zip_path
def apply_watermark_single(input_path, output_path, watermark_text, position, opacity):
with Image.open(input_path).convert("RGBA") as image:
if not watermark_text: raise ValueError("Watermark text cannot be empty.")
txt = Image.new("RGBA", image.size, (255, 255, 255, 0))
try: font = ImageFont.truetype("DejaVuSans.ttf", int(image.width / 20))
except IOError: font = ImageFont.load_default()
d = ImageDraw.Draw(txt); bbox = d.textbbox((0, 0), watermark_text, font=font); w, h = bbox[2]-bbox[0], bbox[3]-bbox[1]
pos_map = {"Top-Left":(10,10), "Top-Right":(image.width-w-10,10), "Bottom-Left":(10,image.height-h-10), "Bottom-Right":(image.width-w-10,image.height-h-10), "Center":((image.width-w)/2,(image.height-h)/2)}
d.text(pos_map[position], watermark_text, font=font, fill=(255, 255, 255, int(255 * (opacity / 100))))
Image.alpha_composite(image, txt).convert("RGB").save(output_path)
def apply_watermark_batch(files, watermark_text, position, opacity):
if not watermark_text: raise gr.Error("Please provide watermark text.")
processing_func = lambda input_path, output_path: apply_watermark_single(
input_path, output_path, watermark_text=watermark_text, position=position, opacity=opacity
)
output_paths, zip_path, _ = batch_image_processor(files, processing_func, "watermarked")
return output_paths, zip_path
def convert_compress_video(video_path, out_format, v_codec, crf_value, scale_option, a_codec, a_bitrate):
if not video_path: raise gr.Error("Please upload a video to convert.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_filename = f"converted_{timestamp}.{out_format.lower()}"
output_path = os.path.join(TEMP_DIR, output_filename)
cmd = ["ffmpeg", "-i", video_path]
vf_filters = []
if scale_option != "Original":
w, h = get_video_dimensions(video_path)
if w > 0 and h > 0:
target_h = int(scale_option.replace('p', ''))
target_w = round(w * target_h / h / 2) * 2
vf_filters.append(f"scale={target_w}:{target_h}")
if vf_filters: cmd.extend(["-vf", ",".join(vf_filters)])
cmd.extend(["-c:v", v_codec])
if v_codec in ["libx264", "libx265"]: cmd.extend(["-crf", str(crf_value)])
cmd.extend(["-pix_fmt", "yuv420p"])
if a_codec == "copy": cmd.extend(["-c:a", "copy"])
else: cmd.extend(["-c:a", a_codec, "-b:a", f"{a_bitrate}k"])
cmd.extend(["-y", output_path])
run_ffmpeg_command(cmd, "Converting and Compressing Video...")
return output_path
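# CRF only applies to the libx264/libx265 encoders: lower values mean higher quality and bigger
# files (roughly 18-23 is the usual x264 range). When downscaling, the width is derived from the
# requested height to preserve the aspect ratio and rounded to an even number for yuv420p.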
def apply_video_fade(video_path, fade_in_duration, fade_out_duration):
if not video_path: raise gr.Error("Please upload a video.")
video_duration = get_media_duration(video_path)
if fade_in_duration + fade_out_duration > video_duration: raise gr.Error("The sum of fade durations cannot be greater than the video duration.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_video_path = os.path.join(TEMP_DIR, f"faded_video_{timestamp}.mp4")
fade_filters = []
if fade_in_duration > 0: fade_filters.append(f"fade=t=in:st=0:d={fade_in_duration}")
if fade_out_duration > 0: fade_out_start = video_duration - fade_out_duration; fade_filters.append(f"fade=t=out:st={fade_out_start}:d={fade_out_duration}")
if not fade_filters: gr.Info("No fade applied."); return video_path
cmd = ["ffmpeg", "-i", video_path, "-vf", ",".join(fade_filters), "-c:a", "copy", "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path]
run_ffmpeg_command(cmd, "Applying video fade...")
return output_video_path
def trim_and_fade_audio(audio_path, start_time, end_time, fade_in_duration, fade_out_duration):
if not audio_path: raise gr.Error("Please upload an audio file.")
audio_duration = get_media_duration(audio_path)
if start_time < 0: start_time = 0
if end_time <= 0 or end_time > audio_duration: end_time = audio_duration
if start_time >= end_time: raise gr.Error("Start time must be less than end time.")
trimmed_duration = end_time - start_time
if fade_in_duration + fade_out_duration > trimmed_duration: raise gr.Error("Sum of fade durations cannot be greater than the trimmed audio duration.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_audio_path = os.path.join(TEMP_DIR, f"edited_audio_{timestamp}.mp3")
af_filters = []
if fade_in_duration > 0: af_filters.append(f"afade=t=in:st=0:d={fade_in_duration}")
if fade_out_duration > 0: fade_out_start = trimmed_duration - fade_out_duration; af_filters.append(f"afade=t=out:st={fade_out_start}:d={fade_out_duration}")
cmd = ["ffmpeg", "-ss", str(start_time), "-to", str(end_time), "-i", audio_path]
if af_filters: cmd.extend(["-af", ",".join(af_filters)])
cmd.extend(["-y", output_audio_path])
run_ffmpeg_command(cmd, "Trimming and fading audio...")
return output_audio_path
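# Fade timing note: because -ss/-to are applied before the input, the afade start times are
# computed relative to the trimmed clip, so the fade-out begins at
# (end_time - start_time) - fade_out_duration rather than at an absolute position in the source.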
# --- FLUX API ---
FLUX_MODELS = {"FLUX.1-schnell (Fast)": "black-forest-labs/FLUX.1-schnell", "FLUX.1-dev (High Quality)": "black-forest-labs/FLUX.1-dev"}
def call_flux_api(prompt, model_choice, width, height, hf_token):
if not hf_token: raise gr.Error("Hugging Face User Access Token is required.")
try:
client = Client(FLUX_MODELS[model_choice], hf_token=hf_token)
return client.predict(prompt=prompt, seed=0, randomize_seed=True, width=width, height=height, num_inference_steps=8 if "dev" in model_choice else 4, api_name="/infer")[0]
except Exception as e: raise gr.Error(f"API call failed: {e}")
def get_image_as_base64(path):
try:
with open(path, "rb") as f: return f"data:image/png;base64,{base64.b64encode(f.read()).decode('utf-8')}"
except FileNotFoundError: return None
# --- Transfer Tab Functions (Simplified) ---
def filter_presets(query, all_presets):
if not query:
return gr.update(choices=sorted(list(all_presets.keys())))
filtered_keys = [key for key in all_presets.keys() if query.lower() in key.lower()]
return gr.update(choices=sorted(filtered_keys))
def save_preset(presets, name, url):
if not name or not name.strip():
gr.Warning("Preset name cannot be empty."); return presets, gr.update()
if not url or not url.strip():
gr.Warning("Target URL cannot be empty."); return presets, gr.update()
presets[name] = url
gr.Info(f"Preset '{name}' saved!")
return presets, gr.update(choices=sorted(list(presets.keys())))
def delete_preset(presets, name):
if name in presets:
del presets[name]
gr.Info(f"Preset '{name}' deleted!")
return presets, gr.update(choices=sorted(list(presets.keys())), value=None), ""
gr.Warning(f"Preset '{name}' not found.")
return presets, gr.update(), gr.update()
def load_preset(presets, name):
return presets.get(name, "")
# --- Join/Beat-Sync/Etc Video Feature Functions ---
def ping_pong_video(video_path, audio_option):
if not video_path: raise gr.Error("Please upload a video.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
job_temp_dir = os.path.join(TEMP_DIR, f"pingpong_{timestamp}"); os.makedirs(job_temp_dir, exist_ok=True)
reversed_video_path = os.path.join(job_temp_dir, "reversed_temp.mp4")
cmd_reverse = ["ffmpeg", "-i", video_path, "-vf", "reverse"]
if audio_option == "Reverse Audio": cmd_reverse.extend(["-af", "areverse"])
else: cmd_reverse.append("-an")
cmd_reverse.extend(["-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", reversed_video_path])
run_ffmpeg_command(cmd_reverse)
file_list_path = os.path.join(job_temp_dir, "files.txt")
with open(file_list_path, 'w', encoding='utf-8') as f:
f.write(f"file '{os.path.abspath(video_path)}'\n")
f.write(f"file '{os.path.abspath(reversed_video_path)}'\n")
output_video_path = os.path.join(TEMP_DIR, f"pingpong_video_{timestamp}.mp4")
cmd_join = ["ffmpeg", "-f", "concat", "-safe", "0", "-i", file_list_path, "-c", "copy", "-y", output_video_path]
if audio_option == "Original Audio Only":
cmd_join = ["ffmpeg", "-i", video_path, "-i", reversed_video_path, "-filter_complex", "[0:v][1:v]concat=n=2:v=1[v]", "-map", "[v]", "-map", "0:a?", "-c:a", "copy", "-y", output_video_path]
run_ffmpeg_command(cmd_join)
shutil.rmtree(job_temp_dir)
return output_video_path
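# Ping-pong = the original clip followed by its reversed copy. The default path concatenates the
# two files with the concat demuxer and stream-copies them; the "Original Audio Only" path instead
# re-encodes through filter_complex concat so the untouched source audio can be mapped over the
# doubled-length video.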
# --- STORYBOARD / ANIMATIC CREATOR FUNCTIONS ---
def get_file_type(file_path):
if not file_path: return "unknown"
image_exts = ['.png', '.jpg', '.jpeg', '.webp', '.bmp', '.gif']
video_exts = ['.mp4', '.mov', '.mkv', '.avi', '.webm']
ext = os.path.splitext(file_path.lower())[1]
if ext in image_exts: return "image"
if ext in video_exts: return "video"
return "unknown"
def add_assets_to_bin(files, current_assets):
if not files:
return current_assets, gr.update(value=[a['path'] for a in current_assets] if current_assets else None)
session_id = f"storyboard_session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
asset_session_dir = os.path.join(TEMP_DIR, session_id)
os.makedirs(asset_session_dir, exist_ok=True)
updated_asset_list = list(current_assets)
for file_obj in files:
try:
file_type = get_file_type(file_obj.name)
if file_type == "unknown":
gr.Warning(f"Skipping unknown file type: {os.path.basename(file_obj.name)}")
continue
new_path = os.path.join(asset_session_dir, os.path.basename(file_obj.name))
shutil.copy(file_obj.name, new_path)
updated_asset_list.append({"path": new_path, "name": os.path.basename(new_path), "type": file_type})
except Exception as e:
gr.Warning(f"Error adding asset {os.path.basename(file_obj.name)}: {e}")
return updated_asset_list, gr.update(value=[a['path'] for a in updated_asset_list])
def handle_asset_selection(evt: gr.SelectData, assets_state, timeline_state):
if not evt.selected:
return timeline_state, None
selected_asset = assets_state[evt.index]
new_timeline = list(timeline_state)
item_to_add = {
"path": selected_asset['path'],
"name": selected_asset['name'],
"type": selected_asset['type'],
}
if selected_asset['type'] == 'image':
item_to_add.update({
"duration": 3.0,
"start_time": 0,
"original_duration": 0
})
else: # video
original_duration = get_media_duration(selected_asset['path'])
item_to_add.update({
"duration": round(original_duration, 2),
"start_time": 0.0,
"original_duration": round(original_duration, 2)
})
new_timeline.append(item_to_add)
gr.Info(f"Added '{selected_asset['name']}' to timeline.")
preview_frames = None
if selected_asset['type'] == 'video':
try:
preview_frames = extract_first_last_frame(selected_asset['path'])
except Exception as e:
print(f"Could not generate preview for {selected_asset['name']}: {e}")
return new_timeline, preview_frames
def add_all_assets_to_timeline(assets_state, timeline_state):
if not assets_state:
gr.Warning("Asset bin is empty.")
return timeline_state
new_timeline = list(timeline_state)
for asset in assets_state:
item_to_add = {
"path": asset['path'],
"name": asset['name'],
"type": asset['type'],
}
if asset['type'] == 'image':
item_to_add.update({
"duration": 3.0,
"start_time": 0,
"original_duration": 0
})
else: # video
original_duration = get_media_duration(asset['path'])
item_to_add.update({
"duration": round(original_duration, 2),
"start_time": 0.0,
"original_duration": round(original_duration, 2)
})
new_timeline.append(item_to_add)
gr.Info(f"Added {len(assets_state)} assets to the timeline.")
return new_timeline
def update_timeline_df(timeline_state):
if not timeline_state: return gr.update(value=None)
df_data = [[i + 1, item['name'], item['type'], item['duration']] for i, item in enumerate(timeline_state)]
return gr.update(value=df_data)
def handle_timeline_selection(timeline_state, evt: gr.SelectData):
if not evt.selected:
return -1, None, None, gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=False), gr.update(visible=False), 0, 0
index = evt.index[0]
if not (0 <= index < len(timeline_state)):
return -1, None, None, gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=False), gr.update(visible=False), 0, 0
selected_item = timeline_state[index]
preview_val = selected_item['path']
duration_val = selected_item['duration']
can_move_up = index > 0
can_move_down = index < len(timeline_state) - 1
if selected_item['type'] == 'video':
start_time = selected_item.get('start_time', 0.0)
end_time = start_time + selected_item['duration']
return (index, preview_val, duration_val,
gr.update(interactive=can_move_up), gr.update(interactive=can_move_down), gr.update(interactive=True),
gr.update(visible=True), round(start_time, 2), round(end_time, 2))
else: # Image
return (index, preview_val, duration_val,
gr.update(interactive=can_move_up), gr.update(interactive=can_move_down), gr.update(interactive=True),
gr.update(visible=False), 0, 0)
def apply_trim_and_update(timeline_state, selected_index, new_start, new_end):
if selected_index == -1 or not (0 <= selected_index < len(timeline_state)):
gr.Warning("No clip selected in timeline.")
return timeline_state, gr.update()
item_to_update = timeline_state[selected_index]
if item_to_update['type'] != 'video':
gr.Warning("Trimming is only available for video clips.")
return timeline_state, gr.update()
original_duration = item_to_update.get('original_duration', 0)
if not (0 <= new_start < new_end and new_end <= original_duration):
gr.Warning(f"Invalid trim times. Must be between 0 and {original_duration:.2f}s, and start must be before end.")
return timeline_state, gr.update()
new_duration = new_end - new_start
new_timeline = list(timeline_state)
new_timeline[selected_index]['start_time'] = round(new_start, 2)
new_timeline[selected_index]['duration'] = round(new_duration, 2)
gr.Info(f"Clip '{item_to_update['name']}' trimmed. New duration is {new_duration:.2f}s.")
return new_timeline, gr.update(value=round(new_duration, 2))
def update_clip_properties(timeline_state, selected_index, new_duration):
if selected_index == -1 or not (0 <= selected_index < len(timeline_state)):
gr.Warning("No clip selected in timeline.")
return timeline_state
if new_duration <= 0:
gr.Warning("Duration must be a positive number.")
return timeline_state
new_timeline = list(timeline_state)
item_to_update = new_timeline[selected_index]
if item_to_update['type'] == 'video':
start_time = item_to_update.get('start_time', 0.0)
original_duration = item_to_update.get('original_duration', 0.0)
max_possible_duration = original_duration - start_time
if new_duration > max_possible_duration:
gr.Warning(f"Duration cannot exceed available video length from start time ({max_possible_duration:.2f}s). Clamping value.")
new_duration = max_possible_duration
item_to_update['duration'] = round(new_duration, 2)
gr.Info(f"Updated duration for '{item_to_update['name']}'.")
return new_timeline
def handle_timeline_action(timeline_state, selected_index, action):
if selected_index == -1 or not (0 <= selected_index < len(timeline_state)):
gr.Warning("Please select a clip from the timeline first.")
return timeline_state
new_list = list(timeline_state)
if action == "up" and selected_index > 0:
new_list.insert(selected_index - 1, new_list.pop(selected_index))
elif action == "down" and selected_index < len(new_list) - 1:
new_list.insert(selected_index + 1, new_list.pop(selected_index))
elif action == "remove":
new_list.pop(selected_index)
return new_list
def create_animatic(timeline_data, audio_path, out_w, out_h):
if not timeline_data:
raise gr.Error("Timeline is empty. Please add assets to the timeline.")
out_w, out_h = int(out_w), int(out_h)
if out_w <= 0 or out_h <= 0:
raise gr.Error("Output width and height must be positive numbers.")
out_w -= out_w % 2
out_h -= out_h % 2
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
job_temp_dir = os.path.join(TEMP_DIR, f"animatic_{timestamp}")
os.makedirs(job_temp_dir, exist_ok=True)
clip_paths = []
output_res_str = f"{out_w}:{out_h}"
for i, item in enumerate(timeline_data):
item_path, item_type, item_duration = item['path'], item['type'], item['duration']
output_clip_path = os.path.join(job_temp_dir, f"clip_{i:04d}.mp4")
vf_filter = f"scale={output_res_str}:force_original_aspect_ratio=decrease,pad={output_res_str}:(ow-iw)/2:(oh-ih)/2"
cmd = ["ffmpeg"]
if item_type == "image":
cmd.extend(["-loop", "1"])
elif item_type == "video":
start_time = item.get('start_time', 0)
if start_time > 0:
cmd.extend(["-ss", str(start_time)])
cmd.extend(["-i", item_path])
cmd.extend([
"-t", str(item_duration), "-c:v", "libx264",
"-pix_fmt", "yuv420p", "-vf", vf_filter,
"-an", "-y", output_clip_path
])
run_ffmpeg_command(cmd, f"Processing clip {i+1}")
clip_paths.append(output_clip_path)
file_list_path = os.path.join(job_temp_dir, "files.txt")
with open(file_list_path, 'w', encoding='utf-8') as f:
for path in clip_paths:
f.write(f"file '{os.path.abspath(path)}'\n")
silent_animatic_path = os.path.join(job_temp_dir, "silent_final.mp4")
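# Every clip above was re-encoded with identical settings, so the concat demuxer can join them with a plain stream copy.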
run_ffmpeg_command(["ffmpeg", "-f", "concat", "-safe", "0", "-i", file_list_path, "-c", "copy", "-y", silent_animatic_path], "Joining video clips")
final_output_path = os.path.join(TEMP_DIR, f"animatic_final_{timestamp}.mp4")
if audio_path:
run_ffmpeg_command(["ffmpeg", "-i", silent_animatic_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-shortest", "-y", final_output_path], "Muxing audio")
else:
shutil.move(silent_animatic_path, final_output_path)
shutil.rmtree(job_temp_dir)
return final_output_path
def detect_bpm(audio_path):
if not audio_path:
return "Please upload an audio track first."
try:
y, sr = librosa.load(audio_path)
tempo_val, _ = librosa.beat.beat_track(y=y, sr=sr)
if isinstance(tempo_val, np.ndarray):
tempo = tempo_val.item()
else:
tempo = float(tempo_val)
if tempo > 0:
return f"Detected BPM: {tempo:.2f}"
else:
return "Could not detect BPM."
except Exception as e:
print(f"--- BPM DETECTION ERROR ---\n{e}")
return "Error: Could not analyze audio file."
def create_rhythmic_animatic(timeline_data, audio_path, measure_choice, out_w, out_h):
if not timeline_data: raise gr.Error("Timeline is empty.")
if not audio_path: raise gr.Error("An audio track is required for rhythmic editing.")
try:
y, sr = librosa.load(audio_path)
tempo_val, _ = librosa.beat.beat_track(y=y, sr=sr)
if isinstance(tempo_val, np.ndarray):
tempo = tempo_val.item()
else:
tempo = float(tempo_val)
if not tempo or tempo <= 0:
raise gr.Error("Could not determine BPM from audio file.")
except Exception as e:
raise gr.Error(f"Audio analysis failed: {e}")
seconds_per_beat = 60.0 / tempo
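# Assumes 4/4 time: one measure = four beats.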
seconds_per_measure = seconds_per_beat * 4.0
measure_multipliers = { "2 Measures": 2.0, "1 Measure": 1.0, "1/2 Measure": 0.5, "1/4 Measure (Beat)": 0.25 }
clip_duration = seconds_per_measure * measure_multipliers[measure_choice]
rhythmic_timeline = []
for item in timeline_data:
new_item = item.copy()
if new_item['type'] == 'video':
start_time = new_item.get('start_time', 0)
available_duration = new_item.get('original_duration', 0) - start_time
new_item['duration'] = min(clip_duration, available_duration)
else:
new_item['duration'] = clip_duration
rhythmic_timeline.append(new_item)
gr.Info(f"Re-timed {len(rhythmic_timeline)} clips to ~{clip_duration:.2f}s each based on {tempo:.2f} BPM.")
return create_animatic(rhythmic_timeline, audio_path, out_w, out_h)
# --- NEW CREATIVE FUNCTIONS ---
def _create_auto_trailer_impl(video_path, trailer_duration, clip_duration, analysis_method, transition_style, music_path, out_w, out_h, progress: Progress):
"""Internal implementation of the auto-trailer creator."""
if not video_path: raise gr.Error("Please upload a source video.")
source_duration = get_media_duration(video_path)
if source_duration < trailer_duration:
gr.Warning(f"Source video is only {source_duration:.1f}s long. The trailer duration will be capped at the source video length.")
trailer_duration = source_duration
if clip_duration > trailer_duration:
new_clip_duration = trailer_duration / 2 if trailer_duration > 2 else trailer_duration
gr.Warning(f"Clip duration ({clip_duration}s) is longer than the trailer duration ({trailer_duration:.1f}s). Adjusting clip duration to {new_clip_duration:.1f}s.")
clip_duration = new_clip_duration
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
job_temp_dir = os.path.join(TEMP_DIR, f"trailer_{timestamp}")
os.makedirs(job_temp_dir, exist_ok=True)
try:
# --- 1. Analyze Video to Find Interesting Clips ---
progress(0, desc="Analyzing video for high-motion scenes...")
cap = cv2.VideoCapture(video_path)
fps = cap.get(cv2.CAP_PROP_FPS)
if fps == 0: fps = 30 # fallback
chunk_duration_frames = int(clip_duration * fps)
video_total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
chunk_scores = []
prev_frame = None
frame_num = 0
while cap.isOpened():
ret, frame = cap.read()
if not ret: break
frame_skip = max(1, int(fps / 5)) # Analyze ~5 frames per second
if frame_num % frame_skip == 0:
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (21, 21), 0)
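# Motion metric: blur each sampled frame, difference it against the previous sample, threshold, and sum the changed pixels.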
if prev_frame is not None:
frame_delta = cv2.absdiff(prev_frame, gray)
thresh = cv2.threshold(frame_delta, 25, 255, cv2.THRESH_BINARY)[1]
motion_score = np.sum(thresh)
chunk_index = frame_num // chunk_duration_frames
while len(chunk_scores) <= chunk_index:
chunk_scores.append({'start_time': (len(chunk_scores) * clip_duration), 'scores': []})
chunk_scores[chunk_index]['scores'].append(motion_score)
prev_frame = gray
frame_num += 1
if frame_num % 100 == 0:
progress(0.2 * (frame_num / video_total_frames), desc=f"Analyzing frame {frame_num}/{video_total_frames}...")
cap.release()
final_chunks = [{'start_time': chunk['start_time'], 'score': sum(chunk['scores']) / len(chunk['scores'])} for chunk in chunk_scores if chunk['scores']]
if not final_chunks: raise gr.Error("Could not analyze video for motion. Is the video very short or static?")
# --- 2. Select the Best Clips ---
progress(0.2, desc="Selecting the best clips...")
num_clips_to_select = max(1, int(trailer_duration / clip_duration))
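# Rank chunks by average motion, keep the top N, then re-sort by start time so the trailer plays in source order.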
selected_clips_info = sorted(sorted(final_chunks, key=lambda x: x['score'], reverse=True)[:num_clips_to_select], key=lambda x: x['start_time'])
# --- 3. Extract and Process Clips ---
extracted_clips, out_w, out_h = [], int(out_w) - (int(out_w) % 2), int(out_h) - (int(out_h) % 2)
for i, clip_info in enumerate(selected_clips_info):
progress(0.2 + (0.5 * (i / len(selected_clips_info))), desc=f"Extracting clip {i+1}/{len(selected_clips_info)}...")
output_clip_path = os.path.join(job_temp_dir, f"clip_{i:03d}.mp4")
vf_filter = f"scale={out_w}:{out_h}:force_original_aspect_ratio=decrease,pad={out_w}:{out_h}:(ow-iw)/2:(oh-ih)/2,setsar=1"
cmd = ["ffmpeg", "-y", "-ss", str(clip_info['start_time']), "-i", video_path, "-t", str(clip_duration), "-vf", vf_filter, "-an", "-c:v", "libx264", "-pix_fmt", "yuv420p", output_clip_path]
run_ffmpeg_command(cmd)
extracted_clips.append(output_clip_path)
# --- 4. Stitch Clips with Transitions ---
if not extracted_clips: raise gr.Error("Failed to extract any clips.")
progress(0.7, desc="Stitching clips together...")
final_silent_path = os.path.join(job_temp_dir, "final_silent.mp4")
if transition_style == "None" or len(extracted_clips) == 1:
if len(extracted_clips) == 1:
shutil.copy(extracted_clips[0], final_silent_path)
else:
file_list_path = os.path.join(job_temp_dir, "files.txt")
with open(file_list_path, 'w', encoding='utf-8') as f:
for path in extracted_clips: f.write(f"file '{os.path.abspath(path)}'\n")
run_ffmpeg_command(["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", file_list_path, "-c", "copy", final_silent_path], "Concatenating clips...")
else:
# --- IMPROVEMENT: Build a single, efficient FFMPEG command ---
transition_duration = 0.5
cmd = ["ffmpeg", "-y"]
filter_complex = []
running_duration = 0
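# Chain xfade filters: each transition blends the previous chain output with the next input; 'offset' is where the crossfade starts on the accumulated timeline, which shrinks by transition_duration per transition.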
for i, clip_path in enumerate(extracted_clips):
cmd.extend(["-i", clip_path])
for i in range(len(extracted_clips) - 1):
input1 = f"[{i}:v]" if i == 0 else f"[v{i-1}]"
input2 = f"[{i+1}:v]"
output = f"[v{i}]"
offset = max(0, running_duration + clip_duration - transition_duration)
filter_complex.append(f"{input1}{input2}xfade=transition={transition_style.lower()}:duration={transition_duration}:offset={offset}{output}")
running_duration += clip_duration - transition_duration
cmd.extend([
"-filter_complex", ";".join(filter_complex),
"-map", f"[v{len(extracted_clips)-2}]",
"-c:v", "libx264", "-pix_fmt", "yuv420p",
final_silent_path
])
run_ffmpeg_command(cmd, "Applying transitions...")
# --- 5. Mux Audio ---
progress(0.95, desc="Adding background music...")
final_output_path = os.path.join(TEMP_DIR, f"trailer_final_{timestamp}.mp4")
if music_path:
run_ffmpeg_command(["ffmpeg", "-y", "-i", final_silent_path, "-i", music_path, "-c:v", "copy", "-c:a", "aac", "-map", "0:v:0", "-map", "1:a:0", "-shortest", final_output_path], "Muxing audio")
else:
shutil.move(final_silent_path, final_output_path)
return final_output_path
finally:
if os.path.exists(job_temp_dir): shutil.rmtree(job_temp_dir)
def auto_trailer_wrapper(video_path, trailer_duration, clip_duration, analysis_method, transition_style, music_path, out_w, out_h, progress=gr.Progress(track_tqdm=True)):
return _create_auto_trailer_impl(video_path, trailer_duration, clip_duration, analysis_method, transition_style, music_path, out_w, out_h, progress)
def generate_waveform_video(video_path, style, size, position, color):
if not video_path: raise gr.Error("Please upload a video first.")
if not has_audio_stream(video_path):
raise gr.Error("The uploaded video has no audio track. A waveform cannot be generated.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_video_path = os.path.join(TEMP_DIR, f"waveform_video_{timestamp}.mp4")
pos_map = {
"Bottom": f"overlay=x=(W-w)/2:y=H-h-50",
"Center": f"overlay=x=(W-w)/2:y=(H-h)/2",
"Top": f"overlay=x=(W-w)/2:y=50"
}
safe_color = color.lstrip('#')
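# showwaves draws the waveform from the audio stream; the overlay filter then composites it onto the video at the chosen position.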
filter_complex = (
f"[0:a]showwaves=s={size}:mode={style}:colors={safe_color}:rate=25[wave];"
f"[0:v][wave]{pos_map[position]}"
)
cmd = [
"ffmpeg", "-i", video_path,
"-filter_complex", filter_complex,
"-c:a", "copy",
"-c:v", "libx264", "-pix_fmt", "yuv420p", "-y",
output_video_path
]
run_ffmpeg_command(cmd, "Generating Audio Waveform...")
return output_video_path
def create_pip_video(main_video, overlay_media, position, scale):
if not main_video: raise gr.Error("Please upload a main video.")
if not overlay_media: raise gr.Error("Please upload an overlay video or image.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_video_path = os.path.join(TEMP_DIR, f"pip_video_{timestamp}.mp4")
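# Scale the overlay relative to its own width (height follows automatically to keep the aspect ratio), then composite it over the main video.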
scale_filter = f"[1:v]scale=iw*{scale}:-1[scaled_overlay]"
pos_map = {
"Top-Left": "x=10:y=10",
"Top-Center": "x=(W-w)/2:y=10",
"Top-Right": "x=W-w-10:y=10",
"Center-Left": "x=10:y=(H-h)/2",
"Center": "x=(W-w)/2:y=(H-h)/2",
"Center-Right": "x=W-w-10:y=(H-h)/2",
"Bottom-Left": "x=10:y=H-h-10",
"Bottom-Center": "x=(W-w)/2:y=H-h-10",
"Bottom-Right": "x=W-w-10:y=H-h-10"
}
overlay_filter = f"[0:v][scaled_overlay]overlay={pos_map[position]}"
cmd = ["ffmpeg", "-i", main_video, "-i", overlay_media.name]
cmd.extend([
"-filter_complex", f"{scale_filter};{overlay_filter}",
"-map", "0:a?", "-c:a", "copy",
"-c:v", "libx264", "-pix_fmt", "yuv420p", "-y",
output_video_path
])
run_ffmpeg_command(cmd, "Creating Picture-in-Picture video...")
return output_video_path
def create_meme(image, text_input, position, font_choice, font_size_scale, text_color, outline_color):
if image is None: raise gr.Error("Please upload an image.")
parsed_text_color = parse_color(text_color)
parsed_outline_color = parse_color(outline_color)
img = Image.fromarray(image).convert("RGB")
draw = ImageDraw.Draw(img)
FONT_MAP = {
"Impact": "impact.ttf",
"Arial": "arial.ttf",
"Arial Black": "ariblk.ttf",
"Comic Sans MS": "comic.ttf",
"Courier New": "cour.ttf",
"Georgia": "georgia.ttf",
"Tahoma": "tahoma.ttf",
"Times New Roman": "times.ttf",
"Trebuchet MS": "trebuc.ttf",
"Verdana": "verdana.ttf"
}
font_path = FONT_MAP.get(font_choice, "impact.ttf")
try:
font_size = int(img.width / 10 * (font_size_scale / 5))
font = ImageFont.truetype(font_path, font_size)
except IOError:
gr.Warning(f"{font_choice} font ('{font_path}') not found. Trying Arial.")
try:
font_path = FONT_MAP["Arial"]
font = ImageFont.truetype(font_path, font_size)
except IOError:
gr.Warning("Arial font not found. Using default font.")
font = ImageFont.load_default()
def draw_text_with_outline(text, x, y):
# Outline
draw.text((x-2, y-2), text, font=font, fill=parsed_outline_color)
draw.text((x+2, y-2), text, font=font, fill=parsed_outline_color)
draw.text((x-2, y+2), text, font=font, fill=parsed_outline_color)
draw.text((x+2, y+2), text, font=font, fill=parsed_outline_color)
# Main Text
draw.text((x, y), text, font=font, fill=parsed_text_color)
if text_input:
bbox = draw.textbbox((0, 0), text_input.upper(), font=font)
text_width = bbox[2] - bbox[0]
text_height = bbox[3] - bbox[1]
x = (img.width - text_width) / 2
if position == "Top":
y = 10
elif position == "Bottom":
y = img.height - text_height - 10
else: # Center
y = (img.height - text_height) / 2
draw_text_with_outline(text_input.upper(), x, y)
return img
def merge_videos(videos):
if not videos or len(videos) < 2:
raise gr.Error("Please upload at least two videos to merge.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
job_temp_dir = os.path.join(TEMP_DIR, f"merge_{timestamp}")
os.makedirs(job_temp_dir, exist_ok=True)
first_video_path = videos[0].name
w, h = get_video_dimensions(first_video_path)
fps = get_video_fps(first_video_path)
w -= w % 2
h -= h % 2
processed_clips = []
for i, video_file in enumerate(videos):
clip_path = os.path.join(job_temp_dir, f"clip_{i}.mp4")
cmd = [
"ffmpeg", "-i", video_file.name,
"-vf", f"scale={w}:{h},setsar=1", "-r", str(fps),
"-c:v", "libx264", "-pix_fmt", "yuv420p",
"-c:a", "aac", "-ar", "44100",
"-y", clip_path
]
run_ffmpeg_command(cmd)
processed_clips.append(clip_path)
file_list_path = os.path.join(job_temp_dir, "files.txt")
with open(file_list_path, 'w', encoding='utf-8') as f:
for path in processed_clips:
f.write(f"file '{os.path.abspath(path)}'\n")
output_video_path = os.path.join(TEMP_DIR, f"merged_video_{timestamp}.mp4")
cmd_merge = [
"ffmpeg", "-f", "concat", "-safe", "0", "-i", file_list_path,
"-c", "copy", "-y", output_video_path
]
run_ffmpeg_command(cmd_merge, "Merging videos...")
shutil.rmtree(job_temp_dir)
return output_video_path
def _create_automated_slideshow_impl(images, audio_path, kb_effect_style, transition_style, rhythm_choice, out_w, out_h, progress: Progress):
"""Internal implementation of the slideshow creator with progress tracking."""
if not images: raise gr.Error("Please upload at least one image.")
if not audio_path: raise gr.Error("Please upload an audio track for rhythmic editing.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
job_temp_dir = os.path.join(TEMP_DIR, f"slideshow_{timestamp}")
os.makedirs(job_temp_dir, exist_ok=True)
try:
out_w, out_h = int(out_w) - (int(out_w) % 2), int(out_h) - (int(out_h) % 2)
output_res_str = f"{out_w}x{out_h}"
fps = 30
transition_duration = 0.5
progress(0, desc="Analyzing audio track...")
try:
y, sr = librosa.load(audio_path)
audio_duration = librosa.get_duration(y=y, sr=sr)
_, beat_frames = librosa.beat.beat_track(y=y, sr=sr, units='frames')
beat_times = librosa.frames_to_time(beat_frames, sr=sr)
except Exception as e:
raise gr.Error(f"Audio analysis failed: {e}")
beats_per_clip = {"1 Image per Beat": 1, "1 Image every 2 Beats": 2, "1 Image per Measure (4 Beats)": 4}[rhythm_choice]
clip_start_times = [0.0] + [beat_times[i] for i in range(beats_per_clip, len(beat_times), beats_per_clip)]
MAX_CLIPS = 200
if len(clip_start_times) > MAX_CLIPS:
gr.Warning(f"Audio beat detection resulted in {len(clip_start_times)} clips. Capping at {MAX_CLIPS} to ensure performance.")
clip_start_times = clip_start_times[:MAX_CLIPS]
num_clips = len(clip_start_times)
image_paths = [img.name for img in images]
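# If there are more beat slots than images, cycle through the uploaded images again.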
looped_image_paths = [image_paths[i % len(image_paths)] for i in range(num_clips)]
kb_clips = []
total_steps = num_clips + 1
current_step = 0
for i in range(num_clips):
progress(current_step / total_steps, desc=f"Creating clip {i+1}/{num_clips}")
start_time = clip_start_times[i]
end_time = clip_start_times[i + 1] if i + 1 < len(clip_start_times) else audio_duration
clip_duration = end_time - start_time
if clip_duration <= transition_duration: continue
total_frames = int(clip_duration * fps)
if total_frames <= 0: continue
output_clip_path = os.path.join(job_temp_dir, f"kb_clip_{i:04d}.mp4")
with Image.open(looped_image_paths[i]) as img:
iw, ih = img.size
zoom_levels = {"Subtle": (1.1, 1.15), "Standard": (1.1, 1.25), "Dynamic": (1.2, 1.5)}
start_zoom = 1.0
end_zoom = random.uniform(*zoom_levels[kb_effect_style])
directions = ['top_left', 'top_right', 'bottom_left', 'bottom_right', 'center']
start_pos_name, end_pos_name = random.sample(directions, 2)
def get_xy(pos_name, zoom_val, img_w, img_h):
if pos_name == 'center': return (img_w/2 - (img_w/zoom_val)/2, img_h/2 - (img_h/zoom_val)/2)
if pos_name == 'top_left': return (0, 0)
if pos_name == 'top_right': return (img_w - img_w/zoom_val, 0)
if pos_name == 'bottom_left': return (0, img_h - img_h/zoom_val)
if pos_name == 'bottom_right': return (img_w - img_w/zoom_val, img_h - img_h/zoom_val)
return (0,0)
start_x, start_y = get_xy(start_pos_name, start_zoom, iw, ih)
end_x, end_y = get_xy(end_pos_name, end_zoom, iw, ih)
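# In zoompan expressions 'on' is the output frame index, so position and zoom are interpolated linearly from their start to end values across the clip.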
x_expr = f"{start_x}+({end_x}-({start_x}))*on/({total_frames}-1)"
y_expr = f"{start_y}+({end_y}-({start_y}))*on/({total_frames}-1)"
z_expr = f"if(lte(on,0),{start_zoom},{start_zoom}+({end_zoom}-{start_zoom})*on/({total_frames}-1))"
zoompan_filter = f"zoompan=z='{z_expr}':x='{x_expr}':y='{y_expr}':d={total_frames}:s={output_res_str}:fps={fps}"
cmd = ["ffmpeg", "-y", "-loop", "1", "-i", looped_image_paths[i], "-vf", zoompan_filter, "-t", str(clip_duration), "-c:v", "libx264", "-pix_fmt", "yuv420p", output_clip_path]
run_ffmpeg_command(cmd)
kb_clips.append({"path": output_clip_path, "duration": clip_duration})
current_step += 1
if not kb_clips: raise gr.Error("No clips were generated. The audio may be too short or the rhythm settings too fast.")
progress(current_step / total_steps, desc=f"Applying transitions...")
final_silent_path = os.path.join(job_temp_dir, "final_silent.mp4")
if len(kb_clips) == 1:
shutil.copy(kb_clips[0]['path'], final_silent_path)
else:
all_transitions = ["fade", "wipeleft", "wiperight", "wipeup", "wipedown", "slideleft", "slideright", "slideup", "slidedown", "dissolve"]
cmd = ["ffmpeg", "-y"]
filter_complex = []
running_duration = 0
for i, clip in enumerate(kb_clips):
cmd.extend(["-i", clip['path']])
for i in range(len(kb_clips) - 1):
input1 = f"[{i}:v]" if i == 0 else f"[v{i-1}]"
input2 = f"[{i+1}:v]"
output = f"[v{i}]"
transition = random.choice(all_transitions) if transition_style == "Random" else transition_style.lower()
offset = running_duration + kb_clips[i]['duration'] - transition_duration
filter_complex.append(f"{input1}{input2}xfade=transition={transition}:duration={transition_duration}:offset={offset}{output}")
running_duration += kb_clips[i]['duration'] - transition_duration
cmd.extend(["-filter_complex", ";".join(filter_complex), "-map", f"[v{len(kb_clips)-2}]", "-c:v", "libx264", "-pix_fmt", "yuv420p", final_silent_path])
run_ffmpeg_command(cmd)
progress(0.98, desc="Muxing final audio...")
final_output_path = os.path.join(TEMP_DIR, f"slideshow_final_{timestamp}.mp4")
run_ffmpeg_command(["ffmpeg", "-y", "-i", final_silent_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-shortest", final_output_path], "Muxing audio")
return final_output_path
finally:
if os.path.exists(job_temp_dir):
shutil.rmtree(job_temp_dir)
def slideshow_wrapper(images, audio_path, kb_effect_style, transition_style, rhythm_choice, out_w, out_h, progress=gr.Progress(track_tqdm=True)):
return _create_automated_slideshow_impl(images, audio_path, kb_effect_style, transition_style, rhythm_choice, out_w, out_h, progress)
# --- BLING --- CSS AND JS ---
bling_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;700&display=swap');
:root {
--bling-main-font: 'Inter', sans-serif;
--bling-gradient-start: #0f172a;
--bling-gradient-mid: #1e293b;
--bling-gradient-end: #334155;
--bling-accent-color: #38bdf8; /* sky-400 */
}
body, .gradio-container {
font-family: var(--bling-main-font) !important;
background: var(--bling-gradient-start);
background: linear-gradient(135deg, var(--bling-gradient-start) 0%, var(--bling-gradient-mid) 50%, var(--bling-gradient-end) 100%);
background-size: 200% 200%;
animation: gradient-animation 15s ease infinite;
}
@keyframes gradient-animation {
0% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
/* Glassmorphism for containers */
.gradio-tabs, .gradio-accordion, .gradio-group {
background: rgba(255, 255, 255, 0.05) !important;
border: 1px solid rgba(255, 255, 255, 0.1) !important;
border-radius: 12px !important;
backdrop-filter: blur(10px) !important;
-webkit-backdrop-filter: blur(10px) !important;
box-shadow: 0 4px 30px rgba(0, 0, 0, 0.1) !important;
}
/* Button Bling */
.gradio-button {
transition: all 0.2s ease-in-out !important;
box-shadow: 0 2px 4px rgba(0,0,0,0.2) !important;
}
.gradio-button:hover {
transform: translateY(-2px);
box-shadow: 0 4px 8px rgba(0,0,0,0.3) !important;
filter: brightness(1.1);
}
/* Custom Info/Warning Boxes */
.gradio-info {
background: rgba(56, 189, 248, 0.1) !important; /* sky-400 with alpha */
color: #f0f9ff !important; /* sky-50 */
border-left: 4px solid var(--bling-accent-color) !important;
border-radius: 8px !important;
}
.gradio-warning {
background: rgba(251, 191, 36, 0.1) !important; /* amber-400 with alpha */
color: #fffbeb !important; /* amber-50 */
border-left: 4px solid #fbbf24 !important;
border-radius: 8px !important;
}
/* Custom Scrollbars */
::-webkit-scrollbar { width: 8px; }
::-webkit-scrollbar-track { background: rgba(255, 255, 255, 0.1); }
::-webkit-scrollbar-thumb { background-color: var(--bling-accent-color); border-radius: 4px; }
::-webkit-scrollbar-thumb:hover { background-color: #0ea5e9; } /* sky-500 */
#custom-footer {
text-align: center !important;
padding: 20px 0 5px 0 !important;
font-size: .9em;
color: #94a3b8; /* slate-400 */
}
/* Loading Overlay CSS */
#loading-overlay {
position: fixed;
top: 0;
left: 0;
width: 100vw;
height: 100vh;
background-color: rgba(15, 23, 42, 0.8);
z-index: 10000;
display: flex;
justify-content: center;
align-items: center;
flex-direction: column;
color: white;
font-size: 1.2em;
backdrop-filter: blur(5px);
-webkit-backdrop-filter: blur(5px);
opacity: 0;
visibility: hidden;
transition: opacity 0.3s ease, visibility 0.3s ease;
}
#loading-overlay.visible {
opacity: 1;
visibility: visible;
}
.spinner {
width: 60px;
height: 60px;
border: 5px solid rgba(255, 255, 255, 0.3);
border-top-color: var(--bling-accent-color);
border-radius: 50%;
animation: spin 1s linear infinite;
margin-bottom: 20px;
}
@keyframes spin {
to { transform: rotate(360deg); }
}
"""
bling_js = """
() => {
// --- JKL Video Control ---
let activeVideo = null;
document.addEventListener('mouseover', (e) => {
if (e.target.tagName === 'VIDEO') {
activeVideo = e.target;
}
});
document.addEventListener('keydown', (e) => {
const activeElement = document.activeElement;
if (activeElement && (activeElement.tagName === 'INPUT' || activeElement.tagName === 'TEXTAREA')) {
return;
}
if (!activeVideo) return;
const frameTime = 1 / 30;
let handled = false;
switch (e.key.toLowerCase()) {
case 'k': activeVideo.paused ? activeVideo.play() : activeVideo.pause(); handled = true; break;
case 'j': activeVideo.currentTime = Math.max(0, activeVideo.currentTime - frameTime); handled = true; break;
case 'l': activeVideo.currentTime += frameTime; handled = true; break;
}
if (handled) e.preventDefault();
});
// --- Loading Overlay ---
function show_overlay(message = 'Processing... Please wait.') {
let overlay = document.getElementById('loading-overlay');
if (!overlay) {
overlay = document.createElement('div');
overlay.id = 'loading-overlay';
overlay.innerHTML = `<div class="spinner"></div><p id="loading-message"></p>`;
document.body.appendChild(overlay);
}
document.getElementById('loading-message').textContent = message;
overlay.classList.add('visible');
}
function hide_overlay() {
const overlay = document.getElementById('loading-overlay');
if (overlay) {
overlay.classList.remove('visible');
}
}
// --- Confetti ---
function fire_confetti() {
const script = document.createElement("script");
script.src = "https://cdn.jsdelivr.net/npm/canvas-confetti@1.9.2/dist/confetti.browser.min.js";
script.onload = () => confetti.create(null, { resize: true, useWorker: true })({ particleCount: 150, spread: 90, origin: { y: 0.6 } });
document.head.appendChild(script);
}
// --- Audio Feedback with Delay and Echo ---
const skriptz_audio = {
context: null,
buffer: null,
isInitialized: false,
};
async function init_audio() {
if (skriptz_audio.isInitialized) return;
try {
skriptz_audio.context = new (window.AudioContext || window.webkitAudioContext)();
// --- FIX FOR BROWSER AUTOPLAY POLICY ---
if (skriptz_audio.context.state === 'suspended') {
await skriptz_audio.context.resume();
}
const response = await fetch('/file=finish_sound.mp3');
if (!response.ok) {
console.warn(`Finish sound not loaded: ${response.status} ${response.statusText}. Ensure 'finish_sound.mp3' is in the root directory and 'allowed_paths' is set in demo.launch().`);
skriptz_audio.isInitialized = true;
return;
}
const arrayBuffer = await response.arrayBuffer();
skriptz_audio.buffer = await skriptz_audio.context.decodeAudioData(arrayBuffer);
} catch (e) {
console.error('Error initializing or decoding audio:', e);
}
skriptz_audio.isInitialized = true;
}
async function play_finish_sound() {
if (!skriptz_audio.isInitialized || !skriptz_audio.context) {
await init_audio();
}
if (!skriptz_audio.buffer || !skriptz_audio.context) return;
// --- FIX FOR BROWSER AUTOPLAY POLICY ---
if (skriptz_audio.context.state === 'suspended') {
await skriptz_audio.context.resume();
}
const context = skriptz_audio.context;
const source = context.createBufferSource();
source.buffer = skriptz_audio.buffer;
const delay = context.createDelay();
delay.delayTime.value = 0.25;
const feedback = context.createGain();
feedback.gain.value = 0.4;
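// Simple echo: the dry signal goes straight to the output while the delay node feeds back into itself through a gain < 1, so each repeat gets quieter.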
source.connect(context.destination);
source.connect(delay);
delay.connect(feedback);
feedback.connect(delay);
delay.connect(context.destination);
source.start(0);
}
// --- Dynamic Page Title ---
function update_title(tab_name) {
if (tab_name) {
const clean_name = tab_name.replace(/[\\u{1F600}-\\u{1F64F}\\u{1F300}-\\u{1F5FF}\\u{1F680}-\\u{1F6FF}\\u{1F700}-\\u{1F77F}\\u{1F780}-\\u{1F7FF}\\u{1F800}-\\u{1F8FF}\\u{1F900}-\\u{1F9FF}\\u{1FA00}-\\u{1FA6F}\\u{1FA70}-\\u{1FAFF}\\u{2600}-\\u{26FF}\\u{2700}-\\u{27BF}]/gu, '').trim();
document.title = `Skriptz - ${clean_name}`;
} else {
document.title = "Skriptz - Universal Tool";
}
}
// --- Copy to Clipboard ---
function copy_to_clipboard(text_id) {
const text_area = document.getElementById(text_id).querySelector('textarea');
if(text_area) {
text_area.select();
document.execCommand('copy');
const original_button = (typeof event !== 'undefined' && event) ? event.target : null; // 'this.event' is undefined when called as a method; fall back to the global click event
if (original_button) {
const original_text = original_button.innerText;
original_button.innerText = 'Copied!';
setTimeout(() => { original_button.innerText = original_text; }, 2000);
}
}
}
// --- Storyboard Time Getter ---
function storyboard_get_time(){
const e=document.querySelector('#storyboard_clip_preview video');
return e?e.currentTime:0
}
// Make functions globally accessible for Gradio
window.skriptz_bling = {
show_overlay,
hide_overlay,
fire_confetti,
play_finish_sound,
update_title,
copy_to_clipboard,
storyboard_get_time
};
}
"""
with gr.Blocks(
theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"),
title="Skriptz - Universal Tool",
css=bling_css,
js=bling_js
) as demo:
gr.HTML("""
<div id="loading-overlay">
<div class="spinner"></div>
<p id="loading-message">Processing... Please wait.</p>
</div>
<script src="https://cdn.jsdelivr.net/npm/canvas-confetti@1.9.2/dist/confetti.browser.min.js"></script>
""", visible=False)
logo_b64 = get_image_as_base64("logo.png")
if logo_b64: gr.HTML(f"""<div style="display: flex; justify-content: center; align-items: center; text-align: center; margin-bottom: 20px;"><a href="https://linktr.ee/skylinkd" target="_blank" rel="noopener noreferrer"><img src="{logo_b64}" alt="Skriptz Banner" style="max-width: 100%; max-height: 100px; height: auto;"></a></div>""")
else: gr.Markdown("# Skriptz Universal Tool")
gr.Markdown("<h3 style='text-align: center;'>Your one-stop shop for video and image processing</h3>")
storyboard_get_time_js = "() => { return window.skriptz_bling.storyboard_get_time(); }"
show_overlay_js = "() => { window.skriptz_bling.show_overlay('Working hard... this may take a moment!'); }"
hide_overlay_js = "() => { window.skriptz_bling.hide_overlay(); }"
fire_confetti_and_sound_js = "() => { window.skriptz_bling.fire_confetti(); window.skriptz_bling.play_finish_sound(); }"
copy_transcription_js = "() => { window.skriptz_bling.copy_to_clipboard('transcription_textbox'); }"
with gr.Tabs(elem_id="main_tabs") as main_tabs:
with gr.TabItem("🎬 Storyboard & Animatic", elem_id="storyboard_tab"):
gr.Markdown("## Create Video Animatics from Images and Clips")
gr.Info("1. **Build:** Upload assets, click to add to timeline. 2. **Time:** Set durations, trim videos, or use Rhythmic Editing. 3. **Generate:** Create your final video.")
assets_state = gr.State([])
timeline_state = gr.State([])
selected_timeline_index_state = gr.State(-1)
with gr.Row(equal_height=False):
with gr.Column(scale=2, min_width=400):
with gr.Group():
gr.Markdown("### 1. Asset Bin")
assets_upload_btn = gr.File(label="Upload Images & Video Clips", file_count="multiple", file_types=["image", "video"])
asset_gallery = gr.Gallery(label="Click an asset to add it to the timeline", columns=4, object_fit="contain", height=400)
asset_preview_gallery = gr.Gallery(label="Video Asset Preview (First & Last Frame)", columns=2, height=240, object_fit="contain", interactive=False)
with gr.Row():
add_all_to_timeline_btn = gr.Button("⬇️ Add All to Timeline")
clear_assets_btn = gr.Button("🗑️ Clear Asset Bin")
with gr.Column(scale=3, min_width=600):
with gr.Group():
gr.Markdown("### 2. Timeline & Generation")
timeline_df = gr.DataFrame(headers=["#", "Asset", "Type", "Duration (s)"], datatype=["number", "str", "str", "number"], interactive=False, row_count=(10, "fixed"))
with gr.Row():
timeline_up_btn = gr.Button("⬆️ Move Up", interactive=False)
timeline_down_btn = gr.Button("⬇️ Move Down", interactive=False)
timeline_remove_btn = gr.Button("🗑️ Remove", interactive=False)
clear_timeline_btn = gr.Button("💥 Clear Timeline")
gr.Markdown("### 3. Output Settings")
animatic_audio = gr.Audio(label="Project Audio Track (Narration/Music)", type="filepath")
with gr.Row():
animatic_out_w = gr.Number(label="Output Width", value=1920)
animatic_out_h = gr.Number(label="Output Height", value=1080)
generate_animatic_btn = gr.Button("🎬 Generate Manual Animatic", variant="secondary")
with gr.Accordion("🎡 Rhythmic Editing (Beat Sync)", open=False):
gr.Info("This will override manual durations and re-time all clips to match the music's rhythm.")
with gr.Row():
analyze_bpm_btn = gr.Button("🥁 Analyze BPM")
bpm_display = gr.Textbox(label="Audio BPM", interactive=False)
measure_dropdown = gr.Dropdown(
["2 Measures", "1 Measure", "1/2 Measure", "1/4 Measure (Beat)"],
value="1 Measure", label="Cut Duration per Clip"
)
generate_rhythmic_btn = gr.Button("🎶 Generate Rhythmic Animatic", variant="primary")
animatic_output_video = gr.Video(label="Final Video Output", interactive=True, show_download_button=True)
with gr.Column(scale=2, min_width=300):
with gr.Group():
gr.Markdown("### 4. Clip Properties")
gr.Info("Select a clip in the timeline table to edit it.")
clip_preview = gr.Video(label="Selected Clip Preview", interactive=True, elem_id="storyboard_clip_preview")
clip_duration_input = gr.Number(label="Set Duration (seconds)", interactive=True, precision=2)
update_clip_btn = gr.Button("🔄 Update Clip Duration")
with gr.Group(visible=False) as trim_group:
gr.Markdown("#### Video Trimming")
gr.Info("Use player (K=Play, J/L=Frame Step) to find a frame, then use buttons below.")
with gr.Row():
clip_start_time_input = gr.Number(label="Start Time (s)", precision=2)
clip_end_time_input = gr.Number(label="End Time (s)", precision=2)
with gr.Row():
set_clip_start_btn = gr.Button("Set START")
set_clip_end_btn = gr.Button("Set END")
apply_trim_btn = gr.Button("✂️ Apply Trim")
# --- Event Listeners for Storyboard ---
assets_upload_btn.upload(add_assets_to_bin, [assets_upload_btn, assets_state], [assets_state, asset_gallery])
asset_gallery.select(handle_asset_selection, [assets_state, timeline_state], [timeline_state, asset_preview_gallery])
add_all_to_timeline_btn.click(add_all_assets_to_timeline, [assets_state, timeline_state], timeline_state)
timeline_state.change(update_timeline_df, timeline_state, timeline_df)
timeline_state.change(lambda: (-1, None, None, gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=False), gr.update(visible=False), 0, 0),
outputs=[selected_timeline_index_state, clip_preview, clip_duration_input, timeline_up_btn, timeline_down_btn, timeline_remove_btn, trim_group, clip_start_time_input, clip_end_time_input])
timeline_df.select(
handle_timeline_selection,
timeline_state,
[selected_timeline_index_state, clip_preview, clip_duration_input, timeline_up_btn, timeline_down_btn, timeline_remove_btn, trim_group, clip_start_time_input, clip_end_time_input]
)
update_clip_btn.click(update_clip_properties, [timeline_state, selected_timeline_index_state, clip_duration_input], timeline_state)
apply_trim_btn.click(apply_trim_and_update, [timeline_state, selected_timeline_index_state, clip_start_time_input, clip_end_time_input], [timeline_state, clip_duration_input])
set_clip_start_btn.click(fn=None, js=storyboard_get_time_js, outputs=clip_start_time_input)
set_clip_end_btn.click(fn=None, js=storyboard_get_time_js, outputs=clip_end_time_input)
timeline_up_btn.click(handle_timeline_action, [timeline_state, selected_timeline_index_state, gr.State("up")], timeline_state)
timeline_down_btn.click(handle_timeline_action, [timeline_state, selected_timeline_index_state, gr.State("down")], timeline_state)
timeline_remove_btn.click(handle_timeline_action, [timeline_state, selected_timeline_index_state, gr.State("remove")], timeline_state)
clear_assets_btn.click(lambda: ([], gr.update(value=None), None), outputs=[assets_state, asset_gallery, asset_preview_gallery])
clear_timeline_btn.click(lambda: [], outputs=[timeline_state])
generate_animatic_btn.click(fn=create_animatic, inputs=[timeline_state, animatic_audio, animatic_out_w, animatic_out_h], outputs=animatic_output_video).then(fn=None, js=fire_confetti_and_sound_js)
analyze_bpm_btn.click(detect_bpm, animatic_audio, bpm_display)
generate_rhythmic_btn.click(fn=create_rhythmic_animatic, inputs=[timeline_state, animatic_audio, measure_dropdown, animatic_out_w, animatic_out_h], outputs=animatic_output_video).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("🎨 Creative Suite", elem_id="creative_tab"):
with gr.Tabs():
with gr.TabItem("🎬 Automated Slideshow"):
gr.Markdown("## Automated Rhythmic Slideshow Creator")
gr.Info("Turn a collection of images and a music track into a dynamic video with Ken Burns effects and transitions synced to the beat.")
with gr.Row():
with gr.Column(scale=2):
slideshow_input_images = gr.File(label="Upload Images", file_count="multiple", file_types=["image"])
slideshow_audio = gr.Audio(label="Upload Music Track", type="filepath")
with gr.Accordion("βš™οΈ Style & Timing", open=True):
slideshow_kb_effect = gr.Dropdown(["Subtle", "Standard", "Dynamic"], value="Standard", label="Ken Burns Effect Intensity")
slideshow_transition = gr.Dropdown(["Random", "Fade", "WipeLeft", "WipeRight", "Dissolve", "SlideLeft", "SlideRight"], value="Random", label="Transition Style")
slideshow_rhythm = gr.Dropdown(["1 Image per Beat", "1 Image every 2 Beats", "1 Image per Measure (4 Beats)"], value="1 Image every 2 Beats", label="Image Display Rhythm")
with gr.Row():
slideshow_out_w = gr.Number(label="Output Width", value=1920)
slideshow_out_h = gr.Number(label="Output Height", value=1080)
slideshow_generate_btn = gr.Button("🚀 Generate Slideshow", variant="primary")
with gr.Column(scale=3):
slideshow_output_video = gr.Video(label="Generated Slideshow Video", show_download_button=True)
slideshow_generate_btn.click(
fn=slideshow_wrapper,
inputs=[slideshow_input_images, slideshow_audio, slideshow_kb_effect, slideshow_transition, slideshow_rhythm, slideshow_out_w, slideshow_out_h],
outputs=slideshow_output_video
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("🎞️ Auto-Trailer"):
gr.Markdown("## Automatic Trailer Creator")
gr.Info("Upload a long video, and this tool will find the most action-packed moments to create a short, dynamic trailer.")
with gr.Row():
with gr.Column(scale=2):
trailer_input_video = gr.Video(label="Upload Source Video")
trailer_music = gr.Audio(label="Add Background Music (Optional)", type="filepath")
with gr.Accordion("βš™οΈ Trailer Settings", open=True):
trailer_total_duration = gr.Slider(10, 120, 30, step=5, label="Total Trailer Length (s)")
trailer_clip_duration = gr.Slider(1.0, 5.0, 2.0, step=0.5, label="Duration of Each Clip (s)")
trailer_analysis_method = gr.Dropdown(["Motion"], value="Motion", label="Scene Analysis Method", info="Currently only motion detection is supported.")
trailer_transition = gr.Dropdown(["None", "Fade", "WipeLeft", "WipeRight", "Dissolve", "SlideLeft", "SlideRight"], value="Fade", label="Transition Style")
with gr.Row():
trailer_out_w = gr.Number(label="Output Width", value=1920)
trailer_out_h = gr.Number(label="Output Height", value=1080)
trailer_generate_btn = gr.Button("🚀 Generate Trailer", variant="primary")
with gr.Column(scale=3):
trailer_output_video = gr.Video(label="Generated Trailer Video", show_download_button=True)
trailer_generate_btn.click(
fn=auto_trailer_wrapper,
inputs=[
trailer_input_video, trailer_total_duration, trailer_clip_duration,
trailer_analysis_method, trailer_transition, trailer_music,
trailer_out_w, trailer_out_h
],
outputs=trailer_output_video
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("🎡 Audio Waveform"):
gr.Markdown("## Generate & Overlay Audio Waveforms")
gr.Info("Upload a video with an audio track to generate a dynamic waveform visualization.")
with gr.Row():
with gr.Column():
waveform_input_video = gr.Video(label="Input Video")
with gr.Row():
waveform_style = gr.Dropdown(["line", "p2p", "point"], value="line", label="Waveform Style")
waveform_size = gr.Textbox(value="800x200", label="Size (WxH)")
with gr.Row():
waveform_position = gr.Dropdown(["Bottom", "Center", "Top"], value="Bottom", label="Position")
waveform_color = gr.ColorPicker(value="#38bdf8", label="Color")
waveform_btn = gr.Button("🎶 Generate Waveform Video", variant="primary")
with gr.Column():
waveform_output_video = gr.Video(label="Output Video with Waveform", show_download_button=True)
waveform_btn.click(
fn=generate_waveform_video,
inputs=[waveform_input_video, waveform_style, waveform_size, waveform_position, waveform_color],
outputs=waveform_output_video
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ–ΌοΈ Picture-in-Picture"):
gr.Markdown("## Create Picture-in-Picture (PiP) & Image Overlays")
gr.Info("Upload a main video and an overlay (video or image). The overlay will be placed on top of the main video.")
with gr.Row():
with gr.Column():
pip_main_video = gr.Video(label="Main Video (Background)")
pip_overlay_media = gr.File(label="Overlay Media (Video or Image)", file_types=["video", "image"])
with gr.Row():
pip_position = gr.Dropdown(
["Top-Left", "Top-Center", "Top-Right", "Center-Left", "Center", "Center-Right", "Bottom-Left", "Bottom-Center", "Bottom-Right"],
value="Bottom-Right", label="Position"
)
pip_scale = gr.Slider(0.01, 0.75, 0.25, step=0.01, label="Overlay Size")
pip_btn = gr.Button("✨ Create PiP Video", variant="primary")
with gr.Column():
pip_output_video = gr.Video(label="Output PiP Video", show_download_button=True)
pip_btn.click(
fn=create_pip_video,
inputs=[pip_main_video, pip_overlay_media, pip_position, pip_scale],
outputs=pip_output_video
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ˜‚ Meme Creator"):
gr.Markdown("## Classic Meme & Text Overlay Creator")
gr.Info("Upload an image and add text. Font availability depends on your operating system.")
with gr.Row():
with gr.Column():
meme_input_image = gr.Image(type="numpy", label="Input Image")
meme_text = gr.Textbox(label="Text", placeholder="Your witty text here...")
meme_position = gr.Radio(["Top", "Center", "Bottom"], value="Top", label="Text Position")
meme_font = gr.Dropdown(
["Impact", "Arial", "Arial Black", "Comic Sans MS", "Courier New", "Georgia", "Tahoma", "Times New Roman", "Trebuchet MS", "Verdana"],
value="Impact",
label="Font"
)
with gr.Row():
meme_text_color = gr.ColorPicker(value="#FFFFFF", label="Text Color")
meme_outline_color = gr.ColorPicker(value="#000000", label="Outline Color")
meme_font_size = gr.Slider(1, 10, 5, step=1, label="Relative Font Size")
meme_btn = gr.Button("😂 Generate Meme", variant="primary")
with gr.Column():
meme_output_image = gr.Image(label="Output Image", interactive=True)
meme_btn.click(
fn=create_meme,
inputs=[meme_input_image, meme_text, meme_position, meme_font, meme_font_size, meme_text_color, meme_outline_color],
outputs=meme_output_image
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ–ΌοΈ Image Utilities", elem_id="image_tab"):
gr.Markdown("## Tools for processing and generating single images.")
with gr.Tabs():
with gr.TabItem("✨ Manipulate"):
gr.Markdown("### Simple Image Manipulation")
gr.Info("Apply a single transformation like inverting colors, flipping, or rotating.")
with gr.Row():
with gr.Column():
manip_input_image = gr.Image(type="numpy", label="Input Image")
manip_operation_radio = gr.Radio(
["Invert Colors", "Flip Horizontal", "Flip Vertical", "Rotate 90Β° Right", "Rotate 90Β° Left"],
label="Select Operation", value="Invert Colors"
)
manip_apply_btn = gr.Button("🎨 Apply Manipulation", variant="primary")
with gr.Column():
manip_output_image = gr.Image(label="Output Image", interactive=True)
manip_apply_btn.click(fn=manipulate_image, inputs=[manip_input_image, manip_operation_radio], outputs=manip_output_image).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ“Ή Image to Video"):
gr.Markdown("### Create a short, looping video from a single static image.")
with gr.Row():
with gr.Column():
input_image_i2v = gr.Image(type="numpy", label="Input Image")
duration_slider_i2v = gr.Slider(1, 30, 5, step=0.1, label="Duration (s)")
input_audio_i2v = gr.Audio(label="Add Music (Optional)", type="filepath")
compile_i2v_btn = gr.Button("🎬 Create Looping Video", variant="primary")
with gr.Column():
output_video_i2v = gr.Video(label="Output Looping Video", interactive=True, show_download_button=True)
compile_i2v_btn.click(image_to_looping_video, [input_image_i2v, duration_slider_i2v, input_audio_i2v], output_video_i2v).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ”Ž Zoom Video"):
gr.Markdown("### Create a 'Ken Burns' style zoom/pan video from an image.")
gr.Info("Upload one or more images. The output will be a gallery of videos, or a single combined video if you check the box.")
with gr.Row():
with gr.Column():
i2zv_input_images = gr.File(label="Upload Images", file_count="multiple", file_types=["image"])
i2zv_duration = gr.Slider(1, 30, 5, step=0.5, label="Video Duration (s) per Image")
i2zv_zoom_ratio = gr.Slider(1.0, 2.0, 1.25, step=0.05, label="Zoom Ratio")
i2zv_zoom_dir = gr.Dropdown(
["Center", "Top", "Bottom", "Left", "Right", "Top-Left", "Top-Right", "Bottom-Left", "Bottom-Right"],
value="Center", label="Zoom Direction"
)
i2zv_combine = gr.Checkbox(label="Combine all videos into one", value=False)
i2zv_audio = gr.Audio(label="Add Music (Optional)", type="filepath")
i2zv_btn = gr.Button("✨ Create Zoom Video(s)", variant="primary")
with gr.Column():
i2zv_output_gallery = gr.Gallery(label="Output Video Previews", columns=2, object_fit="contain", visible=True)
i2zv_output_video = gr.Video(label="Combined Output Video", interactive=True, visible=False, show_download_button=True)
i2zv_output_zip = gr.File(label="Download All as .zip", interactive=False)
i2zv_combine.change(
fn=lambda x: [gr.update(visible=not x), gr.update(visible=x)],
inputs=i2zv_combine,
outputs=[i2zv_output_gallery, i2zv_output_video]
)
i2zv_btn.click(
fn=create_zoom_videos,
inputs=[i2zv_input_images, i2zv_duration, i2zv_zoom_ratio, i2zv_zoom_dir, i2zv_combine, i2zv_audio],
outputs=[i2zv_output_gallery, i2zv_output_video, i2zv_output_zip]
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("βœ‚οΈ Batch BG Remover"):
gr.Markdown("### Remove the background from a batch of images.")
with gr.Row():
with gr.Column():
input_images_bg = gr.File(label="Upload Images", file_count="multiple", file_types=["image"])
remove_bg_btn = gr.Button("🧼 Remove Backgrounds", variant="primary")
with gr.Column():
output_gallery_bg = gr.Gallery(label="Images with Transparent Background", columns=4, object_fit="contain", height="auto")
output_zip_bg = gr.File(label="Download All as .zip", interactive=False)
remove_bg_btn.click(remove_background_batch, input_images_bg, [output_gallery_bg, output_zip_bg]).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ–‹οΈ Batch Watermarker"):
gr.Markdown("### Apply a text watermark to a batch of images.")
with gr.Row():
with gr.Column():
input_images_wm = gr.File(label="Upload Images", file_count="multiple", file_types=["image"])
watermark_text = gr.Textbox(label="Watermark Text", placeholder="(c) My Awesome Project")
watermark_pos = gr.Radio(["Top-Left", "Top-Right", "Bottom-Left", "Bottom-Right", "Center"], value="Bottom-Right", label="Position")
watermark_opacity = gr.Slider(0, 100, 50, step=1, label="Opacity (%)")
watermark_btn = gr.Button("✍️ Apply Watermarks", variant="primary")
with gr.Column():
output_gallery_wm = gr.Gallery(label="Watermarked Images", columns=4, object_fit="contain", height="auto")
output_zip_wm = gr.File(label="Download All as .zip", interactive=False)
watermark_btn.click(apply_watermark_batch, [input_images_wm, watermark_text, watermark_pos, watermark_opacity], [output_gallery_wm, output_zip_wm]).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("↔️ Batch Resizer"):
gr.Markdown("### Convert, resize, and compress a batch of images.")
with gr.Row():
with gr.Column():
brc_input_images = gr.File(label="Upload Images", file_count="multiple", file_types=["image"])
with gr.Accordion("βš™οΈ Output Settings", open=True):
brc_format = gr.Dropdown(["JPG", "PNG", "WEBP"], value="JPG", label="Output Format")
brc_quality = gr.Slider(1, 100, 90, step=1, label="JPG/WEBP Quality", interactive=True)
brc_enable_resize = gr.Checkbox(label="Enable Resizing", value=False)
with gr.Row():
brc_max_w = gr.Number(label="Max Width", value=1920, interactive=False)
brc_max_h = gr.Number(label="Max Height", value=1080, interactive=False)
brc_resize_mode = gr.Radio(["Fit (preserve aspect ratio)", "Stretch to Fit"], value="Fit (preserve aspect ratio)", label="Resize Mode", interactive=False)
brc_btn = gr.Button("🚀 Process Images", variant="primary")
with gr.Column():
brc_output_gallery = gr.Gallery(label="Processed Images Preview", columns=4, object_fit="contain", height="auto")
brc_output_zip = gr.File(label="Download All as .zip", interactive=False)
brc_format.change(lambda f: gr.update(visible=f in ["JPG", "WEBP"]), brc_format, brc_quality)
brc_enable_resize.change(lambda x: [gr.update(interactive=x), gr.update(interactive=x), gr.update(interactive=x)], brc_enable_resize, [brc_max_w, brc_max_h, brc_resize_mode])
brc_btn.click(
batch_resize_convert_images,
[brc_input_images, brc_format, brc_quality, brc_enable_resize, brc_max_w, brc_max_h, brc_resize_mode],
[brc_output_gallery, brc_output_zip]
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ€– FLUX.1 API"):
gr.Markdown("### Generate an image using `FLUX.1` models via Gradio Client.")
gr.Info("Requires a Hugging Face User Access Token.")
with gr.Row():
with gr.Column():
hf_token_input = gr.Textbox(label="HF Token", type="password", placeholder="Enter hf_... token")
flux_model_dropdown = gr.Dropdown(list(FLUX_MODELS.keys()), value="FLUX.1-schnell (Fast)", label="Select FLUX Model")
prompt_input_flux = gr.Textbox(label="Prompt", lines=3, placeholder="A cinematic photo...")
with gr.Row():
flux_width_slider = gr.Slider(256, 2048, 1024, step=64, label="Width")
flux_height_slider = gr.Slider(256, 2048, 1024, step=64, label="Height")
flux_btn = gr.Button("☄️ Generate Image", variant="primary")
with gr.Column():
output_image_flux = gr.Image(label="Generated Image", interactive=True)
flux_btn.click(call_flux_api, [prompt_input_flux, flux_model_dropdown, flux_width_slider, flux_height_slider, hf_token_input], output_image_flux).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸŽ₯ Video Utilities", elem_id="video_tab"):
gr.Markdown("## A collection of useful video tools.")
with gr.Tabs():
with gr.TabItem("🎞️ Frame Tools"):
with gr.Tabs():
with gr.TabItem("First & Last"):
gr.Markdown("### Extract the very first and very last frames of a video.")
with gr.Row():
with gr.Column():
input_video_fl = gr.Video(label="Input Video")
extract_fl_btn = gr.Button("🎬 Extract Frames", variant="primary")
with gr.Column():
output_gallery_fl = gr.Gallery(label="Output Frames (First, Last)", columns=2, object_fit="contain", height="auto")
extract_fl_btn.click(fn=extract_first_last_frame, inputs=input_video_fl, outputs=output_gallery_fl).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Video to Frames"):
gr.Markdown("### Extract all individual frames from a video file.")
with gr.Row():
with gr.Column():
input_video_v2f = gr.Video(label="Input Video")
v2f_fps_display = gr.Textbox(label="Detected FPS", interactive=False, value="N/A")
with gr.Accordion("βš™οΈ Advanced Options", open=False):
v2f_skip_rate = gr.Slider(1, 30, 1, step=1, label="Extract Every Nth Frame")
v2f_rotation = gr.Dropdown(["None", "90 Degrees Clockwise", "90 Degrees Counter-Clockwise", "180 Degrees"], value="None", label="Rotation")
v2f_format = gr.Radio(["PNG", "JPG"], value="PNG", label="Output Format")
v2f_jpg_quality = gr.Slider(1, 100, 95, step=1, label="JPG Quality", interactive=False)
v2f_resize = gr.Checkbox(label="Resize all extracted frames", value=False)
with gr.Row():
v2f_width = gr.Number(label="Output Width", value=1024, interactive=False)
v2f_height = gr.Number(label="Output Height", value=576, interactive=False)
extract_v2f_btn = gr.Button("🪚 Extract All Frames", variant="primary")
with gr.Column():
output_gallery_v2f = gr.Gallery(label="Extracted Frames Preview (max 100 shown)", columns=8, object_fit="contain", height="auto")
output_zip_v2f = gr.File(label="Download All Frames (.zip)", interactive=False)
input_video_v2f.upload(lambda v: f"{get_video_fps(v):.2f} FPS", input_video_v2f, v2f_fps_display)
v2f_resize.change(lambda x: [gr.update(interactive=x), gr.update(interactive=x)], v2f_resize, [v2f_width, v2f_height])
v2f_format.change(lambda x: gr.update(interactive=(x=="JPG")), v2f_format, v2f_jpg_quality)
extract_v2f_btn.click(video_to_frames_extractor, [input_video_v2f, v2f_skip_rate, v2f_rotation, v2f_resize, v2f_width, v2f_height, v2f_format, v2f_jpg_quality], [output_gallery_v2f, output_zip_v2f]).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Frames to Video"):
gr.Markdown("### Compile a sequence of image frames into a video file.")
with gr.Row():
with gr.Column():
input_frames_f2v = gr.File(label="Upload Frames", file_count="multiple", file_types=["image"])
fps_slider_f2v = gr.Slider(1, 60, 24, step=1, label="FPS")
with gr.Accordion("βš™οΈ Advanced Options", open=False):
f2v_rotation = gr.Dropdown(["None", "90 Degrees Clockwise", "90 Degrees Counter-Clockwise", "180 Degrees"], value="None", label="Rotation")
f2v_resize = gr.Checkbox(label="Resize all frames", value=False)
with gr.Row():
f2v_width = gr.Number(label="Output Width", value=1024, interactive=False)
f2v_height = gr.Number(label="Output Height", value=576, interactive=False)
compile_f2v_btn = gr.Button("📽️ Create Video", variant="primary")
with gr.Column():
output_video_f2v = gr.Video(label="Compiled Video", interactive=True, show_download_button=True)
f2v_resize.change(lambda x: [gr.update(interactive=x), gr.update(interactive=x)], f2v_resize, [f2v_width, f2v_height])
compile_f2v_btn.click(create_video_from_frames, [input_frames_f2v, fps_slider_f2v, f2v_rotation, f2v_resize, f2v_width, f2v_height], output_video_f2v).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("βœ‚οΈ Editing"):
with gr.Tabs():
with gr.TabItem("Manipulate"):
gr.Markdown("### Simple Video Manipulation")
gr.Info("Apply a single transformation like inverting colors, flipping, or rotating to every frame of a video.")
with gr.Row():
with gr.Column():
vmanip_input_video = gr.Video(label="Input Video")
vmanip_operation_radio = gr.Radio(
["Invert Colors", "Flip Horizontal", "Flip Vertical", "Rotate 90Β° Right", "Rotate 90Β° Left"],
label="Select Operation", value="Invert Colors"
)
vmanip_apply_btn = gr.Button("✨ Apply Manipulation", variant="primary")
with gr.Column():
vmanip_output_video = gr.Video(label="Output Video", interactive=True, show_download_button=True)
vmanip_apply_btn.click(fn=manipulate_video, inputs=[vmanip_input_video, vmanip_operation_radio], outputs=vmanip_output_video).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Ping-Pong"):
gr.Markdown("### Create a forward-then-reverse video loop (Boomerang).")
with gr.Row():
with gr.Column():
input_video_pingpong = gr.Video(label="Input Video")
audio_option_pingpong = gr.Radio(["Remove Audio", "Original Audio Only", "Reverse Audio"], value="Remove Audio", label="Audio Handling")
pingpong_btn = gr.Button("🏓 Create Ping-Pong Video", variant="primary")
with gr.Column():
output_video_pingpong = gr.Video(label="Ping-Pong Video", interactive=True, show_download_button=True)
pingpong_btn.click(fn=ping_pong_video, inputs=[input_video_pingpong, audio_option_pingpong], outputs=output_video_pingpong).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Reverse"):
gr.Markdown("### Reverse a video clip.")
with gr.Row():
with gr.Column():
input_video_reverse = gr.Video(label="Input Video")
audio_option_reverse = gr.Radio(["Remove Audio", "Reverse Audio"], value="Remove Audio", label="Audio Handling")
reverse_btn = gr.Button("🔄 Reverse Video", variant="primary")
with gr.Column():
output_video_reverse = gr.Video(label="Reversed Video", interactive=True, show_download_button=True)
reverse_btn.click(fn=reverse_video, inputs=[input_video_reverse, audio_option_reverse], outputs=output_video_reverse).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Merger"):
gr.Markdown("### Simple Video Merger")
gr.Info("Upload two or more video clips to join them together in sequence. All clips will be conformed to the resolution and framerate of the first video.")
with gr.Row():
with gr.Column():
merger_input_videos = gr.File(label="Upload Videos (2 or more)", file_count="multiple", file_types=["video"])
merger_btn = gr.Button("🔗 Merge Videos", variant="primary")
with gr.Column():
merger_output_video = gr.Video(label="Merged Video", show_download_button=True)
merger_btn.click(
fn=merge_videos,
inputs=merger_input_videos,
outputs=merger_output_video
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Trimmer"):
gr.Markdown("### Visually trim a video. Use the player to find a frame, then set it as the start or end point.")
gr.Info("Keyboard hotkeys enabled: K = Play/Pause, J = Back 1 Frame, L = Forward 1 Frame (hover mouse over video)")
with gr.Row():
with gr.Column(scale=2):
input_video_trim = gr.Video(label="Input Video", elem_id="video-trim-input")
with gr.Row():
set_start_btn = gr.Button("Set Current Frame as START")
set_end_btn = gr.Button("Set Current Frame as END")
trim_btn = gr.Button("✂️ Trim Video", variant="primary")
with gr.Column(scale=1):
gr.Markdown("#### Trim Points")
start_frame_img = gr.Image(label="Start Frame", interactive=False)
trim_start_time_display = gr.Textbox(label="Start Time (s)", interactive=False)
end_frame_img = gr.Image(label="End Frame", interactive=False)
trim_end_time_display = gr.Textbox(label="End Time (s)", interactive=False)
trim_start_time = gr.Number(value=0, visible=False)
trim_end_time = gr.Number(value=0, visible=False)
with gr.Row():
output_video_trim = gr.Video(label="Trimmed Video", interactive=True, show_download_button=True)
get_current_time_js = """()=>{const e=document.querySelector("#video-trim-input video");return e?e.currentTime:0}"""
def get_frame_from_time_wrapper(v,t): return get_frame_at_time(v,t), f"{t:.3f}"
set_start_btn.click(fn=None, js=get_current_time_js, outputs=[trim_start_time])
set_end_btn.click(fn=None, js=get_current_time_js, outputs=[trim_end_time])
trim_start_time.change(fn=get_frame_from_time_wrapper, inputs=[input_video_trim, trim_start_time], outputs=[start_frame_img, trim_start_time_display])
trim_end_time.change(fn=get_frame_from_time_wrapper, inputs=[input_video_trim, trim_end_time], outputs=[end_frame_img, trim_end_time_display])
trim_btn.click(fn=trim_video, inputs=[input_video_trim, trim_start_time, trim_end_time], outputs=output_video_trim).then(fn=None, js=fire_confetti_and_sound_js)
input_video_trim.clear(fn=lambda: (None, "0.00", None, "0.00", 0, 0), outputs=[start_frame_img, trim_start_time_display, end_frame_img, trim_end_time_display, trim_start_time, trim_end_time])
with gr.TabItem("Crop & Resize"):
gr.Markdown("### Visually crop a video.")
with gr.Row():
with gr.Column(scale=2):
crop_input_video = gr.Video(label="Input Video")
crop_preview_image = gr.Image(label="Frame Preview", interactive=False)
with gr.Column(scale=1):
gr.Markdown("#### Crop Settings")
with gr.Row():
crop_w = gr.Number(label="Width", value=1280)
crop_h = gr.Number(label="Height", value=720)
with gr.Row():
crop_x = gr.Number(label="Offset X", value=0)
crop_y = gr.Number(label="Offset Y", value=0)
gr.Markdown("#### Options")
crop_btn = gr.Button("📐 Crop Video", variant="primary")
with gr.Accordion("Optional: Resize after cropping", open=False):
crop_do_resize = gr.Checkbox(label="Enable Resizing", value=False)
crop_resize_w = gr.Number(label="Output Width", value=1024, interactive=False)
crop_resize_h = gr.Number(label="Output Height", value=576, interactive=False)
output_video_crop = gr.Video(label="Cropped Video", interactive=True, show_download_button=True)
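# Uploading a video immediately renders a preview frame (get_frame_at_time is called with only the video input
# here, so it relies on its default timestamp), and ticking "Enable Resizing" toggles whether the Output
# Width/Height fields are editable.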
crop_input_video.upload(fn=get_frame_at_time, inputs=crop_input_video, outputs=crop_preview_image)
crop_do_resize.change(lambda x: [gr.update(interactive=x), gr.update(interactive=x)], inputs=crop_do_resize, outputs=[crop_resize_w, crop_resize_h])
crop_btn.click(fn=crop_video, inputs=[crop_input_video, crop_x, crop_y, crop_w, crop_h, crop_do_resize, crop_resize_w, crop_resize_h], outputs=output_video_crop).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Change Speed"):
gr.Markdown("### Create slow-motion or fast-forward videos.")
with gr.Row():
with gr.Column():
input_video_speed = gr.Video(label="Input Video")
speed_multiplier = gr.Slider(0.1, 10.0, 1.0, step=0.1, label="Speed Multiplier")
speed_btn = gr.Button("🏃 Change Speed", variant="primary")
with gr.Column():
output_video_speed = gr.Video(label="Modified Video", interactive=True, show_download_button=True)
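# change_video_speed is defined earlier in this file. An ffmpeg-based implementation of this kind of control
# would typically adjust video timestamps with setpts and audio tempo with atempo (an assumption about the
# helper, not a statement of its exact command), e.g. roughly:
#   ffmpeg -i in.mp4 -filter:v "setpts=PTS/2" -filter:a "atempo=2.0" out.mp4   # 2x faster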
speed_btn.click(fn=change_video_speed, inputs=[input_video_speed, speed_multiplier], outputs=output_video_speed).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("🎨 Effects & Overlays"):
with gr.Tabs():
with gr.TabItem("Fader"):
gr.Markdown("### Apply Fade-In and/or Fade-Out to a Video")
with gr.Row():
with gr.Column():
fade_input_video = gr.Video(label="Input Video")
with gr.Row():
fade_in_slider = gr.Slider(0.0, 10.0, 1.0, step=0.1, label="Fade-In Duration (s)")
fade_out_slider = gr.Slider(0.0, 10.0, 1.0, step=0.1, label="Fade-Out Duration (s)")
fade_video_btn = gr.Button("✨ Apply Fade", variant="primary")
with gr.Column():
fade_output_video = gr.Video(label="Faded Video", interactive=True, show_download_button=True)
fade_video_btn.click(apply_video_fade, [fade_input_video, fade_in_slider, fade_out_slider], fade_output_video).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("BG Remover"):
gr.Markdown("## Video Background Remover")
gr.Warning("This is a very slow process. A short video can take several minutes. Output is a .webm file.")
with gr.Row():
with gr.Column():
vbg_input_video = gr.Video(label="Input Video")
vbg_btn = gr.Button("✂️ Remove Video Background", variant="primary")
with gr.Column():
vbg_output_video = gr.Video(label="Output Video with Transparency (.webm)", interactive=True, show_download_button=True)
vbg_btn.click(remove_video_background, vbg_input_video, vbg_output_video).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Watermark"):
gr.Markdown("### Apply a text watermark to a video.")
with gr.Row():
with gr.Column():
wm_input_video = gr.Video(label="Input Video")
wm_text = gr.Textbox(label="Watermark Text", placeholder="(c) My Video 2025")
wm_pos = gr.Radio(["Top-Left", "Top-Right", "Bottom-Left", "Bottom-Right", "Center"], value="Bottom-Right", label="Position")
wm_opacity = gr.Slider(0, 100, 70, step=1, label="Opacity (%)")
with gr.Accordion("Advanced Options", open=False):
wm_size = gr.Slider(1, 10, 5, step=1, label="Relative Font Size")
wm_color = gr.ColorPicker(value="#FFFFFF", label="Font Color")
wm_btn = gr.Button("🖋️ Apply Watermark", variant="primary")
with gr.Column():
wm_output_video = gr.Video(label="Watermarked Video", interactive=True, show_download_button=True)
wm_btn.click(apply_video_watermark, [wm_input_video, wm_text, wm_pos, wm_opacity, wm_size, wm_color], wm_output_video).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Create GIF"):
gr.Markdown("### Convert a video clip into a high-quality animated GIF.")
with gr.Row():
with gr.Column():
input_video_gif = gr.Video(label="Input Video")
with gr.Row():
gif_start_time = gr.Number(value=0, label="Start Time (s)")
gif_end_time = gr.Number(value=0, label="End Time (s)", info="Set to 0 for full duration")
gif_btn = gr.Button("🖼️ Create GIF", variant="primary")
with gr.Column():
output_gif = gr.Image(label="Output GIF", interactive=True)
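# create_gif_from_video is defined earlier in this file. "High-quality" GIF conversion with ffmpeg is usually
# done with the two-pass palettegen/paletteuse filters (an assumption about the helper, not its exact command):
#   ffmpeg -i in.mp4 -vf "fps=15,scale=480:-1:flags=lanczos,palettegen" palette.png
#   ffmpeg -i in.mp4 -i palette.png -lavfi "fps=15,scale=480:-1:flags=lanczos[x];[x][1:v]paletteuse" out.gif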
gif_btn.click(create_gif_from_video, [input_video_gif, gif_start_time, gif_end_time], output_gif).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ”Š Audio & Transcription"):
with gr.Tabs():
with gr.TabItem("Add Audio"):
gr.Markdown("### Combine a silent video with an audio file.")
with gr.Row():
with gr.Column():
input_video_audio = gr.Video(label="Input Video")
input_audio = gr.Audio(type="filepath", label="Input Audio")
add_audio_btn = gr.Button("🎢 Add Audio", variant="primary")
with gr.Column():
output_video_audio = gr.Video(label="Final Video with Audio", interactive=True, show_download_button=True)
add_audio_btn.click(add_audio_to_video, [input_video_audio, input_audio], output_video_audio).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Extract Audio"):
gr.Markdown("### Strip the audio track from a video file.")
with gr.Row():
with gr.Column():
extract_audio_input_video = gr.Video(label="Input Video")
extract_audio_format = gr.Dropdown(["mp3", "wav", "aac"], value="mp3", label="Output Audio Format")
extract_audio_btn = gr.Button("🎡 Extract Audio", variant="primary")
with gr.Column():
extract_audio_output = gr.Audio(label="Extracted Audio", type="filepath")
extract_audio_btn.click(extract_audio, [extract_audio_input_video, extract_audio_format], extract_audio_output).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Audio Editor"):
gr.Markdown("### Trim and Apply Fades to an Audio File")
gr.Info("Set start/end times to trim the clip, then apply optional fades.")
with gr.Row():
with gr.Column():
audio_trim_input = gr.Audio(type="filepath", label="Input Audio")
with gr.Row():
audio_start_time = gr.Number(label="Start Time (s)", value=0)
audio_end_time = gr.Number(label="End Time (s)", value=0, info="Set to 0 for full duration")
with gr.Row():
audio_fade_in = gr.Slider(0.0, 10.0, 0.5, step=0.1, label="Fade-In Duration (s)")
audio_fade_out = gr.Slider(0.0, 10.0, 1.0, step=0.1, label="Fade-Out Duration (s)")
audio_trim_fade_btn = gr.Button("✂️ Process Audio", variant="primary")
with gr.Column():
audio_trim_output = gr.Audio(label="Processed Audio", type="filepath")
audio_trim_fade_btn.click(trim_and_fade_audio, [audio_trim_input, audio_start_time, audio_end_time, audio_fade_in, audio_fade_out], audio_trim_output).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Transcription", visible=(whisper is not None)):
gr.Markdown("## Transcribe Speech and Burn Subtitles")
gr.Info("Uses OpenAI's Whisper model. First run will download model files. After transcribing a video, options to burn subtitles will appear.")
transcribed_video_path_state = gr.State(None)
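# Keeps the path of the transcribed source file so the burn step further down can reuse it without a re-upload.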
with gr.Row():
with gr.Column():
transcribe_input = gr.File(label="Upload Video or Audio File", file_types=["video", "audio"])
transcribe_model = gr.Dropdown(["tiny", "base", "small", "medium", "large"], value="base", label="Whisper Model Size")
transcribe_btn = gr.Button("🎙️ Transcribe", variant="primary")
with gr.Column():
with gr.Row():
transcribe_text = gr.Textbox(label="Transcription Result", lines=10, interactive=True, elem_id="transcription_textbox")
copy_transcription_btn = gr.Button("📋 Copy")
transcribe_files = gr.File(label="Download Subtitle Files (.srt, .vtt)", file_count="multiple", interactive=False)
with gr.Accordion("πŸ”₯ Burn Subtitles onto Video", open=True, visible=False) as burn_accordion:
gr.Markdown("Set styling and burn the generated subtitles into the video.")
with gr.Row():
burn_font_size = gr.Slider(1, 10, 5, step=1, label="Relative Font Size")
burn_font_color = gr.ColorPicker(value="#FFFFFF", label="Font Color")
burn_btn = gr.Button("🔥 Burn Subtitles", variant="primary")
burn_output_video = gr.Video(label="Video with Burned-in Subtitles", interactive=True, show_download_button=True)
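# Wiring: transcribe_and_prep_burn fills the textbox, exposes the generated .srt/.vtt files for download, stores
# the source path in the State above and un-hides the burn accordion; burn_subtitles then renders those subtitles
# onto the stored video with the chosen size and colour.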
copy_transcription_btn.click(fn=None, js=copy_transcription_js)
transcribe_btn.click(
fn=transcribe_and_prep_burn,
inputs=[transcribe_input, transcribe_model],
outputs=[transcribe_text, transcribe_files, transcribed_video_path_state, burn_accordion]
).then(fn=None, js=fire_confetti_and_sound_js)
burn_btn.click(
fn=burn_subtitles,
inputs=[transcribed_video_path_state, transcribe_files, burn_font_size, burn_font_color],
outputs=burn_output_video
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("🧠 ControlNet", elem_id="controlnet_tab"):
gr.Markdown("## ControlNet Preprocessing")
with gr.Tabs():
with gr.TabItem("Process a Video"):
gr.Markdown("### Convert a Video into a ControlNet-Ready Map")
with gr.Row():
with gr.Column():
input_video_cn = gr.Video(label="Input Video")
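# DETECTOR_CONFIG is defined earlier in this file and maps the human-readable names offered in these dropdowns
# to their corresponding ControlNet preprocessors.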
detector_dropdown_cn = gr.Dropdown(choices=list(DETECTOR_CONFIG.keys()), value="Canny", label="Choose Detector")
process_btn_cn = gr.Button("✨ Process Video", variant="primary")
with gr.Column():
output_video_cn = gr.Video(label="Output ControlNet Video", interactive=True, show_download_button=True)
process_btn_cn.click(fn=process_video_with_detector, inputs=[input_video_cn, detector_dropdown_cn], outputs=output_video_cn).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Process Batch Images"):
gr.Markdown("### Generate ControlNet Maps from one or more images.")
with gr.Row():
with gr.Column():
input_images_cn = gr.File(label="Upload Images or Folder", file_count="multiple", file_types=["image"])
detector_dropdown_img = gr.Dropdown(choices=list(DETECTOR_CONFIG.keys()), value="Canny", label="Choose Detector")
process_btn_img = gr.Button("✨ Process Images", variant="primary")
with gr.Column():
output_gallery_cn = gr.Gallery(label="Output ControlNet Images", columns=4, object_fit="contain", height="auto")
output_zip_cn = gr.File(label="Download All as .zip", interactive=False)
process_btn_img.click(fn=process_batch_images_with_detector, inputs=[input_images_cn, detector_dropdown_img], outputs=[output_gallery_cn, output_zip_cn]).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ—œοΈ Converter", elem_id="converter_tab"):
gr.Markdown("## Universal Video Converter & Compressor")
gr.Info("Convert your video to a different format, change the codec, reduce the quality to save space, or downscale the resolution.")
with gr.Row():
with gr.Column():
conv_input_video = gr.Video(label="Input Video")
conv_btn = gr.Button("⚙️ Convert & Compress", variant="primary")
conv_output_video = gr.Video(label="Converted Video", interactive=True, show_download_button=True)
with gr.Column():
gr.Markdown("#### Output Settings")
with gr.Row():
conv_format = gr.Dropdown(["mp4", "mkv", "webm", "mov"], value="mp4", label="Output Format")
conv_vcodec = gr.Dropdown(["libx264", "libx265", "vp9"], value="libx264", label="Video Codec")
conv_crf = gr.Slider(minimum=18, maximum=30, value=23, step=1, label="Quality (CRF)", info="Lower CRF = higher quality and larger files; higher CRF = lower quality and smaller files. 23 is a good default.")
conv_scale = gr.Dropdown(["Original", "1080p", "720p", "480p"], value="Original", label="Downscale Resolution (optional)")
gr.Markdown("##### Audio Settings")
with gr.Row():
conv_acodec = gr.Dropdown(["copy", "aac", "opus"], value="copy", label="Audio Codec", info="'copy' is fastest and preserves quality.")
conv_abitrate = gr.Dropdown([96, 128, 192, 256, 320], value=192, label="Audio Bitrate (kbps)", interactive=False)
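# The bitrate dropdown only matters when the audio is re-encoded, so it stays locked while the codec is 'copy'.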
conv_acodec.change(lambda x: gr.update(interactive=(x != "copy")), conv_acodec, conv_abitrate)
conv_btn.click(fn=convert_compress_video, inputs=[conv_input_video, conv_format, conv_vcodec, conv_crf, conv_scale, conv_acodec, conv_abitrate], outputs=conv_output_video).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ”— Transfer", elem_id="transfer_tab"):
gr.Markdown("## Image & Link Transfer Utility")
gr.Info("Drop images below, manage URL presets, and open the target application in a new tab.")
link_presets = gr.State(DEFAULT_LINK_PRESETS.copy())
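# Per-session copy of the preset table: edits made through the UI below never mutate the module-level defaults.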
with gr.Row():
with gr.Column(scale=1):
transfer_gallery = gr.Gallery(label="Drop Images Here", height=300, columns=3, object_fit="contain")
with gr.Column(scale=2):
gr.Markdown("### Link Preset Management")
target_url = gr.Textbox(label="Target URL", value="https://huggingface.co/spaces/bep40/FramePack_rotate_landscape", interactive=True, elem_id="transfer_target_url")
search_bar = gr.Textbox(label="Search Presets", placeholder="Type to filter...", interactive=True)
with gr.Row():
preset_dropdown = gr.Dropdown(
label="Load Link Preset",
choices=sorted(list(DEFAULT_LINK_PRESETS.keys())),
interactive=True
)
delete_preset_btn = gr.Button("🗑️ Delete", variant="stop")
with gr.Accordion("Create a new preset", open=False):
with gr.Row():
new_preset_name = gr.Textbox(label="New Preset Name", placeholder="e.g., My Favorite App")
save_preset_btn = gr.Button("💾 Save")
open_link_btn = gr.Button("🚀 Open in New Tab", variant="primary")
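# Preset wiring: the search box filters the dropdown, selecting a preset loads its URL, saving stores the current
# URL under a new name, deleting asks for confirmation in the browser first, and the launch button opens whatever
# is in the URL box in a new tab.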
search_bar.input(fn=filter_presets, inputs=[search_bar, link_presets], outputs=[preset_dropdown])
preset_dropdown.change(fn=load_preset, inputs=[link_presets, preset_dropdown], outputs=[target_url])
save_preset_btn.click(
fn=save_preset, inputs=[link_presets, new_preset_name, target_url], outputs=[link_presets, preset_dropdown]
).then(lambda: ("", ""), outputs=[new_preset_name, search_bar])
delete_confirm_js = """(name) => { if (!name) { alert('Please select a preset to delete.'); return false; } return confirm(`Are you sure you want to delete the preset '${name}'?`); }"""
delete_preset_btn.click(fn=None, js=delete_confirm_js, inputs=[preset_dropdown]).then(
fn=delete_preset, inputs=[link_presets, preset_dropdown], outputs=[link_presets, preset_dropdown, target_url]
).then(lambda: "", outputs=[search_bar])
open_link_btn.click(fn=None, js="()=>{const url=document.getElementById('transfer_target_url').querySelector('textarea').value;if(url){window.open(url,'_blank')}else{alert('Target URL is empty.')}}")
main_tabs.select(fn=None, inputs=main_tabs, js="(tab) => { window.skriptz_bling.update_title(tab); }")
gr.HTML('<a href="https://linktr.ee/skylinkd" target="_blank" style="color: #94a3b8; text-decoration: none;">skylinkd production 2025 (c)</a>', elem_id="custom-footer")
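# Wipe the scratch directory on startup so intermediate files from previous runs don't accumulate.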
if __name__ == "__main__":
if os.path.exists(TEMP_DIR):
try: shutil.rmtree(TEMP_DIR)
except OSError as e: print(f"Error removing temp directory {TEMP_DIR}: {e}")
os.makedirs(TEMP_DIR, exist_ok=True)
# --- THIS IS THE MAIN FIX ---
# Add allowed_paths to let Gradio serve the sound file from the current directory.
demo.launch(inbrowser=True, allowed_paths=["."])