import gradio as gr
import subprocess
import static_ffmpeg
import os
import tempfile
import requests
import re
import textwrap
import shutil
import time
import base64
from datetime import datetime
from PIL import Image, ImageDraw, ImageFont
from functools import lru_cache
from io import BytesIO
# ========================================
# CONFIGURATION SECTION - CUSTOMIZE HERE
# ========================================
REDDIT_CONFIG = {
'template_file': 'reddit_template.png',
'font_file': 'RFDewi-Bold.ttf',
'font_size_max': 180,
'font_size_min': 16,
'text_wrap_width': 35,
'text_color': 'black',
'line_spacing': 10,
'text_box_width_percent': 0.85,
'text_box_height_percent': 0.65,
'y_offset': 20,
}
SUBTITLE_CONFIG = {
'font_file': 'komiko_axis.ttf',
'font_name': 'Komika Axis',
'font_size_default': 12,
'position_alignment': 5,
'margin_left': 20,
'margin_right': 20,
'margin_vertical': 0,
}
VIDEO_CONFIG = {
'reddit_scale_percent': 0.75,
'fade_start_percent': 0.70,
'fade_end_percent': 0.83,
'promo_percent': 0.1,
'fade_color_rgb': (218, 207, 195),
'fade_color_hex': '#DACFC3',
'book_fade_in_duration': 2,
'encoding_preset': 'faster',
'threads': 0,
}
# ========================================
# END CONFIGURATION
# ========================================
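# Illustrative timing sketch (hypothetical 100 s narration; this mirrors the arithmetic
# used in stitch_media below, it adds no extra behaviour):
#   fade_start     = 100 * fade_start_percent (0.70) = 70.0 s  -> main clip starts fading out
#   fade_end       = 100 * fade_end_percent   (0.83) = 83.0 s  -> fully faded to fade_color
#   promo_duration = 100 * promo_percent      (0.10) = 10.0 s  -> length of the book-cover promo
#   book_start     = 100 - 10.0                      = 90.0 s  -> promo segment begins
#   solid_duration = 90.0 - 83.0                     =  7.0 s  -> solid-colour card bridging fade and promo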
static_ffmpeg.add_paths()
# Timing Utilities
def format_elapsed_time(seconds):
"""Format elapsed time as MM:SS."""
mins = int(seconds // 60)
secs = int(seconds % 60)
return f"{mins}:{secs:02d}"
def run_ffmpeg_cmd(cmd, env, description="", start_time=None):
"""Execute FFmpeg command with timing."""
step_start = time.time()
try:
subprocess.run(cmd, check=True, capture_output=True, text=True, env=env)
elapsed = time.time() - step_start
total_elapsed = time.time() - start_time if start_time else elapsed
        return True, None, f"✅ {description} ({elapsed:.1f}s) | Total: {format_elapsed_time(total_elapsed)}"
except subprocess.CalledProcessError as e:
error_msg = e.stderr[-500:] if e.stderr else str(e)
return False, f"{description} failed: {error_msg}", None
# Font & Utility Functions
def load_font(font_paths, font_size, fallback='Verdana'):
"""Load font from multiple locations with fallback."""
for path in font_paths:
if os.path.exists(path):
try:
return ImageFont.truetype(path, font_size)
            except Exception:
                pass
    try:
        return ImageFont.truetype(fallback, font_size)
    except Exception:
        return ImageFont.load_default()
def time_to_seconds(time_str):
"""Convert SRT time to seconds."""
h, m, s = time_str.split(':')
s, ms = s.split(',')
return float(int(h) * 3600 + int(m) * 60 + int(s) + int(ms) / 1000.0)
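# Quick sanity check (illustrative): time_to_seconds("00:01:23,500")
# -> 1*60 + 23 + 500/1000 = 83.5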
def setup_custom_fonts_hf(temp_dir):
"""Setup fonts for HF Spaces compatibility."""
try:
fonts_dir = os.path.join(temp_dir, 'fonts')
os.makedirs(fonts_dir, exist_ok=True)
script_dir = os.path.dirname(os.path.abspath(__file__))
fonts_to_copy = []
# Check fonts/ subdirectory
repo_fonts_dir = os.path.join(script_dir, 'fonts')
if os.path.exists(repo_fonts_dir):
fonts_to_copy.extend([
os.path.join(repo_fonts_dir, f)
for f in os.listdir(repo_fonts_dir)
if f.lower().endswith(('.ttf', '.otf'))
])
# Check root directory
for font_file in [REDDIT_CONFIG['font_file'], SUBTITLE_CONFIG['font_file']]:
font_path = os.path.join(script_dir, font_file)
if os.path.exists(font_path) and font_path not in fonts_to_copy:
fonts_to_copy.append(font_path)
# Copy fonts
for src in fonts_to_copy:
shutil.copy(src, os.path.join(fonts_dir, os.path.basename(src)))
if fonts_to_copy:
with open(os.path.join(temp_dir, 'fonts.conf'), 'w') as f:
f.write(f"""<?xml version="1.0"?>
<fontconfig>
<dir>{fonts_dir}</dir>
<cachedir>{temp_dir}/cache</cachedir>
</fontconfig>""")
env = os.environ.copy()
env['FONTCONFIG_FILE'] = os.path.join(temp_dir, 'fonts.conf')
env['FONTCONFIG_PATH'] = temp_dir
return env
return os.environ.copy()
    except Exception:
        return os.environ.copy()
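# Note: FONTCONFIG_FILE / FONTCONFIG_PATH are standard fontconfig environment variables.
# Passing the returned env dict to the FFmpeg subprocesses lets libass (behind the `ass`
# filter) find the bundled fonts instead of falling back to a system default.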
# File Handling
def download_file_from_url(url, output_dir, filename):
"""Download file from URL."""
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
file_path = os.path.join(output_dir, filename)
with open(file_path, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
f.write(chunk)
return file_path
def download_book_cover(book_id, output_dir):
"""Download book cover from Google Books using Book ID."""
url = f"https://books.google.com/books/publisher/content/images/frontcover/{book_id}"
response = requests.get(url, timeout=30)
response.raise_for_status()
path = os.path.join(output_dir, 'book_cover.png')
with open(path, 'wb') as f:
f.write(response.content)
Image.open(path).verify()
return path
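# Example (illustrative, reusing the placeholder ID shown in the UI below;
# "/tmp/out" is a hypothetical directory):
#   download_book_cover("wyaEDwAAQBAJ", "/tmp/out")
# fetches https://books.google.com/books/publisher/content/images/frontcover/wyaEDwAAQBAJ
# and saves it as /tmp/out/book_cover.png after the PIL verify() pass.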
def decode_base64_image(base64_string, output_dir):
"""
Decode base64 image data and save to file.
Supports both raw base64 and data URI format.
"""
try:
# Remove data URI prefix if present
if ',' in base64_string and 'base64' in base64_string:
base64_string = base64_string.split(',', 1)[1]
# Clean whitespace
base64_string = base64_string.strip()
# Decode base64
image_data = base64.b64decode(base64_string)
# Verify it's a valid image
img = Image.open(BytesIO(image_data))
img.verify()
# Save to file
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_path = os.path.join(output_dir, f"book_cover_b64_{timestamp}.png")
# Re-open and save
img = Image.open(BytesIO(image_data))
img.save(output_path, 'PNG')
return output_path
    except Exception as e:
        raise ValueError(f"Base64 decode failed: {e}") from e
def validate_book_cover_input(book_cover_file, book_cover_url, book_cover_base64, book_id, temp_dir):
"""Validate book cover input. Returns: (book_cover_path, error_message)"""
has_file = book_cover_file is not None
has_url = bool(book_cover_url and book_cover_url.strip())
has_base64 = bool(book_cover_base64 and book_cover_base64.strip())
has_id = bool(book_id and book_id.strip())
methods_count = sum([has_file, has_url, has_base64, has_id])
if methods_count == 0:
return None, None
if methods_count > 1:
return None, "❌ Book Cover: Use only ONE method"
try:
if has_file:
file_path = book_cover_file.name if hasattr(book_cover_file, 'name') else book_cover_file
return str(file_path), None
if has_url:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
file_path = download_file_from_url(book_cover_url.strip(), temp_dir, f"book_cover_{timestamp}.png")
return str(file_path), None
if has_base64:
file_path = decode_base64_image(book_cover_base64.strip(), temp_dir)
return str(file_path), None
if has_id:
file_path = download_book_cover(book_id.strip(), temp_dir)
return str(file_path), None
except Exception as e:
return None, f"❌ Book cover error: {str(e)}"
return None, None
def validate_and_get_file(uploaded_file, url_string, file_type, temp_dir):
"""Validate file input and return path."""
has_upload = uploaded_file is not None
has_url = url_string and url_string.strip()
if not has_upload and not has_url:
return None, f"❌ Provide {file_type} via upload or URL"
if has_upload and has_url:
return None, f"❌ Use only ONE method for {file_type}"
if has_upload:
return str(uploaded_file.name if hasattr(uploaded_file, 'name') else uploaded_file), None
try:
url = url_string.strip()
filename = url.split('/')[-1] or f"{file_type}_file"
if '.' not in filename:
ext_map = {'video': '.mp4', 'audio': '.wav', 'subtitle': '.srt'}
filename += ext_map.get(file_type, '.tmp')
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
file_path = download_file_from_url(url, temp_dir, f"{file_type}_{timestamp}_{filename}")
return str(file_path), None
except Exception as e:
return None, f"❌ Download error: {str(e)}"
# Media Info (Cached)
@lru_cache(maxsize=32)
def get_video_info(video_path):
"""Get video resolution and frame rate (cached)."""
result = subprocess.run([
"ffprobe", "-v", "error", "-select_streams", "v:0",
"-show_entries", "stream=width,height", "-of", "csv=s=x:p=0", str(video_path)
], capture_output=True, text=True, check=True)
width, height = map(int, result.stdout.strip().split('x'))
result = subprocess.run([
"ffprobe", "-v", "error", "-select_streams", "v:0",
"-show_entries", "stream=r_frame_rate", "-of", "default=noprint_wrappers=1:nokey=1", str(video_path)
], capture_output=True, text=True, check=True)
fps_str = result.stdout.strip()
    # Parse rational frame rates like "30000/1001" without resorting to eval()
    if '/' in fps_str:
        num, den = fps_str.split('/')
        fps = float(num) / float(den)
    else:
        fps = float(fps_str)
return int(width), int(height), float(fps)
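# Example (illustrative): for a 1080x1920 portrait clip at NTSC rate, the two ffprobe
# calls print "1080x1920" and "30000/1001", which parse to (1080, 1920, ~29.97).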
@lru_cache(maxsize=32)
def get_audio_duration(audio_path):
"""Get audio duration (cached)."""
result = subprocess.run([
"ffprobe", "-v", "error", "-show_entries", "format=duration",
"-of", "default=noprint_wrappers=1:nokey=1", str(audio_path)
], capture_output=True, text=True, check=True)
return float(result.stdout.strip())
# Subtitle Processing
def extract_first_subtitle(srt_path):
"""Extract first subtitle entry."""
with open(srt_path, 'r', encoding='utf-8') as f:
blocks = re.split(r'\n\s*\n', f.read().strip())
if not blocks:
return "No subtitle", 0.0, 3.0
lines = blocks[0].strip().split('\n')
if len(lines) >= 3:
times = lines[1].split(' --> ')
text = ' '.join(lines[2:]).strip()
start = time_to_seconds(times[0].strip())
end = time_to_seconds(times[1].strip())
return str(text), float(start), float(end)
return "No subtitle", 0.0, 3.0
def srt_time_to_ms(time_str):
"""Convert SRT timestamp to milliseconds."""
h, m, s = time_str.strip().split(':')
s, ms = s.split(',')
return int(h) * 3600000 + int(m) * 60000 + int(s) * 1000 + int(ms)
def ms_to_ass_time(ms):
"""Convert milliseconds to ASS timestamp."""
ms = int(ms)
h = ms // 3600000
ms %= 3600000
m = ms // 60000
ms %= 60000
s = ms // 1000
cs = (ms % 1000) // 10
return f"{h}:{m:02d}:{s:02d}.{cs:02d}"
def create_reddit_card_with_text(template_path, hook_text, output_dir, config=REDDIT_CONFIG):
"""Create Reddit card with text using PIL."""
template = Image.open(template_path).convert('RGBA')
tw, th = template.size
text_box_w = int(tw * config['text_box_width_percent'])
text_box_h = int(th * config['text_box_height_percent'])
script_dir = os.path.dirname(os.path.abspath(__file__))
font_paths = [
os.path.join(script_dir, 'fonts', config['font_file']),
os.path.join(script_dir, config['font_file'])
]
    # Find the largest font size whose wrapped text fits inside the text box;
    # fall back to the minimum size (still wrapped) if nothing fits.
    best_size = config['font_size_min']
    best_wrapped = textwrap.fill(str(hook_text), width=config['text_wrap_width'])
for size in range(config['font_size_max'], config['font_size_min'] - 1, -2):
font = load_font(font_paths, size)
wrapped = textwrap.fill(str(hook_text), width=config['text_wrap_width'])
draw = ImageDraw.Draw(template)
bbox = draw.multiline_textbbox((0, 0), wrapped, font=font, spacing=config['line_spacing'])
if bbox[2] <= text_box_w and bbox[3] <= text_box_h:
best_size = size
best_wrapped = wrapped
break
# Draw text
font = load_font(font_paths, best_size)
draw = ImageDraw.Draw(template)
bbox = draw.multiline_textbbox((0, 0), best_wrapped, font=font, spacing=config['line_spacing'])
x = (tw - bbox[2]) / 2
y = (th - bbox[3]) / 2 + config['y_offset']
draw.multiline_text((x, y), best_wrapped, fill=config['text_color'],
font=font, spacing=config['line_spacing'], align='left')
output_path = os.path.join(output_dir, 'reddit_card.png')
template.save(output_path, 'PNG')
return str(output_path)
def create_word_by_word_highlight_ass(srt_path, output_dir, highlight_color='yellow',
font_size=None, skip_first=False, config=SUBTITLE_CONFIG):
"""Convert SRT to ASS with word highlighting."""
font_size = int(font_size) if font_size else int(config['font_size_default'])
    # ASS colours are &HAABBGGRR (blue/green/red order); each entry is
    # (highlight box colour, highlighted word colour).
    color_map = {
        'yellow': ('&H0000FFFF', '&H00000000'), 'orange': ('&H0000A5FF', '&H00000000'),
        'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'),
        'pink': ('&H00B469FF', '&H00000000'), 'red': ('&H000000FF', '&H00FFFFFF'),
        'blue': ('&H00FF0000', '&H00FFFFFF'),
    }
highlight_bg, highlight_text = color_map.get(str(highlight_color).lower(), ('&H0000FFFF', '&H00000000'))
with open(srt_path, 'r', encoding='utf-8') as f:
srt_content = f.read()
ass_header = f"""[Script Info]
Title: Word Highlight
ScriptType: v4.00+
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,100,0,0,1,2,1,{config['position_alignment']},{config['margin_left']},{config['margin_right']},{config['margin_vertical']},1
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""
ass_events = []
srt_blocks = re.split(r'\n\s*\n', srt_content.strip())
start_idx = 1 if skip_first else 0
for block in srt_blocks[start_idx:]:
lines = block.strip().split('\n')
if len(lines) < 3:
continue
times = lines[1].split(' --> ')
if len(times) != 2:
continue
start_ms = srt_time_to_ms(times[0])
end_ms = srt_time_to_ms(times[1])
words = ' '.join(lines[2:]).split()
if not words:
continue
time_per_word = (end_ms - start_ms) / len(words)
for i, word in enumerate(words):
word_start = start_ms + int(i * time_per_word)
word_end = end_ms if i == len(words) - 1 else start_ms + int((i + 1) * time_per_word)
styled_words = [
f"{{\\c{highlight_text}\\3c{highlight_bg}\\bord5}}{w}{{\\r}}" if j == i else w
for j, w in enumerate(words)
]
ass_events.append(
f"Dialogue: 0,{ms_to_ass_time(word_start)},{ms_to_ass_time(word_end)},Default,,0,0,0,,{' '.join(styled_words)}"
)
ass_path = os.path.join(output_dir, 'word_highlight.ass')
    with open(ass_path, 'w', encoding='utf-8') as f:
f.write(ass_header + '\n'.join(ass_events))
return str(ass_path)
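# Example (illustrative): an SRT cue "Hello world" spanning 00:00:00,000 --> 00:00:01,000
# with the default yellow highlight expands into one Dialogue event per word:
#   Dialogue: 0,0:00:00.00,0:00:00.50,Default,,0,0,0,,{\c&H00000000\3c&H0000FFFF\bord5}Hello{\r} world
#   Dialogue: 0,0:00:00.50,0:00:01.00,Default,,0,0,0,,Hello {\c&H00000000\3c&H0000FFFF\bord5}world{\r}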
# Main Processing
def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, subtitle_url,
book_cover_file, book_cover_url, book_cover_base64, book_id,
enable_highlight, highlight_color, font_size, crf_quality=23):
"""Main stitching function with timing."""
start_time = time.time()
temp_dir = tempfile.mkdtemp()
try:
ffmpeg_env = setup_custom_fonts_hf(temp_dir)
# Validate inputs
video_path, err = validate_and_get_file(video_file, video_url, 'video', temp_dir)
if err: return None, err
audio_path, err = validate_and_get_file(audio_file, audio_url, 'audio', temp_dir)
if err: return None, err
subtitle_path, err = validate_and_get_file(subtitle_file, subtitle_url, 'subtitle', temp_dir)
if err: return None, err
setup_time = time.time() - start_time
# Get media info
video_width, video_height, video_fps = get_video_info(video_path)
audio_duration = get_audio_duration(audio_path)
video_width = int(video_width)
video_height = int(video_height)
video_fps = float(video_fps)
audio_duration = float(audio_duration)
status = f"⏱️ Setup: {setup_time:.1f}s\n"
        status += f"📥 {video_width}x{video_height}@{video_fps:.0f}fps | {audio_duration:.1f}s\n\n"
# Reddit template
script_dir = os.path.dirname(os.path.abspath(__file__))
reddit_template_path = os.path.join(script_dir, REDDIT_CONFIG['template_file'])
has_reddit = os.path.exists(reddit_template_path)
first_text = ""
first_start = 0.0
first_end = 3.0
reddit_card_path = ""
if has_reddit:
reddit_start = time.time()
first_text, first_start, first_end = extract_first_subtitle(subtitle_path)
reddit_card_path = create_reddit_card_with_text(reddit_template_path, first_text, temp_dir)
reddit_time = time.time() - reddit_start
            status += f"📱 Reddit: ✅ ({reddit_time:.1f}s)\n"
# Generate subtitles
sub_start = time.time()
subtitle_ass = create_word_by_word_highlight_ass(
subtitle_path, temp_dir, highlight_color, font_size,
skip_first=has_reddit, config=SUBTITLE_CONFIG
) if enable_highlight else subtitle_path
sub_time = time.time() - sub_start
        status += f"📝 Subtitles: ✅ ({sub_time:.1f}s)\n\n"
        # Escape the subtitle path for use inside FFmpeg filter strings
        # (':' is an option separator there; backslashes become forward slashes).
        subtitle_escaped = str(subtitle_ass).replace('\\', '/').replace(':', '\\:')
# Validate book cover
book_cover_path, book_error = validate_book_cover_input(
book_cover_file, book_cover_url, book_cover_base64, book_id, temp_dir
)
if book_error:
return None, book_error
has_book_cover = book_cover_path is not None
# Output setup
timestamp = datetime.now().strftime("%H%M%S")
output_path = os.path.join(temp_dir, f"final_{timestamp}.mp4")
# Common encoding flags
common_flags = [
"-c:v", "libx264", "-preset", str(VIDEO_CONFIG['encoding_preset']),
"-crf", str(int(crf_quality)), "-pix_fmt", "yuv420p",
"-threads", str(int(VIDEO_CONFIG['threads']))
]
# Calculate timings
fade_start = float(audio_duration * VIDEO_CONFIG['fade_start_percent'])
fade_end = float(audio_duration * VIDEO_CONFIG['fade_end_percent'])
fade_duration = float(fade_end - fade_start)
promo_duration = float(audio_duration * VIDEO_CONFIG['promo_percent'])
book_start = float(audio_duration - promo_duration)
solid_duration = float(book_start - fade_end)
if has_book_cover:
status += "🎬 Encoding:\n\n"
segments = []
# STEP 1: Main
main_path = os.path.join(temp_dir, f"main_{timestamp}.mp4")
success, error, timing = run_ffmpeg_cmd([
"ffmpeg", "-hwaccel", "auto", "-stream_loop", "-1", "-i", str(video_path),
"-t", f"{fade_end:.2f}",
"-vf", f"fps={video_fps:.2f},scale={video_width}:{video_height},fade=t=out:st={fade_start:.2f}:d={fade_duration:.2f}:c={VIDEO_CONFIG['fade_color_hex']}",
*common_flags, "-an", "-y", str(main_path)
], ffmpeg_env, "1/4: Main", start_time)
if not success: return None, error
status += f"{timing}\n"
segments.append(main_path)
# STEP 2: Solid
if solid_duration > 0:
solid_path = os.path.join(temp_dir, f"solid_{timestamp}.mp4")
success, error, timing = run_ffmpeg_cmd([
"ffmpeg", "-f", "lavfi",
"-i", f"color=c={VIDEO_CONFIG['fade_color_hex']}:s={video_width}x{video_height}:d={solid_duration:.2f}:r={video_fps:.2f}",
*common_flags, "-y", str(solid_path)
], ffmpeg_env, "2/4: Solid", start_time)
if not success: return None, error
status += f"{timing}\n"
segments.append(solid_path)
# STEP 3: Cover
cover_path = os.path.join(temp_dir, f"cover_{timestamp}.mp4")
success, error, timing = run_ffmpeg_cmd([
"ffmpeg", "-hwaccel", "auto", "-loop", "1", "-i", str(book_cover_path),
"-t", f"{promo_duration:.2f}",
"-vf", f"scale={video_width}:{video_height},setsar=1,fps={video_fps:.2f},fade=t=in:st=0:d={VIDEO_CONFIG['book_fade_in_duration']}:c={VIDEO_CONFIG['fade_color_hex']}",
*common_flags, "-an", "-y", str(cover_path)
], ffmpeg_env, "3/4: Cover", start_time)
if not success: return None, error
status += f"{timing}\n"
segments.append(cover_path)
# STEP 4: Final
concat_list = os.path.join(temp_dir, f"concat_{timestamp}.txt")
with open(concat_list, 'w') as f:
f.write('\n'.join(f"file '{str(s)}'" for s in segments))
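            # The concat list uses the plain-text format read by FFmpeg's concat demuxer
            # ("-f concat -safe 0" below); illustrative contents (paths hypothetical):
            #   file '/tmp/tmpXXXX/main_123456.mp4'
            #   file '/tmp/tmpXXXX/solid_123456.mp4'
            #   file '/tmp/tmpXXXX/cover_123456.mp4'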
if has_reddit:
filter_complex = (
f"[0:v]ass={subtitle_escaped}[bg];"
f"[1:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];"
f"[bg][reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_start:.2f},{first_end:.2f})'[v]"
)
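                # Illustrative expansion for a 1080-px-wide video (paths hypothetical):
                #   [0:v]ass=/tmp/word_highlight.ass[bg];
                #   [1:v]scale=1080*0.75:-1[reddit];
                #   [bg][reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,0.00,3.00)'[v]
                # i.e. burn the subtitles onto the concatenated video, scale the Reddit card
                # to 75% of the frame width, and overlay it centred while the first cue is on screen.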
cmd = [
"ffmpeg", "-hwaccel", "auto", "-f", "concat", "-safe", "0", "-i", str(concat_list),
"-loop", "1", "-i", str(reddit_card_path), "-i", str(audio_path),
"-filter_complex", filter_complex, "-map", "[v]", "-map", "2:a",
*common_flags, "-c:a", "aac", "-shortest", "-y", str(output_path)
]
else:
cmd = [
"ffmpeg", "-hwaccel", "auto", "-f", "concat", "-safe", "0", "-i", str(concat_list),
"-i", str(audio_path),
"-vf", f"ass={subtitle_escaped}", "-map", "0:v", "-map", "1:a",
*common_flags, "-c:a", "aac", "-shortest", "-y", str(output_path)
]
success, error, timing = run_ffmpeg_cmd(cmd, ffmpeg_env, "4/4: Final", start_time)
if not success: return None, error
status += f"{timing}\n"
else:
# Simple loop
status += "🎬 Encoding:\n\n"
if has_reddit:
filter_complex = (
f"[0:v]ass={subtitle_escaped}[bg];"
f"[1:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];"
f"[bg][reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_start:.2f},{first_end:.2f})'[v]"
)
cmd = [
"ffmpeg", "-hwaccel", "auto", "-stream_loop", "-1", "-i", str(video_path),
"-loop", "1", "-i", str(reddit_card_path), "-i", str(audio_path),
"-filter_complex", filter_complex, "-map", "[v]", "-map", "2:a",
*common_flags, "-c:a", "aac", "-shortest", "-y", str(output_path)
]
else:
cmd = [
"ffmpeg", "-hwaccel", "auto", "-stream_loop", "-1", "-i", str(video_path),
"-i", str(audio_path),
"-vf", f"ass={subtitle_escaped}", "-map", "0:v", "-map", "1:a",
*common_flags, "-c:a", "aac", "-shortest", "-y", str(output_path)
]
success, error, timing = run_ffmpeg_cmd(cmd, ffmpeg_env, "Encoding", start_time)
if not success: return None, error
status += f"{timing}\n"
# Success - ensure string output
total_time = time.time() - start_time
if os.path.exists(output_path):
size_mb = os.path.getsize(output_path) / (1024 * 1024)
            success_msg = f"""✅ COMPLETE!
📊 {size_mb:.1f}MB | {audio_duration:.1f}s
⏱️ TOTAL: {format_elapsed_time(total_time)} ({total_time:.1f}s)
⚡ Preset: {VIDEO_CONFIG['encoding_preset']}
──────────────────────────
{status}"""
return str(output_path), str(success_msg)
return None, "❌ Output not created"
except Exception as e:
total_time = time.time() - start_time
# Clean error message - no binary data
return None, f"❌ Error after {format_elapsed_time(total_time)}: {str(e)}"
# Gradio UI
with gr.Blocks(title="Video Stitcher", theme=gr.themes.Soft()) as app:
gr.Markdown(f"""
# 🎬 Video Stitcher ⚡ OPTIMIZED
**Config:** Reddit={REDDIT_CONFIG['font_file']} | Subtitle={SUBTITLE_CONFIG['font_name']}
**Performance:** Hardware accel + {VIDEO_CONFIG['encoding_preset']} preset
""")
with gr.Row():
with gr.Column():
with gr.Group():
gr.Markdown("**πŸ“Ή Video**")
video_input = gr.File(label="Upload", file_types=[".mp4", ".mov", ".avi", ".mkv"], type="filepath")
video_url_input = gr.Textbox(label="OR URL", placeholder="https://...")
with gr.Group():
gr.Markdown("**🎡 Audio**")
audio_input = gr.File(label="Upload", file_types=[".wav", ".mp3", ".aac"], type="filepath")
audio_url_input = gr.Textbox(label="OR URL", placeholder="https://...")
with gr.Group():
gr.Markdown("**πŸ“ Subtitle**")
subtitle_input = gr.File(label="Upload (.srt)", file_types=[".srt"], type="filepath")
subtitle_url_input = gr.Textbox(label="OR URL", placeholder="https://...")
with gr.Group():
gr.Markdown("**πŸ“š Book Cover (Optional - ONE method)**")
book_cover_input = gr.File(label="Upload", file_types=[".png", ".jpg", ".jpeg"], type="filepath")
gr.Markdown("**OR**")
book_cover_url_input = gr.Textbox(label="Image URL", placeholder="https://...")
gr.Markdown("**OR**")
book_cover_base64_input = gr.Textbox(
label="Base64 Image",
placeholder="data:image/png;base64,iVBOR... or raw base64",
lines=2
)
gr.Markdown("**OR**")
book_id_input = gr.Textbox(label="Google Books ID", placeholder="wyaEDwAAQBAJ")
with gr.Row():
enable_highlight = gr.Checkbox(label="Highlight", value=True)
highlight_color = gr.Dropdown(
choices=['yellow', 'orange', 'green', 'cyan', 'pink', 'red', 'blue'],
value='yellow', label="Color"
)
with gr.Row():
font_size = gr.Slider(12, 32, 12, step=2, label="Font Size")
crf_input = gr.Slider(18, 28, 23, step=1, label="Quality")
stitch_btn = gr.Button("🎬 Stitch Video", variant="primary", size="lg")
with gr.Column():
status_output = gr.Textbox(label="Status", lines=12)
video_output = gr.Video(label="Result")
gr.Markdown("""
### 📚 Book Cover - 4 Methods:
1. **Upload** - Image file
2. **URL** - Direct link
3. **Base64** - Encoded image data
4. **Book ID** - Google Books
⚠️ **Use only ONE!**
### ⚡ Optimizations:
- Hardware acceleration
- Faster preset (30-50% speedup)
- Real-time timing
""")
stitch_btn.click(
fn=stitch_media,
inputs=[video_input, video_url_input, audio_input, audio_url_input,
subtitle_input, subtitle_url_input,
book_cover_input, book_cover_url_input, book_cover_base64_input, book_id_input,
enable_highlight, highlight_color, font_size, crf_input],
outputs=[video_output, status_output]
)
if __name__ == "__main__":
app.launch(show_error=True)