# video-ffmpeg / app.py — Hugging Face Space by Tim13ekd
# (commit b6a8e09 "Update app.py", verified, 7.39 kB)
import gradio as gr
import tempfile
from pathlib import Path
import uuid
import subprocess
import requests
import base64
import math
import shutil
import io # Für NamedString-Handling
import shlex # Für sicheres Escapen von Text
# Allowed file formats
# Image extensions accepted as slideshow frames (checked by the Gradio file picker).
allowed_medias = [".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff"]
# Audio extensions accepted for the optional soundtrack.
allowed_audios = [".mp3", ".wav", ".m4a", ".ogg"]
# Pollinations OpenAI-compatible endpoint used for audio transcription.
API_URL = "https://text.pollinations.ai/openai"
def save_temp_audio(audio_file):
    """
    Persist an uploaded audio file into a fresh temporary directory with a
    valid extension and return the resulting ``Path``.

    Accepts either a filesystem path as ``str`` (e.g. a Gradio ``NamedString``,
    which is a ``str`` subclass holding the temp-file path) or a file-like
    object exposing a ``name`` attribute.

    Raises:
        ValueError: if *audio_file* is neither of the accepted kinds.
    """
    if isinstance(audio_file, str):
        ext = Path(audio_file).suffix
        if ext.lower() not in allowed_audios:
            ext = ".mp3"  # fallback when the extension is missing or unknown
        temp_audio = Path(tempfile.mkdtemp()) / f"input{ext}"
        # BUG FIX: the original wrote audio_file.encode() — i.e. the PATH
        # string itself — into the target file, producing a garbage "audio"
        # file.  Copy the actual file contents instead.
        shutil.copyfile(audio_file, temp_audio)
        return temp_audio
    elif hasattr(audio_file, 'name'):
        ext = Path(audio_file.name).suffix
        if ext.lower() not in allowed_audios:
            ext = ".mp3"
        temp_audio = Path(tempfile.mkdtemp()) / f"input{ext}"
        audio_file.seek(0)  # rewind in case the upload handler already read it
        with open(temp_audio, "wb") as f:
            shutil.copyfileobj(audio_file, f)
        return temp_audio
    else:
        raise ValueError("Das übergebene Audio ist kein gültiges Dateiformat oder NamedString.")
def convert_to_wav(audio_path):
    """Transcode *audio_path* to 16 kHz mono WAV alongside the source and return the new Path."""
    target = Path(audio_path).with_suffix(".wav")
    subprocess.run(
        ["ffmpeg", "-y", "-i", str(audio_path), "-ar", "16000", "-ac", "1", str(target)],
        check=True,
        capture_output=True,
        text=True,
    )
    return target
def transcribe_audio(audio_file):
    """
    Transcribe an uploaded audio file via the Pollinations OpenAI-compatible API.

    Returns a ``(text, error)`` tuple: ``(transcript, None)`` on success,
    ``(None, message)`` on failure.

    BUG FIX: the original returned a bare string on success but a
    ``(None, message)`` tuple on failure; the caller unpacks two values, so a
    successful transcription crashed (or mis-unpacked) there.  Now both paths
    return a 2-tuple.
    """
    temp_audio = save_temp_audio(audio_file)
    wav_file = convert_to_wav(temp_audio)  # payload below declares "wav" format
    with open(wav_file, "rb") as f:
        audio_data = base64.b64encode(f.read()).decode()
    payload = {
        "model": "openai-audio",
        "messages": [{
            "role": "user",
            "content": [
                {"type": "text", "text": "Transcribe this audio:"},
                {"type": "input_audio", "input_audio": {"data": audio_data, "format": "wav"}}
            ]
        }]
    }
    try:
        # Timeout added so a hung API call cannot block the UI forever.
        response = requests.post(API_URL, json=payload, timeout=120)
        response.raise_for_status()
    except requests.RequestException as e:
        return None, f"❌ API Fehler: {e}"
    try:
        text = response.json()['choices'][0]['message']['content']
    except (ValueError, KeyError, IndexError) as e:
        # Malformed / unexpected API response body.
        return None, f"❌ API Fehler: {e}"
    return text, None
def generate_slideshow_with_audio(images, audio_file, duration_per_image=3, y_pos=0.5, fade_duration=0.7, font_size=60, speed=1.0):
    """
    Build a 1280x720 slideshow video from *images*, optionally overlaying an
    evenly-split transcript of *audio_file* as fading subtitles and muxing the
    audio under the result.

    Returns a ``(video_path_or_None, status_message)`` tuple for the Gradio
    outputs.

    NOTE(review): *speed* is accepted for UI compatibility but is currently
    not applied anywhere — TODO wire it into the fade/duration maths.
    """
    if not images:
        return None, "❌ Keine Bilder ausgewählt"
    y_pos = min(max(0.0, y_pos), 0.9)  # clamp so the text never leaves the frame
    temp_dir = tempfile.mkdtemp()
    clips = []
    if audio_file:
        # Be tolerant of both transcribe_audio return conventions: a bare
        # transcript string (legacy success path) or a (text, error) tuple.
        result = transcribe_audio(audio_file)
        if isinstance(result, tuple):
            transcript, err = result
            if err:
                return None, err
        else:
            transcript = result
        words = transcript.split()
        total_words = len(words)
        # Distribute the transcript evenly across the images as subtitles.
        # (ceil of 0 words is 0, so an empty transcript yields empty texts.)
        segments_per_image = math.ceil(total_words / len(images))
        texts = [
            " ".join(words[i * segments_per_image:(i + 1) * segments_per_image])
            for i in range(len(images))
        ]
        temp_audio_file = save_temp_audio(audio_file)
    else:
        texts = [""] * len(images)
        temp_audio_file = None
    for i, img_path in enumerate(images):
        # Gradio may deliver tempfile wrappers (with .name) or plain path strings.
        img_path = Path(getattr(img_path, "name", img_path))
        clip_path = Path(temp_dir) / f"clip_{i}.mp4"
        text = texts[i] if i < len(texts) else ""
        vf_filters = (
            "scale=w=1280:h=720:force_original_aspect_ratio=decrease,"
            "pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,"
            "fps=25,format=yuv420p"
        )
        if text:
            # BUG FIX: the original escaped the text with shlex.quote, which is
            # SHELL quoting, not ffmpeg filtergraph escaping — apostrophes or
            # colons in the transcript corrupted the filter.  Writing the text
            # to a file and using drawtext's textfile= option side-steps
            # filtergraph escaping entirely (mkdtemp paths contain no
            # filter metacharacters).
            text_path = Path(temp_dir) / f"text_{i}.txt"
            text_path.write_text(text, encoding="utf-8")
            vf_filters += (
                f",drawtext=textfile={text_path}:fontcolor=white:fontsize={font_size}:borderw=2:"
                f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}:"
                f"alpha='if(lt(t,{fade_duration}), t/{fade_duration}, if(lt(t,{duration_per_image}-{fade_duration}), 1, ({duration_per_image}-t)/{fade_duration}))'"
            )
        cmd = [
            "ffmpeg",
            "-y",
            "-loop", "1",
            "-i", str(img_path),
            "-t", str(duration_per_image),
            "-vf", vf_filters,
            str(clip_path)
        ]
        try:
            subprocess.run(cmd, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            return None, f"❌ FFmpeg Fehler bei Bild {i+1}:\n{e.stderr}"
        clips.append(clip_path)
    # Concatenate the per-image clips via ffmpeg's concat demuxer.
    filelist_path = Path(temp_dir) / "filelist.txt"
    with open(filelist_path, "w") as f:
        for clip in clips:
            f.write(f"file '{clip}'\n")
    output_file = Path(temp_dir) / f"slideshow_{uuid.uuid4().hex}.mp4"
    cmd_concat = [
        "ffmpeg",
        "-y",
        "-f", "concat",
        "-safe", "0",  # filelist contains absolute paths
        "-i", str(filelist_path),
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",
        str(output_file)
    ]
    try:
        subprocess.run(cmd_concat, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        return None, f"❌ FFmpeg Concat Fehler:\n{e.stderr}"
    if temp_audio_file:
        # Mux the original audio under the concatenated video; -shortest trims
        # to the shorter of the two streams.
        final_output = Path(temp_dir) / f"slideshow_audio_{uuid.uuid4().hex}.mp4"
        cmd_audio = [
            "ffmpeg",
            "-y",
            "-i", str(output_file),
            "-i", str(temp_audio_file),
            "-c:v", "copy",
            "-c:a", "aac",
            "-shortest",
            str(final_output)
        ]
        try:
            subprocess.run(cmd_audio, check=True, capture_output=True, text=True)
            return str(final_output), "✅ Slideshow mit Audio und automatischen Untertiteln erstellt"
        except subprocess.CalledProcessError as e:
            return None, f"❌ FFmpeg Audio Merge Fehler:\n{e.stderr}"
    return str(output_file), "✅ Slideshow erstellt (ohne Audio)"
# Gradio UI — wires the generator function to file pickers and tuning controls.
with gr.Blocks() as demo:
    gr.Markdown("# Slideshow mit Audio & automatischen Untertiteln")
    # Multiple image uploads; extensions restricted via allowed_medias.
    img_input = gr.Files(label="Bilder auswählen (mehrere)", file_types=allowed_medias)
    # Optional single audio track; if provided, it is transcribed into subtitles.
    audio_input = gr.File(
        label="Audio hinzufügen (MP3, WAV, M4A, OGG ... optional)",
        file_types=allowed_audios
    )
    duration_input = gr.Number(value=3, label="Dauer pro Bild in Sekunden", precision=1)
    fade_input = gr.Number(value=0.7, label="Fade Dauer in Sekunden", precision=1)
    ypos_input = gr.Slider(minimum=0.0, maximum=0.9, step=0.01, value=0.5, label="Y-Position für alle Texte (0=oben, 0.5=mitte, 0.9=unten)")
    font_size_input = gr.Number(value=60, label="Textgröße (px)")
    # NOTE(review): the backend currently ignores this value — confirm whether
    # it should scale the fade/display timing before exposing it.
    speed_input = gr.Slider(minimum=0.1, maximum=3.0, value=1.0, label="Geschwindigkeit der Texteinblendung")
    out_video = gr.Video(interactive=False, label="Generiertes Video")
    status = gr.Textbox(interactive=False, label="Status")
    btn = gr.Button("Video erstellen")
    # Inputs map positionally onto generate_slideshow_with_audio's signature:
    # (images, audio_file, duration_per_image, y_pos, fade_duration, font_size, speed).
    btn.click(
        fn=generate_slideshow_with_audio,
        inputs=[img_input, audio_input, duration_input, ypos_input, fade_input, font_size_input, speed_input],
        outputs=[out_video, status]
    )
demo.launch()