video-ffmpeg

Sleeping

App Files Files Community

video-ffmpeg / app.py

Tim13ekd

Update app.py

b6a8e09 verified 7 days ago

raw

history blame

7.39 kB

	import gradio as gr
	import tempfile
	from pathlib import Path
	import uuid
	import subprocess
	import requests
	import base64
	import math
	import shutil
	import io # Für NamedString-Handling
	import shlex # Für sicheres Escapen von Text

	# File extensions offered by the image and audio upload widgets.
	allowed_medias = [
	    ".png",
	    ".jpg",
	    ".jpeg",
	    ".bmp",
	    ".gif",
	    ".tiff",
	]
	allowed_audios = [
	    ".mp3",
	    ".wav",
	    ".m4a",
	    ".ogg",
	]

	# HTTP endpoint that transcription requests are posted to.
	API_URL = "https://text.pollinations.ai/openai"

	def save_temp_audio(audio_file, allowed_suffixes=None):
	    """Copy an uploaded audio file into a fresh temporary directory.

	    The copy is always named ``input<ext>``. ``<ext>`` is the suffix of the
	    upload when that suffix (compared case-insensitively) is allowed, and
	    ``.mp3`` otherwise.

	    Args:
	        audio_file: Either a filesystem path (``str`` or ``pathlib.Path``;
	            ``str`` subclasses that carry extra attributes count as paths)
	            or an object with a ``name`` attribute. If that object also has
	            ``read`` its content is streamed, otherwise ``name`` is treated
	            as the path to copy from.
	        allowed_suffixes: Lower-case suffixes to accept. Defaults to the
	            module-level ``allowed_audios`` list.

	    Returns:
	        ``pathlib.Path`` of the copy inside a new ``tempfile.mkdtemp()``
	        directory.

	    Raises:
	        ValueError: If ``audio_file`` is of an unsupported kind, or names a
	            file that does not exist.
	    """
	    if isinstance(audio_file, (str, Path)):
	        # A string is a *path* to the upload, not its content, so the file
	        # it points to has to be copied.
	        source_name = str(audio_file)
	        stream = None
	    elif hasattr(audio_file, "name"):
	        source_name = str(audio_file.name)
	        stream = audio_file if hasattr(audio_file, "read") else None
	    else:
	        raise ValueError("Das übergebene Audio ist kein gültiges Dateiformat oder NamedString.")

	    if stream is None and not Path(source_name).is_file():
	        raise ValueError(f"Audiodatei nicht gefunden: {source_name}")

	    suffixes = allowed_audios if allowed_suffixes is None else allowed_suffixes
	    ext = Path(source_name).suffix
	    if ext.lower() not in suffixes:
	        ext = ".mp3"  # fallback when the suffix is missing or not allowed

	    temp_audio = Path(tempfile.mkdtemp()) / f"input{ext}"
	    if stream is None:
	        shutil.copyfile(source_name, temp_audio)
	    else:
	        if hasattr(stream, "seek"):
	            stream.seek(0)  # the caller may already have read from it
	        with open(temp_audio, "wb") as f:
	            shutil.copyfileobj(stream, f)
	    return temp_audio

	def convert_to_wav(audio_path):
	    """Convert an audio file to a 16 kHz mono WAV file using ffmpeg.

	    The result is written next to the input as ``<stem>_16k.wav``. A name
	    distinct from the input is required: for an upload that is already
	    ``.wav``, swapping the suffix would give the input path itself, and
	    ffmpeg refuses to write its output over its own input.

	    Args:
	        audio_path: Path of the audio file to convert.

	    Returns:
	        ``pathlib.Path`` of the converted WAV file.

	    Raises:
	        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
	    """
	    source = Path(audio_path)
	    wav_path = source.with_name(f"{source.stem}_16k.wav")
	    cmd = ["ffmpeg", "-y", "-i", str(source), "-ar", "16000", "-ac", "1", str(wav_path)]
	    subprocess.run(cmd, check=True, capture_output=True, text=True)
	    return wav_path

	def transcribe_audio(audio_file):
	    """Transcribe an uploaded audio file through the remote API.

	    The upload is copied to a temporary file, converted to WAV, base64
	    encoded and posted to ``API_URL`` as an ``input_audio`` chat message.

	    Args:
	        audio_file: The upload, in any form ``save_temp_audio`` accepts.

	    Returns:
	        A ``(text, error)`` tuple. On success ``error`` is ``None``; on
	        failure ``text`` is ``None`` and ``error`` is a message for the UI.
	        Both outcomes use the same shape so callers can always unpack two
	        values.
	    """
	    temp_audio = save_temp_audio(audio_file)
	    wav_file = None
	    try:
	        try:
	            wav_file = convert_to_wav(temp_audio)
	        except (subprocess.CalledProcessError, OSError) as e:
	            details = getattr(e, "stderr", None) or e
	            return None, f"❌ FFmpeg Fehler bei der Audio-Konvertierung:\n{details}"
	        with open(wav_file, "rb") as f:
	            audio_data = base64.b64encode(f.read()).decode()
	    finally:
	        # Best-effort cleanup: the encoded payload is all that is needed from
	        # here on. Only the files created above are removed, and rmdir()
	        # leaves the directory alone if anything else is in it.
	        for leftover in (wav_file, temp_audio):
	            if leftover is not None:
	                try:
	                    Path(leftover).unlink()
	                except OSError:
	                    pass
	        try:
	            Path(temp_audio).parent.rmdir()
	        except OSError:
	            pass

	    payload = {
	        "model": "openai-audio",
	        "messages": [{
	            "role": "user",
	            "content": [
	                {"type": "text", "text": "Transcribe this audio:"},
	                {"type": "input_audio", "input_audio": {"data": audio_data, "format": "wav"}},
	            ],
	        }],
	    }
	    try:
	        # (connect, read) timeouts so a stalled server cannot hang the UI forever.
	        response = requests.post(API_URL, json=payload, timeout=(10, 300))
	        response.raise_for_status()
	        result = response.json()
	        text = result["choices"][0]["message"]["content"]
	    except requests.RequestException as e:
	        return None, f"❌ API Fehler: {e}"
	    except (ValueError, KeyError, IndexError, TypeError) as e:
	        # Body was not JSON or did not have the expected structure.
	        return None, f"❌ Unerwartete API Antwort: {e!r}"

	    if not isinstance(text, str):
	        return None, "❌ Unerwartete API Antwort: kein Text enthalten"
	    return text, None

	def _split_words_evenly(words, parts):
	    """Join ``words`` into ``parts`` consecutive chunks of near-equal word count."""
	    per_part = math.ceil(len(words) / parts)
	    return [" ".join(words[i * per_part:(i + 1) * per_part]) for i in range(parts)]


	def _quote_filter_value(value):
	    """Escape ``value`` so ffmpeg reads it back verbatim as a filter option value."""
	    # First level: characters that are special inside a filter's option list.
	    option_level = str(value).replace("\\", "\\\\").replace("'", "\\'").replace(":", "\\:")
	    # Second level: quote the whole value for the filtergraph parser. A literal
	    # quote has to be spliced in between two quoted runs.
	    return "'" + option_level.replace("'", "'\\''") + "'"


	def _build_video_filter(text_file, font_size, y_pos, fade_duration, duration_per_image):
	    """Return the ``-vf`` filter chain for one slide, with subtitles if ``text_file`` is set."""
	    vf_filters = (
	        "scale=w=1280:h=720:force_original_aspect_ratio=decrease,"
	        "pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,"
	        "fps=25,format=yuv420p"
	    )
	    if text_file is None:
	        return vf_filters

	    # The subtitle is read from a file with expansion disabled, so arbitrary
	    # transcript text (quotes, colons, percent signs, ...) needs no escaping.
	    vf_filters += (
	        f",drawtext=textfile={_quote_filter_value(Path(text_file).as_posix())}:expansion=none:"
	        f"fontcolor=white:fontsize={font_size}:borderw=2:"
	        f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}"
	    )
	    if fade_duration and fade_duration > 0:
	        # Fade in at the start and out at the end of the slide. Skipped for a
	        # zero fade, which would put a division by zero into the expression.
	        vf_filters += (
	            f":alpha='if(lt(t,{fade_duration}), t/{fade_duration}, "
	            f"if(lt(t,{duration_per_image}-{fade_duration}), 1, ({duration_per_image}-t)/{fade_duration}))'"
	        )
	    return vf_filters


	def _run_ffmpeg(cmd):
	    """Run ``cmd``; return ``None`` on success or the error text on failure."""
	    try:
	        subprocess.run(cmd, check=True, capture_output=True, text=True)
	    except subprocess.CalledProcessError as e:
	        return e.stderr or str(e)
	    except OSError as e:
	        # Raised when the ffmpeg executable itself cannot be started.
	        return str(e)
	    return None


	def generate_slideshow_with_audio(images, audio_file, duration_per_image=3, y_pos=0.5, fade_duration=0.7, font_size=60, speed=1.0):
	    """Render images as a 1280x720 slideshow, optionally with audio and subtitles.

	    Each image becomes one clip of ``duration_per_image`` seconds. When an
	    audio file is given it is transcribed, the transcript words are spread
	    evenly over the images as centred white text, and the audio is muxed
	    into the final video (cut to the shorter of video and audio).

	    Args:
	        images: Image uploads; each item is a path (``str``/``Path``) or an
	            object whose ``name`` attribute is the path.
	        audio_file: Optional audio upload, in any form ``save_temp_audio``
	            accepts.
	        duration_per_image: Seconds each image is shown; must be positive.
	        y_pos: Vertical text position as a fraction of the free height,
	            clamped to ``[0.0, 0.9]``.
	        fade_duration: Seconds of text fade-in and fade-out; ``0`` disables
	            the fade.
	        font_size: Subtitle font size passed to ffmpeg's ``drawtext``.
	        speed: Accepted for interface compatibility; currently has no effect.

	    Returns:
	        ``(video_path, status)``. ``video_path`` is ``None`` when something
	        failed, in which case ``status`` describes the error.
	    """
	    if not images:
	        return None, "❌ Keine Bilder ausgewählt"
	    if not duration_per_image or duration_per_image <= 0:
	        return None, "❌ Dauer pro Bild muss größer als 0 sein"

	    y_pos = min(max(0.0, y_pos), 0.9)

	    if audio_file:
	        outcome = transcribe_audio(audio_file)
	        # Accept a bare transcript string as well as a (text, error) pair.
	        if isinstance(outcome, str):
	            transcript, err = outcome, None
	        else:
	            transcript, err = outcome
	        if err:
	            return None, err
	        texts = _split_words_evenly((transcript or "").split(), len(images))
	        temp_audio_file = save_temp_audio(audio_file)
	    else:
	        texts = [""] * len(images)
	        temp_audio_file = None

	    temp_dir = Path(tempfile.mkdtemp())

	    def fail(message):
	        # Nothing in the work directory is useful once a step has failed.
	        shutil.rmtree(temp_dir, ignore_errors=True)
	        return None, message

	    clips = []
	    for i, image in enumerate(images):
	        # Path objects also have ``.name`` (the basename only), so paths are
	        # taken as they are and ``.name`` is used only for file wrappers.
	        img_path = Path(image) if isinstance(image, (str, Path)) else Path(image.name)
	        clip_path = temp_dir / f"clip_{i}.mp4"

	        text_file = None
	        if texts[i]:
	            text_file = temp_dir / f"text_{i}.txt"
	            text_file.write_text(texts[i], encoding="utf-8")

	        cmd = [
	            "ffmpeg",
	            "-y",
	            "-loop", "1",
	            "-i", str(img_path),
	            "-t", str(duration_per_image),
	            "-vf", _build_video_filter(text_file, font_size, y_pos, fade_duration, duration_per_image),
	            str(clip_path),
	        ]
	        error = _run_ffmpeg(cmd)
	        if error is not None:
	            return fail(f"❌ FFmpeg Fehler bei Bild {i+1}:\n{error}")
	        clips.append(clip_path)

	    # Join the clips with the concat demuxer.
	    filelist_path = temp_dir / "filelist.txt"
	    with open(filelist_path, "w", encoding="utf-8") as f:
	        for clip in clips:
	            # A single quote inside the quoted path has to be spliced in.
	            quoted_clip = str(clip).replace("'", "'\\''")
	            f.write(f"file '{quoted_clip}'\n")

	    output_file = temp_dir / f"slideshow_{uuid.uuid4().hex}.mp4"
	    cmd_concat = [
	        "ffmpeg",
	        "-y",
	        "-f", "concat",
	        "-safe", "0",
	        "-i", str(filelist_path),
	        "-c:v", "libx264",
	        "-pix_fmt", "yuv420p",
	        str(output_file),
	    ]
	    error = _run_ffmpeg(cmd_concat)
	    if error is not None:
	        return fail(f"❌ FFmpeg Concat Fehler:\n{error}")

	    if temp_audio_file:
	        final_output = temp_dir / f"slideshow_audio_{uuid.uuid4().hex}.mp4"
	        cmd_audio = [
	            "ffmpeg",
	            "-y",
	            "-i", str(output_file),
	            "-i", str(temp_audio_file),
	            "-c:v", "copy",
	            "-c:a", "aac",
	            "-shortest",
	            str(final_output),
	        ]
	        error = _run_ffmpeg(cmd_audio)
	        if error is not None:
	            return fail(f"❌ FFmpeg Audio Merge Fehler:\n{error}")
	        return str(final_output), "✅ Slideshow mit Audio und automatischen Untertiteln erstellt"

	    return str(output_file), "✅ Slideshow erstellt (ohne Audio)"

	# Gradio user interface
	with gr.Blocks() as demo:
	    gr.Markdown("# Slideshow mit Audio & automatischen Untertiteln")

	    # Upload widgets: several images plus one optional audio file.
	    img_input = gr.Files(
	        file_types=allowed_medias,
	        label="Bilder auswählen (mehrere)",
	    )
	    audio_input = gr.File(
	        file_types=allowed_audios,
	        label="Audio hinzufügen (MP3, WAV, M4A, OGG ... optional)",
	    )

	    # Timing and text-layout controls.
	    duration_input = gr.Number(
	        label="Dauer pro Bild in Sekunden",
	        value=3,
	        precision=1,
	    )
	    fade_input = gr.Number(
	        label="Fade Dauer in Sekunden",
	        value=0.7,
	        precision=1,
	    )
	    ypos_input = gr.Slider(
	        label="Y-Position für alle Texte (0=oben, 0.5=mitte, 0.9=unten)",
	        minimum=0.0,
	        maximum=0.9,
	        step=0.01,
	        value=0.5,
	    )
	    font_size_input = gr.Number(
	        label="Textgröße (px)",
	        value=60,
	    )
	    speed_input = gr.Slider(
	        label="Geschwindigkeit der Texteinblendung",
	        minimum=0.1,
	        maximum=3.0,
	        value=1.0,
	    )

	    # Result widgets.
	    out_video = gr.Video(label="Generiertes Video", interactive=False)
	    status = gr.Textbox(label="Status", interactive=False)

	    btn = gr.Button("Video erstellen")
	    # The input order follows the positional parameter order of
	    # generate_slideshow_with_audio.
	    btn.click(
	        fn=generate_slideshow_with_audio,
	        inputs=[
	            img_input,
	            audio_input,
	            duration_input,
	            ypos_input,
	            fade_input,
	            font_size_input,
	            speed_input,
	        ],
	        outputs=[out_video, status],
	    )

	demo.launch()