|
|
import spaces |
|
|
import logging |
|
|
from datetime import datetime |
|
|
from pathlib import Path |
|
|
import gradio as gr |
|
|
import torch |
|
|
import torchaudio |
|
|
import os |
|
|
import requests |
|
|
from transformers import pipeline |
|
|
import tempfile |
|
|
import numpy as np |
|
|
from einops import rearrange |
|
|
import cv2 |
|
|
from scipy.io import wavfile |
|
|
import librosa |
|
|
import json |
|
|
from typing import Optional, Tuple, List |
|
|
import atexit |
|
|
|
|
|
|
|
|
# Set before any model-loading code runs.
# NOTE(review): presumably consulted by transformers when deserializing
# checkpoints -- confirm that this variable is actually honoured.
os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1"

try:
    import mmaudio
except ImportError:
    # Install the local checkout (current working directory) in editable mode.
    # Use the interpreter that is running this script, because a bare `pip` on
    # PATH may belong to a different environment, and fail loudly when the
    # installation itself fails instead of continuing to a second ImportError.
    import importlib
    import subprocess
    import sys

    subprocess.check_call([sys.executable, "-m", "pip", "install", "-e", "."])
    importlib.invalidate_caches()
    import mmaudio
|
|
|
|
|
from mmaudio.eval_utils import (ModelConfig, all_model_cfg, generate, load_video, make_video, |
|
|
setup_eval_logging) |
|
|
from mmaudio.model.flow_matching import FlowMatching |
|
|
from mmaudio.model.networks import MMAudio, get_my_mmaudio |
|
|
from mmaudio.model.sequence_config import SequenceConfig |
|
|
from mmaudio.model.utils.features_utils import FeaturesUtils |
|
|
|
|
|
|
|
|
# Root logger: INFO and above, each record prefixed with time, logger name
# and level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
log = logging.getLogger()

# Use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # These backend switches are only set for GPU runs.
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
    torch.backends.cudnn.benchmark = True

# Precision used for the network and the feature extractors.
dtype = torch.bfloat16

# Model variant served by this app; its files are fetched up front.
model: ModelConfig = all_model_cfg['large_44k_v2']
model.download_if_needed()
output_dir = Path('./output/gradio')

setup_eval_logging()
|
|
|
|
|
|
|
|
# Korean -> English translation pipeline, pinned to the CPU. Two attempts are
# made: first with explicit tokenizer/remote-code options, then with library
# defaults. `translator` stays None when both attempts fail, which callers
# treat as "translation disabled".
translator = None
try:
    translator = pipeline(
        "translation",
        model="Helsinki-NLP/opus-mt-ko-en",
        device="cpu",
        use_fast=True,
        trust_remote_code=False,
    )
except Exception as first_error:
    log.warning(f"Failed to load translation model with safetensors: {first_error}")
    try:
        translator = pipeline(
            "translation",
            model="Helsinki-NLP/opus-mt-ko-en",
            device="cpu",
        )
    except Exception as second_error:
        log.error(f"Failed to load translation model: {second_error}")
|
|
|
|
|
# Pixabay API key. Prefer the PIXABAY_API_KEY environment variable so the
# credential can be rotated without editing the source; the literal below is
# kept only as a backward-compatible fallback.
# NOTE(review): a key committed to source control should be treated as leaked.
PIXABAY_API_KEY = os.environ.get("PIXABAY_API_KEY") or "33492762-a28a596ec4f286f84cd328b17"
|
|
|
|
|
def cleanup_temp_files():
    """Best-effort removal of generated media from the system temp directory.

    Every entry of ``tempfile.gettempdir()`` whose name ends in ``.mp4`` or
    ``.flac`` is deleted. An entry that cannot be removed (already gone, a
    directory, insufficient permissions, ...) is logged at debug level and
    skipped so that the remaining entries are still processed. The function
    never raises for file-system errors because it runs at interpreter exit.

    NOTE(review): matching is by file extension only, so files created by
    other programs in the same temp directory are removed as well.
    """
    temp_dir = tempfile.gettempdir()
    try:
        entries = os.listdir(temp_dir)
    except OSError as err:
        logging.debug("Could not list temp directory %s: %s", temp_dir, err)
        return

    for entry in entries:
        if not entry.endswith(('.mp4', '.flac')):
            continue
        path = os.path.join(temp_dir, entry)
        try:
            os.remove(path)
        except OSError as err:
            # Only file-system failures are expected here; anything else
            # (e.g. KeyboardInterrupt) is allowed to propagate.
            logging.debug("Could not remove temp file %s: %s", path, err)


atexit.register(cleanup_temp_files)
|
|
|
|
|
def get_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
    """Build the generation network and its feature utilities.

    Reads the module-level ``model`` configuration, ``device`` and ``dtype``.

    Returns:
        A ``(net, feature_utils, seq_cfg)`` tuple: the network with weights
        loaded from ``model.model_path``, the ``FeaturesUtils`` instance, and
        ``model.seq_cfg``. Both modules are moved to ``device``/``dtype`` and
        put in eval mode.
    """
    import contextlib

    # torch.cuda.device is a CUDA-only context manager; when the app fell back
    # to the CPU there is no CUDA device to select, so use a no-op context.
    if device.type == "cuda":
        device_scope = torch.cuda.device(device)
    else:
        device_scope = contextlib.nullcontext()

    with device_scope:
        seq_cfg = model.seq_cfg

        net: MMAudio = get_my_mmaudio(model.model_name).to(device, dtype).eval()
        # weights_only=True restricts unpickling to tensor data.
        net.load_weights(torch.load(model.model_path, map_location=device, weights_only=True))
        log.info(f'Loaded weights from {model.model_path}')

        feature_utils = FeaturesUtils(
            tod_vae_ckpt=model.vae_path,
            synchformer_ckpt=model.synchformer_ckpt,
            enable_conditions=True,
            mode=model.mode,
            bigvgan_vocoder_ckpt=model.bigvgan_16k_path,
            need_vae_encoder=False
        ).to(device, dtype).eval()

    return net, feature_utils, seq_cfg


# Loaded once at import time and shared by every request handler below.
net, feature_utils, seq_cfg = get_model()
|
|
|
|
|
def _contains_hangul(text):
    """Return True when *text* has at least one Hangul character.

    Only the Hangul Compatibility Jamo (U+3131-U+318E) and Hangul Syllables
    (U+AC00-U+D7A3) ranges count. The single span U+3131-U+D7A3 would also
    cover CJK ideographs, i.e. Chinese and Japanese text.
    """
    return any(
        0x3131 <= ord(char) <= 0x318E or 0xAC00 <= ord(char) <= 0xD7A3
        for char in text
    )


def translate_prompt(text):
    """Translate Korean *text* to English; return any other input unchanged.

    The input is returned as-is when it is empty, contains no Hangul, the
    module-level ``translator`` is unavailable (``None``), or translation
    fails for any reason (the failure is logged). This function never raises.
    """
    try:
        if not text or not _contains_hangul(text):
            return text
        if translator is None:
            return text
        with torch.no_grad():
            return translator(text)[0]['translation_text']
    except Exception as e:
        # Best effort: a broken translator must not block generation.
        logging.error(f"Translation error: {e}")
        return text
|
|
|
|
|
@torch.no_grad()
def search_videos(query):
    """Search Pixabay for videos matching *query*.

    The query is first passed through ``translate_prompt``. Any exception is
    logged and turned into an empty result list.
    """
    try:
        english_query = translate_prompt(query)
        results = search_pixabay_videos(english_query, PIXABAY_API_KEY)
    except Exception as e:
        logging.error(f"Video search error: {e}")
        results = []
    return results
|
|
|
|
|
def search_pixabay_videos(query, api_key, timeout=10):
    """Query the Pixabay video API and return a list of video URLs.

    Args:
        query: Search keywords.
        api_key: Pixabay API key.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the UI callback indefinitely.

    Returns:
        One URL per hit, preferring the ``large`` rendition and falling back
        to smaller ones when its URL is missing or empty. Hits without any
        usable URL are skipped. An empty list is returned on a non-200
        response or on any error (which is logged); this function never raises.
    """
    try:
        base_url = "https://pixabay.com/api/videos/"
        params = {
            "key": api_key,
            "q": query,
            "per_page": 40
        }

        response = requests.get(base_url, params=params, timeout=timeout)
        if response.status_code != 200:
            logging.warning(f"Pixabay API returned HTTP {response.status_code}")
            return []

        urls = []
        for hit in response.json().get('hits', []):
            renditions = hit.get('videos') or {}
            # A single malformed hit must not discard every other result.
            for size in ('large', 'medium', 'small', 'tiny'):
                url = (renditions.get(size) or {}).get('url')
                if url:
                    urls.append(url)
                    break
        return urls
    except Exception as e:
        logging.error(f"Pixabay API error: {e}")
        return []
|
|
|
|
|
@spaces.GPU
@torch.inference_mode()
def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
                   cfg_strength: float, duration: float):
    """Generate audio for *video* and mux it into a new mp4 file.

    Args:
        video: Input video as delivered by the ``gr.Video`` component.
        prompt: Text description of the wanted audio (Korean is translated).
        negative_prompt: Text description of what to avoid.
        seed: Seed for the random generator.
        num_steps: Number of Euler steps for flow matching.
        cfg_strength: Guidance strength passed to ``generate``.
        duration: Requested duration in seconds; replaced by the value that
            ``load_video`` returns.

    Returns:
        ``(video_save_path, info_log)``: path of the temporary mp4 file and a
        human-readable summary for the log textbox.

    Raises:
        gr.Error: If no video was supplied.
    """
    if video is None:
        raise gr.Error("Please upload a video first.")

    # The numeric inputs can arrive as floats, but torch.Generator.manual_seed
    # and the step count need integers.
    seed = int(seed)
    num_steps = int(num_steps)

    prompt = translate_prompt(prompt or "")
    negative_prompt = translate_prompt(negative_prompt or "")

    rng = torch.Generator(device=device)
    rng.manual_seed(seed)
    fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)

    clip_frames, sync_frames, duration = load_video(video, duration)
    # Add a leading batch dimension of size one.
    clip_frames = clip_frames.unsqueeze(0)
    sync_frames = sync_frames.unsqueeze(0)
    # NOTE: seq_cfg and net are module-level objects shared by all requests.
    seq_cfg.duration = duration
    net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)

    audios = generate(clip_frames,
                      sync_frames, [prompt],
                      negative_text=[negative_prompt],
                      feature_utils=feature_utils,
                      net=net,
                      fm=fm,
                      rng=rng,
                      cfg_strength=cfg_strength)
    audio = audios.float().cpu()[0]

    # Reserve a path and close the handle right away so that the descriptor
    # is not leaked and make_video can write to the file.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp_file:
        video_save_path = tmp_file.name
    make_video(video,
               video_save_path,
               audio,
               sampling_rate=seq_cfg.sampling_rate,
               duration_sec=seq_cfg.duration)

    info_log = f"""✅ VIDEO TO AUDIO COMPLETE!
{'=' * 50}
🎬 Video Info:
• Duration: {duration:.2f} seconds
{'=' * 50}
⚙️ Generation Settings:
• Seed: {seed}
• Steps: {num_steps}
• CFG Strength: {cfg_strength}
{'=' * 50}
📝 Prompts:
• Prompt: {prompt[:40]}{'...' if len(prompt) > 40 else ''}
• Negative: {negative_prompt[:30]}{'...' if len(negative_prompt) > 30 else ''}
{'=' * 50}
💾 Video with audio ready!"""

    return video_save_path, info_log
|
|
|
|
|
@spaces.GPU
@torch.inference_mode()
def text_to_audio(prompt: str, negative_prompt: str, seed: int, num_steps: int, cfg_strength: float,
                  duration: float):
    """Generate audio from a text prompt and save it as a FLAC file.

    Args:
        prompt: Text description of the wanted audio (Korean is translated).
        negative_prompt: Text description of what to avoid.
        seed: Seed for the random generator.
        num_steps: Number of Euler steps for flow matching.
        cfg_strength: Guidance strength passed to ``generate``.
        duration: Length of the generated audio in seconds.

    Returns:
        ``(audio_save_path, info_log)``: path of the temporary ``.flac`` file
        and a human-readable summary for the log textbox.
    """
    # The numeric inputs can arrive as floats, but torch.Generator.manual_seed
    # and the step count need integers.
    seed = int(seed)
    num_steps = int(num_steps)

    prompt = translate_prompt(prompt or "")
    negative_prompt = translate_prompt(negative_prompt or "")

    rng = torch.Generator(device=device)
    rng.manual_seed(seed)
    fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)

    # No visual conditioning in text-only mode.
    clip_frames = sync_frames = None
    # NOTE: seq_cfg and net are module-level objects shared by all requests.
    seq_cfg.duration = duration
    net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)

    audios = generate(clip_frames,
                      sync_frames, [prompt],
                      negative_text=[negative_prompt],
                      feature_utils=feature_utils,
                      net=net,
                      fm=fm,
                      rng=rng,
                      cfg_strength=cfg_strength)
    audio = audios.float().cpu()[0]

    # Reserve a path and close the handle right away so that the descriptor
    # is not leaked and torchaudio can write to the file.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.flac') as tmp_file:
        audio_save_path = tmp_file.name
    torchaudio.save(audio_save_path, audio, seq_cfg.sampling_rate)

    info_log = f"""✅ TEXT TO AUDIO COMPLETE!
{'=' * 50}
🎵 Audio Info:
• Duration: {duration:.2f} seconds
• Sample Rate: {seq_cfg.sampling_rate} Hz
{'=' * 50}
⚙️ Generation Settings:
• Seed: {seed}
• Steps: {num_steps}
• CFG Strength: {cfg_strength}
{'=' * 50}
📝 Prompts:
• Prompt: {prompt[:40]}{'...' if len(prompt) > 40 else ''}
• Negative: {negative_prompt[:30]}{'...' if len(negative_prompt) > 30 else ''}
{'=' * 50}
💾 Audio ready to download!"""

    return audio_save_path, info_log
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Custom stylesheet handed to gr.Blocks(css=...): comic-book look (yellow dotted
# background, thick dark borders, hard drop shadows) plus rules that hide the
# Hugging Face header and the Gradio footer. Only the section comments were
# repaired (they were mis-encoded); the CSS rules themselves are unchanged.
css = """
/* ===== 🎨 Google Fonts Import ===== */
@import url('https://fonts.googleapis.com/css2?family=Bangers&family=Comic+Neue:wght@400;700&display=swap');

/* ===== 🎨 Comic Classic 배경 - 빈티지 페이퍼 + 도트 패턴 ===== */
.gradio-container {
    background-color: #FEF9C3 !important;
    background-image:
        radial-gradient(#1F2937 1px, transparent 1px) !important;
    background-size: 20px 20px !important;
    min-height: 100vh !important;
    font-family: 'Comic Neue', cursive, sans-serif !important;
}

/* ===== 허깅페이스 상단 요소 숨김 ===== */
.huggingface-space-header,
#space-header,
.space-header,
[class*="space-header"],
.svelte-1ed2p3z,
.space-header-badge,
.header-badge,
[data-testid="space-header"],
.svelte-kqij2n,
.svelte-1ax1toq,
.embed-container > div:first-child {
    display: none !important;
    visibility: hidden !important;
    height: 0 !important;
    width: 0 !important;
    overflow: hidden !important;
    opacity: 0 !important;
    pointer-events: none !important;
}

/* ===== Footer 완전 숨김 ===== */
footer,
.footer,
.gradio-container footer,
.built-with,
[class*="footer"],
.gradio-footer,
.main-footer,
div[class*="footer"],
.show-api,
.built-with-gradio,
a[href*="gradio.app"],
a[href*="huggingface.co/spaces"] {
    display: none !important;
    visibility: hidden !important;
    height: 0 !important;
    padding: 0 !important;
    margin: 0 !important;
}

/* ===== 메인 컨테이너 ===== */
#col-container {
    max-width: 1200px;
    margin: 0 auto;
}

/* ===== 🎨 헤더 타이틀 - 코믹 스타일 ===== */
.header-text h1 {
    font-family: 'Bangers', cursive !important;
    color: #1F2937 !important;
    font-size: 3.5rem !important;
    font-weight: 400 !important;
    text-align: center !important;
    margin-bottom: 0.5rem !important;
    text-shadow:
        4px 4px 0px #FACC15,
        6px 6px 0px #1F2937 !important;
    letter-spacing: 3px !important;
    -webkit-text-stroke: 2px #1F2937 !important;
}

/* ===== 🎨 서브타이틀 ===== */
.subtitle {
    text-align: center !important;
    font-family: 'Comic Neue', cursive !important;
    font-size: 1.2rem !important;
    color: #1F2937 !important;
    margin-bottom: 1.5rem !important;
    font-weight: 700 !important;
}

/* ===== 🎨 탭 스타일 ===== */
.tabs {
    background: #FFFFFF !important;
    border: 3px solid #1F2937 !important;
    border-radius: 12px !important;
    box-shadow: 6px 6px 0px #1F2937 !important;
    padding: 10px !important;
}

.tab-nav {
    background: #FACC15 !important;
    border-radius: 8px !important;
    padding: 5px !important;
    border: 2px solid #1F2937 !important;
}

.tab-nav button {
    font-family: 'Bangers', cursive !important;
    font-size: 1.1rem !important;
    letter-spacing: 1px !important;
    color: #1F2937 !important;
    background: transparent !important;
    border: none !important;
    padding: 10px 20px !important;
    border-radius: 6px !important;
    transition: all 0.2s ease !important;
}

.tab-nav button:hover {
    background: #FEF3C7 !important;
}

.tab-nav button.selected {
    background: #3B82F6 !important;
    color: #FFFFFF !important;
    box-shadow: 3px 3px 0px #1F2937 !important;
}

/* ===== 🎨 카드/패널 - 만화 프레임 스타일 ===== */
.gr-panel,
.gr-box,
.gr-form,
.block,
.gr-group {
    background: #FFFFFF !important;
    border: 3px solid #1F2937 !important;
    border-radius: 8px !important;
    box-shadow: 6px 6px 0px #1F2937 !important;
    transition: all 0.2s ease !important;
}

.gr-panel:hover,
.block:hover {
    transform: translate(-2px, -2px) !important;
    box-shadow: 8px 8px 0px #1F2937 !important;
}

/* ===== 🎨 입력 필드 (Textbox) ===== */
textarea,
input[type="text"],
input[type="number"] {
    background: #FFFFFF !important;
    border: 3px solid #1F2937 !important;
    border-radius: 8px !important;
    color: #1F2937 !important;
    font-family: 'Comic Neue', cursive !important;
    font-size: 1rem !important;
    font-weight: 700 !important;
    transition: all 0.2s ease !important;
}

textarea:focus,
input[type="text"]:focus,
input[type="number"]:focus {
    border-color: #3B82F6 !important;
    box-shadow: 4px 4px 0px #3B82F6 !important;
    outline: none !important;
}

textarea::placeholder {
    color: #9CA3AF !important;
    font-weight: 400 !important;
}

/* ===== 🎨 Primary 버튼 - 코믹 블루 ===== */
.gr-button-primary,
button.primary,
.gr-button.primary {
    background: #3B82F6 !important;
    border: 3px solid #1F2937 !important;
    border-radius: 8px !important;
    color: #FFFFFF !important;
    font-family: 'Bangers', cursive !important;
    font-weight: 400 !important;
    font-size: 1.3rem !important;
    letter-spacing: 2px !important;
    padding: 14px 28px !important;
    box-shadow: 5px 5px 0px #1F2937 !important;
    transition: all 0.1s ease !important;
    text-shadow: 1px 1px 0px #1F2937 !important;
}

.gr-button-primary:hover,
button.primary:hover,
.gr-button.primary:hover {
    background: #2563EB !important;
    transform: translate(-2px, -2px) !important;
    box-shadow: 7px 7px 0px #1F2937 !important;
}

.gr-button-primary:active,
button.primary:active,
.gr-button.primary:active {
    transform: translate(3px, 3px) !important;
    box-shadow: 2px 2px 0px #1F2937 !important;
}

/* ===== 🎨 Secondary 버튼 - 코믹 레드 ===== */
.gr-button-secondary,
button.secondary {
    background: #EF4444 !important;
    border: 3px solid #1F2937 !important;
    border-radius: 8px !important;
    color: #FFFFFF !important;
    font-family: 'Bangers', cursive !important;
    font-weight: 400 !important;
    font-size: 1.1rem !important;
    letter-spacing: 1px !important;
    box-shadow: 4px 4px 0px #1F2937 !important;
    transition: all 0.1s ease !important;
    text-shadow: 1px 1px 0px #1F2937 !important;
}

.gr-button-secondary:hover,
button.secondary:hover {
    background: #DC2626 !important;
    transform: translate(-2px, -2px) !important;
    box-shadow: 6px 6px 0px #1F2937 !important;
}

/* ===== 🎨 로그 출력 영역 ===== */
.info-log textarea {
    background: #1F2937 !important;
    color: #10B981 !important;
    font-family: 'Courier New', monospace !important;
    font-size: 0.9rem !important;
    font-weight: 400 !important;
    border: 3px solid #10B981 !important;
    border-radius: 8px !important;
    box-shadow: 4px 4px 0px #10B981 !important;
}

/* ===== 🎨 비디오/오디오 영역 ===== */
.gr-video,
.gr-audio,
video,
audio {
    border: 4px solid #1F2937 !important;
    border-radius: 8px !important;
    box-shadow: 8px 8px 0px #1F2937 !important;
    overflow: hidden !important;
    background: #FFFFFF !important;
}

/* ===== 🎨 갤러리 스타일 ===== */
.gr-gallery {
    background: #FFFFFF !important;
    border: 3px solid #1F2937 !important;
    border-radius: 8px !important;
    box-shadow: 6px 6px 0px #1F2937 !important;
    padding: 10px !important;
}

.gr-gallery .thumbnail-item {
    border: 3px solid #1F2937 !important;
    border-radius: 6px !important;
    transition: all 0.2s ease !important;
    overflow: hidden !important;
}

.gr-gallery .thumbnail-item:hover {
    transform: scale(1.05) !important;
    box-shadow: 4px 4px 0px #3B82F6 !important;
}

/* ===== 🎨 슬라이더 스타일 ===== */
input[type="range"] {
    accent-color: #3B82F6 !important;
}

.gr-slider {
    background: #FFFFFF !important;
}

/* ===== 🎨 라벨 스타일 ===== */
label,
.gr-input-label,
.gr-block-label {
    color: #1F2937 !important;
    font-family: 'Comic Neue', cursive !important;
    font-weight: 700 !important;
    font-size: 1rem !important;
}

span.gr-label {
    color: #1F2937 !important;
}

/* ===== 🎨 정보 텍스트 ===== */
.gr-info,
.info {
    color: #6B7280 !important;
    font-family: 'Comic Neue', cursive !important;
    font-size: 0.9rem !important;
}

/* ===== 🎨 Number Input 스타일 ===== */
.gr-number input {
    background: #FFFFFF !important;
    border: 3px solid #1F2937 !important;
    border-radius: 8px !important;
    color: #1F2937 !important;
    font-family: 'Comic Neue', cursive !important;
    font-weight: 700 !important;
    box-shadow: 3px 3px 0px #1F2937 !important;
}

/* ===== 🎨 스크롤바 - 코믹 스타일 ===== */
::-webkit-scrollbar {
    width: 12px;
    height: 12px;
}

::-webkit-scrollbar-track {
    background: #FEF9C3;
    border: 2px solid #1F2937;
}

::-webkit-scrollbar-thumb {
    background: #3B82F6;
    border: 2px solid #1F2937;
    border-radius: 0px;
}

::-webkit-scrollbar-thumb:hover {
    background: #EF4444;
}

/* ===== 🎨 선택 하이라이트 ===== */
::selection {
    background: #FACC15;
    color: #1F2937;
}

/* ===== 🎨 링크 스타일 ===== */
a {
    color: #3B82F6 !important;
    text-decoration: none !important;
    font-weight: 700 !important;
}

a:hover {
    color: #EF4444 !important;
}

/* ===== 🎨 Row/Column 간격 ===== */
.gr-row {
    gap: 1.5rem !important;
}

.gr-column {
    gap: 1rem !important;
}

/* ===== 반응형 조정 ===== */
@media (max-width: 768px) {
    .header-text h1 {
        font-size: 2.2rem !important;
        text-shadow:
            3px 3px 0px #FACC15,
            4px 4px 0px #1F2937 !important;
    }

    .gr-button-primary,
    button.primary {
        padding: 12px 20px !important;
        font-size: 1.1rem !important;
    }

    .gr-panel,
    .block {
        box-shadow: 4px 4px 0px #1F2937 !important;
    }

    .tab-nav button {
        font-size: 0.9rem !important;
        padding: 8px 12px !important;
    }
}

/* ===== 🎨 다크모드 비활성화 ===== */
@media (prefers-color-scheme: dark) {
    .gradio-container {
        background-color: #FEF9C3 !important;
    }
}
"""
|
|
|
|
|
|
|
|
|
|
|
# Gradio UI: a header followed by three tabs (video search, video-to-audio,
# text-to-audio). Labels advertise Korean support only when the translator
# was loaded.
# NOTE(review): the nesting of rows/columns was reconstructed from the call
# structure; confirm the layout against the deployed app.
with gr.Blocks(fill_height=True, css=css, title="MMAudio Studio") as demo:
    gr.LoginButton(value="Option: HuggingFace 'Login' for extra GPU quota +", size="sm")

    # Badge linking to the project home page.
    gr.HTML("""
        <div style="text-align: center; margin: 20px 0 10px 0;">
            <a href="https://www.humangen.ai" target="_blank" style="text-decoration: none;">
                <img src="https://img.shields.io/static/v1?label=🏠 HOME&message=HUMANGEN.AI&color=0000ff&labelColor=ffcc00&style=for-the-badge" alt="HOME">
            </a>
        </div>
    """)

    gr.Markdown(
        """
        # 🎵 MMAUDIO STUDIO 🎬
        """,
        elem_classes="header-text"
    )

    gr.Markdown(
        """
        <p class="subtitle">🔊 Generate Audio from Text or Video • Korean Supported! 한글지원 🇰🇷</p>
        """,
    )

    with gr.Tabs():

        with gr.TabItem("🔍 Video Search"):
            gr.Markdown(
                """
                <p style="text-align: center; font-family: 'Comic Neue', cursive; font-weight: 700; color: #1F2937; margin-bottom: 1rem;">
                📹 Search for videos from Pixabay to use as input!
                </p>
                """
            )

            with gr.Row():
                with gr.Column(scale=1):
                    search_query = gr.Textbox(
                        label="🔍 Search Query (한글지원)" if translator else "🔍 Search Query",
                        placeholder="Enter search keywords...",
                        lines=1
                    )
                    search_btn = gr.Button(
                        "🔎 SEARCH VIDEOS!",
                        variant="primary",
                        size="lg"
                    )

            search_gallery = gr.Gallery(
                label="📺 Search Results",
                columns=4,
                rows=5,
                height=500
            )

            search_btn.click(
                fn=search_videos,
                inputs=[search_query],
                outputs=[search_gallery]
            )

        with gr.TabItem("🎬 Video-to-Audio"):
            gr.Markdown(
                """
                <p style="text-align: center; font-family: 'Comic Neue', cursive; font-weight: 700; color: #1F2937; margin-bottom: 1rem;">
                🎥 Upload a video and generate matching audio!
                </p>
                """
            )

            with gr.Row(equal_height=False):
                with gr.Column(scale=1):
                    v2a_video = gr.Video(label="📹 Input Video")
                    v2a_prompt = gr.Textbox(
                        label="✏️ Prompt (한글지원)" if translator else "✏️ Prompt",
                        placeholder="Describe the audio you want...",
                        lines=2
                    )
                    v2a_negative = gr.Textbox(
                        label="🚫 Negative Prompt",
                        value="music",
                        lines=1
                    )

                    with gr.Row():
                        # precision=0 makes Gradio deliver integers, which the
                        # random generator seed and the step count require.
                        v2a_seed = gr.Number(label="🎲 Seed", value=0, precision=0)
                        v2a_steps = gr.Number(label="🔄 Steps", value=25, precision=0)

                    with gr.Row():
                        v2a_cfg = gr.Number(label="🎯 Guidance Scale", value=4.5)
                        v2a_duration = gr.Number(label="⏱️ Duration (sec)", value=8)

                    v2a_btn = gr.Button(
                        "🎬 GENERATE AUDIO! 🔊",
                        variant="primary",
                        size="lg"
                    )

                    with gr.Accordion("📜 Generation Log", open=True):
                        v2a_log = gr.Textbox(
                            label="",
                            placeholder="Upload video and click generate...",
                            lines=12,
                            interactive=False,
                            elem_classes="info-log"
                        )

                with gr.Column(scale=1):
                    v2a_output = gr.Video(label="🎥 Generated Result", height=400)
                    gr.Markdown(
                        """
                        <p style="text-align: center; margin-top: 15px; font-weight: 700; color: #1F2937;">
                        💡 Right-click on the video to save!
                        </p>
                        """
                    )

            v2a_btn.click(
                fn=video_to_audio,
                inputs=[v2a_video, v2a_prompt, v2a_negative, v2a_seed, v2a_steps, v2a_cfg, v2a_duration],
                outputs=[v2a_output, v2a_log]
            )

        with gr.TabItem("🎵 Text-to-Audio"):
            gr.Markdown(
                """
                <p style="text-align: center; font-family: 'Comic Neue', cursive; font-weight: 700; color: #1F2937; margin-bottom: 1rem;">
                ✨ Generate audio from text description!
                </p>
                """
            )

            with gr.Row(equal_height=False):
                with gr.Column(scale=1):
                    t2a_prompt = gr.Textbox(
                        label="✏️ Prompt (한글지원)" if translator else "✏️ Prompt",
                        placeholder="Describe the audio you want to generate...",
                        lines=3
                    )
                    t2a_negative = gr.Textbox(
                        label="🚫 Negative Prompt",
                        placeholder="What to avoid...",
                        lines=1
                    )

                    with gr.Row():
                        # precision=0 makes Gradio deliver integers, which the
                        # random generator seed and the step count require.
                        t2a_seed = gr.Number(label="🎲 Seed", value=0, precision=0)
                        t2a_steps = gr.Number(label="🔄 Steps", value=25, precision=0)

                    with gr.Row():
                        t2a_cfg = gr.Number(label="🎯 Guidance Scale", value=4.5)
                        t2a_duration = gr.Number(label="⏱️ Duration (sec)", value=8)

                    t2a_btn = gr.Button(
                        "🎵 GENERATE AUDIO! ✨",
                        variant="primary",
                        size="lg"
                    )

                    with gr.Accordion("📜 Generation Log", open=True):
                        t2a_log = gr.Textbox(
                            label="",
                            placeholder="Enter prompt and click generate...",
                            lines=12,
                            interactive=False,
                            elem_classes="info-log"
                        )

                with gr.Column(scale=1):
                    t2a_output = gr.Audio(label="🔊 Generated Audio")
                    gr.Markdown(
                        """
                        <p style="text-align: center; margin-top: 15px; font-weight: 700; color: #1F2937;">
                        💡 Click the download button to save!
                        </p>
                        """
                    )

            t2a_btn.click(
                fn=text_to_audio,
                inputs=[t2a_prompt, t2a_negative, t2a_seed, t2a_steps, t2a_cfg, t2a_duration],
                outputs=[t2a_output, t2a_log]
            )
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Report a degraded start-up (no translator) before the server comes up.
    translation_ready = translator is not None
    if not translation_ready:
        log.warning("Translation model failed to load. Korean translation will be disabled.")

    # output_dir is whitelisted so that Gradio may serve files from it.
    demo.launch(allowed_paths=[output_dir])