Spaces:
Running
Running
import gradio as gr | |
import torch | |
import subprocess | |
from diffusers import StableDiffusionPipeline, StableVideoDiffusionPipeline | |
from transformers import MusicgenForConditionalGeneration, MusicgenProcessor, pipeline | |
import soundfile as sf | |
from PIL import Image | |
from torch import autocast | |
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision on the GPU, full precision on the CPU; shared by both
# diffusion pipelines below.
_weights_dtype = torch.float16 if device == "cuda" else torch.float32

# Russian -> English translation model used to translate user prompts.
translator = pipeline(
    task="translation_ru_to_en",
    model="Helsinki-NLP/opus-mt-ru-en",
)

# Text-to-image pipeline.
image_pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    torch_dtype=_weights_dtype,
)
image_pipe = image_pipe.to(device)

# Image-to-video pipeline.
video_pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid",
    torch_dtype=_weights_dtype,
)
video_pipe = video_pipe.to(device)

# Text-to-music model and its processor (left on the default device).
music_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
music_processor = MusicgenProcessor.from_pretrained("facebook/musicgen-small")
# Prompt translation
def translate_prompt(prompt):
    """Translate *prompt* with the module-level ``translator`` pipeline.

    Returns the ``translation_text`` field of the first result the
    translator produces for the prompt.
    """
    return translator(prompt)[0]["translation_text"]
# Image generation
def generate_image(prompt):
    """Generate an image for *prompt* and return the path of the saved PNG.

    The prompt is first translated to English with ``translate_prompt``,
    then passed to the module-level ``image_pipe``.

    Args:
        prompt: Text description of the desired image.

    Returns:
        The path ``"generated_image.png"`` the image was written to.
    """
    prompt_en = translate_prompt(prompt)
    # No torch.autocast wrapper: the pipeline is already loaded in the dtype
    # chosen for the device, and the Diffusers docs advise against autocast
    # (slower than pure fp16 and can produce black images).
    result = image_pipe(prompt_en, num_inference_steps=10)
    image = result.images[0]

    image_path = "generated_image.png"
    image.save(image_path)
    return image_path
# Video generation
def generate_video(image_path):
    """Animate a still image with ``video_pipe`` and return the MP4 path.

    Args:
        image_path: The conditioning image. Accepts a file path, a PIL image,
            or an array exposing ``__array_interface__`` (a Gradio ``Image``
            component hands the callback an array unless it is configured
            with ``type="filepath"``).

    Returns:
        The path ``"generated_video.mp4"`` the clip was written to.

    Raises:
        gr.Error: If no image was supplied.
    """
    # Local import: this helper is not among the names imported at file level.
    from diffusers.utils import export_to_video

    if image_path is None:
        raise gr.Error("Сначала сгенерируйте изображение.")

    if isinstance(image_path, Image.Image):
        image = image_path
    elif hasattr(image_path, "__array_interface__"):
        image = Image.fromarray(image_path)
    else:
        image = Image.open(image_path)
    # Drop any alpha channel / palette so the pipeline gets a plain RGB image.
    image = image.convert("RGB")

    # `.frames` holds one list of frames per input image; we pass one image.
    video_frames = video_pipe(image, num_inference_steps=10, num_frames=16).frames[0]

    video_path = "generated_video.mp4"
    # The pipeline object has no `save_video` method; Diffusers provides
    # `export_to_video` for writing frames to disk.
    export_to_video(video_frames, video_path, fps=8)
    return video_path
# Music generation
def generate_music(prompt):
    """Generate a music clip for *prompt* and return the path of the WAV file.

    The prompt is translated to English with ``translate_prompt``, tokenised
    by ``music_processor`` and fed to ``music_model.generate``.

    Args:
        prompt: Text description of the desired music.

    Returns:
        The path ``"generated_audio.wav"`` the audio was written to.
    """
    prompt_en = translate_prompt(prompt)
    inputs = music_processor(text=[prompt_en], padding=True, return_tensors="pt")
    # Keep the inputs on whatever device the model currently lives on.
    inputs = inputs.to(music_model.device)

    audio_values = music_model.generate(**inputs, max_new_tokens=512)
    # First (only) item of the batch; squeeze drops the singleton channel
    # axis -- assumes a mono checkpoint such as musicgen-small.
    audio_array = audio_values[0].cpu().numpy().squeeze()

    # Use the model's own sampling rate instead of a hard-coded 16000: the
    # MusicGen docs read it from the audio encoder config (32 kHz for the
    # released checkpoints), so 16000 would play back at the wrong speed.
    sampling_rate = music_model.config.audio_encoder.sampling_rate

    audio_path = "generated_audio.wav"
    sf.write(audio_path, audio_array, samplerate=sampling_rate)
    return audio_path
# Merging video and music
def merge_video_audio(video_path, audio_path):
    """Mux a video file and an audio file into ``final_video.mp4`` via ffmpeg.

    The video stream is copied as-is and the audio is encoded as AAC; an
    existing output file is overwritten (``-y``).

    Args:
        video_path: Path of the input video file.
        audio_path: Path of the input audio file.

    Returns:
        The path ``"final_video.mp4"`` of the merged file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    output_path = "final_video.mp4"
    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed through verbatim and cannot be
    # interpreted as shell syntax.
    command = [
        "ffmpeg",
        "-y",
        "-i", str(video_path),
        "-i", str(audio_path),
        "-c:v", "copy",
        "-c:a", "aac",
        output_path,
    ]
    # check=True surfaces ffmpeg failures instead of silently returning a
    # path to a missing or stale file.
    subprocess.run(command, check=True)
    return output_path
# Main function: generates every artefact, one after another
def create_all(prompt):
    """Run the whole pipeline for *prompt*.

    Generates the image, animates it into a video, generates the music and
    finally merges video and music.

    Returns:
        A 4-tuple ``(image_path, video_path, audio_path, final_video)``.
    """
    still = generate_image(prompt)
    clip = generate_video(still)
    soundtrack = generate_music(prompt)
    return still, clip, soundtrack, merge_video_audio(clip, soundtrack)
# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# OnlyReels.AI 🎥🎵 - Генерация изображения, видео и музыки")

    prompt_input = gr.Textbox(
        label="Описание (русский язык)",
        placeholder="Пример: Киберпанк-город ночью, синтезаторная музыка",
    )

    # One button per pipeline stage.
    btn_image = gr.Button("🖼️ Сгенерировать изображение")
    btn_video = gr.Button("🎥 Сгенерировать видео")
    btn_music = gr.Button("🎵 Сгенерировать музыку")
    btn_merge = gr.Button("✨ Объединить видео и музыку")

    # type="filepath": these components are also used as *inputs* to the
    # next stage, and the handlers expect file paths. With the default
    # type they would receive array data instead of a path.
    image_output = gr.Image(label="Изображение", type="filepath")
    video_output = gr.Video(label="Видео")
    audio_output = gr.Audio(label="Музыка", type="filepath")
    final_video_output = gr.Video(label="Итоговое видео с музыкой")

    # Wire each button to its stage; later stages read earlier outputs.
    btn_image.click(fn=generate_image, inputs=prompt_input, outputs=image_output)
    btn_video.click(fn=generate_video, inputs=image_output, outputs=video_output)
    btn_music.click(fn=generate_music, inputs=prompt_input, outputs=audio_output)
    btn_merge.click(
        fn=merge_video_audio,
        inputs=[video_output, audio_output],
        outputs=final_video_output,
    )
#btn_generate = gr.Button("✨ Сгенерировать все") | |
#image_output = gr.Image(label="Изображение") | |
#video_output = gr.Video(label="Видео") | |
#audio_output = gr.Audio(label="Музыка") | |
#final_video_output = gr.Video(label="Итоговое видео с музыкой") | |
#btn_generate.click( | |
# fn=create_all, | |
# inputs=prompt_input, | |
# outputs=[image_output, video_output, audio_output, final_video_output] | |
#) | |
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()