# nutri / app.py
# salomonsky's picture
# Create app.py
# 05d3cbf
# raw
# history blame
# 1.58 kB
import gradio as gr
import os
import subprocess
from gtts import gTTS
from pydub import AudioSegment
from transformers import GPT2LMHeadModel, GPT2Tokenizer, pipeline
# Set the tokenizer parallelism flag in the process environment before the
# pipeline is created (presumably consumed by the Hugging Face tokenizers
# backend -- confirm).
os.environ.update({"TOKENIZERS_PARALLELISM": "true"})

# Module-level text-generation pipeline shared by every request; it is built
# once at import time from the "salomonsky/deepSP" model.
generator = pipeline(task="text-generation", model="salomonsky/deepSP")
def generate_output(text):
    """Generate a short Spanish utterance and render it as a talking-face video.

    Steps: sample text from the module-level ``generator`` pipeline, synthesize
    it to speech with gTTS (``lang='es'``), convert the MP3 to WAV with pydub,
    then run ``inference.py`` with the ``wav2lip_gan.pth`` checkpoint on
    ``face.jpg`` and that audio.

    Args:
        text: Value submitted from the Gradio textbox.
            NOTE(review): this value is currently unused -- the generation
            prompt is always the empty string. Confirm whether the user's
            input was meant to seed the prompt.

    Returns:
        The path ``"video.mp4"`` when the inference script exits with status 0
        and the file exists; otherwise ``None``.
    """
    temp_audio_path = "temp_audio.mp3"
    audio_path = "audio.wav"
    output_video_path = "video.mp4"

    prompt = ""
    input_text = prompt + ""
    gpt2_output = generator(input_text, max_length=20, do_sample=True, temperature=0.9)
    generated_text = gpt2_output[0]['generated_text']
    # Drop the echoed prompt from the model output (a no-op while the prompt
    # is empty).
    generated_text = generated_text.replace(input_text, "").strip()
    if not generated_text:
        # gTTS rejects empty input, so there is nothing to speak or render.
        return None

    try:
        gTTS(generated_text, lang='es').save(temp_audio_path)
        audio = AudioSegment.from_mp3(temp_audio_path)
        # export() returns the open output file object; close it so the
        # handle is not leaked on every request.
        audio.export(audio_path, format="wav").close()
    finally:
        # The MP3 is only an intermediate artifact: remove it whether or not
        # synthesis/conversion succeeded (previously it was left behind when
        # the function returned early on an inference failure).
        try:
            os.remove(temp_audio_path)
        except FileNotFoundError:
            pass

    # Argument list + no shell: nothing here needs shell parsing, and the
    # audio/output paths stay in sync with the variables above.
    command = [
        "python3", "inference.py",
        "--checkpoint_path", "checkpoints/wav2lip_gan.pth",
        "--face", "face.jpg",
        "--audio", audio_path,
        "--outfile", output_video_path,
        "--nosmooth",
    ]
    process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if process.returncode != 0:
        return None

    if os.path.isfile(output_video_path):
        return output_video_path
    return None
# Gradio UI: a single-line textbox feeds generate_output, whose result (a
# video file path or None) is shown in a video player.
# The components are taken from the top-level gradio namespace; the
# gr.inputs / gr.outputs namespaces are deprecated in Gradio 3.x and removed
# in 4.x. NOTE(review): assumes the deployed Gradio version is >= 3.0.
iface = gr.Interface(
    fn=generate_output,
    inputs=gr.Textbox(
        lines=1,
        placeholder='Escribe tu nombre para presentarte con Andrea...',
    ),
    outputs=[
        gr.Video(label="Respuesta de Andrea (un minuto aproximadamente)"),
    ],
    title="Andrea - Humanoid Chatbot IA 2023(c)",
)

# Start the web server (runs at import time, as in the original script).
iface.launch()