import gradio as gr
import voiceClone as vc
import videoGenerate as vg
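# Assumed interfaces of the local helper modules, inferred from how they are
# called in this file (return types are assumptions, not confirmed here):
#   voiceClone.create_model(audio_file, title) -> model/reference ID (falsy on failure)
#   voiceClone.tts(model_id, text)             -> path to the generated audio file
#   videoGenerate.generate_video(audio, image, aspect_ratio, resolution, text_prompt, seed)
#                                              -> path to the generated video file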
# Module-level state is shared across sessions and lost on restart
model_list = []      # registry of created voice models, stored as (title, model_id) tuples
output_audio = None  # currently unused
image_error = None   # currently unused
# Create a voice model from the uploaded/recorded audio sample
def create_voice_model(title, audio_file):
    global model_list
    new_model = vc.create_model(audio_file, title)
    if new_model:
        model_list.append((title, new_model))  # store as (title, model_id)
        return f"Voice model {title} created"
    return f"Failed to create voice model {title}"
# Refresh the dropdown with the titles of all models created so far
def update_dropdown_choices():
    return gr.Dropdown(choices=[title for title, _ in model_list])
# Map the selected dropdown title back to its model ID
def on_model_select(selected_item):
    return next((model_id for title, model_id in model_list if title == selected_item), None)
# Synthesize speech with the selected voice model
def generate_tts(model_id, text):
    return vc.tts(model_id, text)
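# Usage example (hedged): the special-effect tags listed in the TTS textbox info
# below can be embedded directly in the text, e.g.
#   generate_tts(model_id, "Hello there (laugh) it is great to meet you (breath)")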
# Generate a talking-head video from the TTS audio and an uploaded image
def create_talking_face(audio, image, aspect_ratio, resolution, text_prompt, seed):
    output_path = vg.generate_video(audio, image, aspect_ratio, resolution, text_prompt, seed)
    return output_path
# Gradio UI components
with gr.Blocks() as app:
    gr.Markdown("# Deepfake Generator")

    gr.Markdown("## Voice Cloning")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("Generate Voice Model")
            audio_input = gr.Audio(sources=["upload", "microphone"], label="Record Voice Sample", type="filepath", interactive=True, info="20-30 seconds of audio is recommended")
            title_input = gr.Textbox(label="Model Title", placeholder="Enter model title")
            output_textbox = gr.Label(label="Output", value="")
            generate_model_button = gr.Button("Generate Voice Model")
            generate_model_button.click(create_voice_model, inputs=[title_input, audio_input], outputs=output_textbox)
        with gr.Column(scale=1):
            gr.Markdown("Generate Text to Speech (TTS)")
            update_models = gr.Button("Update Models")
            reference_id = gr.Textbox(label="Model ID", interactive=False, visible=False)
            model_dropdown = gr.Dropdown(label="Select Model", choices=[], interactive=True, info="Select a model to use for TTS")
            model_dropdown.change(fn=on_model_select, inputs=model_dropdown, outputs=reference_id)
            update_models.click(update_dropdown_choices, outputs=model_dropdown)
            text_input = gr.Textbox(label="Text for TTS", placeholder="Enter text to synthesize", lines=3, max_length=300,
                                    info="Text for generated speech. You can add special effects to enhance the result: (break), (long-break), (breath), (laugh), (cough), (lip-smacking), (sigh)")
            tts_output = gr.Audio(label="TTS Output", type="filepath", interactive=False)
            generate_tts_button = gr.Button("Generate TTS")
            generate_tts_button.click(generate_tts, inputs=[reference_id, text_input], outputs=tts_output)
gr.Markdown(""" ## Avatar Creation""")
with gr.Row():
with gr.Column(scale=1):
image_input = gr.Image(label="Upload Image", type="filepath", sources=["upload", "webcam"], interactive=True)
aspect_ratio = gr.Radio(choices= ["1:1", "16:9", "9:16"], value= "1:1", label="Aspect Ratio")
resolution = gr.Radio(choices= ["540p", "720p"], value= "720p", label="Resolution")
text_prompt = gr.Textbox(label="(Optional) Text Prompt", placeholder="Enter text prompt to describe your avatar", lines=3, max_length = 150,info= "Influence the expressivity of generated avatar")
            seed = gr.Slider(minimum=1, maximum=10000, step=1, value=None, label="(Optional) Seed for generation")  # step=1 keeps seeds integral
        with gr.Column(scale=1):
            output_video = gr.Video(label="Talking Head")
            generate_video_button = gr.Button("Generate Talking Face Avatar")
            generate_video_button.click(create_talking_face, inputs=[tts_output, image_input, aspect_ratio, resolution, text_prompt, seed], outputs=output_video)
if __name__ == "__main__":
    app.launch()