"""HARP/Gradio endpoint generating AudioLDM variations of a selected audio region."""

import gradio as gr
from audioldm import build_model, save_wave, text_to_audio
from audiotools import AudioSignal
from pyharp import ModelCard, build_endpoint, save_and_return_filepath

# Load the diffusion model once at import time; reused across all requests.
audioldm = build_model(model_name="audioldm-m-full")


def process_fn(input_audio_path, seed, guidance_scale, num_inference_steps,
               num_candidates, audio_length_in_s):
    """Generate an AudioLDM variation of the input audio and save it to disk.

    Args:
        input_audio_path: Path to the source audio file (the region selected
            in the host's track).
        seed: RNG seed for reproducible generation; coerced to int.
        guidance_scale: Classifier-free guidance strength.
        num_inference_steps: Number of DDIM sampling steps; coerced to int.
        num_candidates: Candidates generated per prompt; coerced to int.
        audio_length_in_s: Duration of the generated audio, in seconds.

    Returns:
        Path to the rendered audio file, as required by the filepath-typed
        ``gr.Audio`` output component.
    """
    waveform = text_to_audio(
        audioldm,
        'placeholder',  # text prompt is not used for audio-to-audio variation
        input_audio_path,
        seed=int(seed),
        duration=audio_length_in_s,
        guidance_scale=guidance_scale,
        n_candidate_gen_per_text=int(num_candidates),
        ddim_steps=int(num_inference_steps),
    )
    save_wave(waveform, "./", name="output.wav")
    # BUG FIX: the original returned None, so the filepath-typed output
    # component never received the generated file. Return the saved path.
    # NOTE(review): assumes save_wave writes exactly "./output.wav" for this
    # name argument — confirm against the installed audioldm version.
    return "./output.wav"


# Metadata shown by the HARP plugin host for this endpoint.
card = ModelCard(
    name='AudioLDM Variations',
    description='AudioLDM Variation Generator, operates on region selected in track.',
    author='Team Audio',
    tags=['AudioLDM', 'Variations', 'audio-to-audio'],
)

with gr.Blocks() as webapp:
    # Define your Gradio interface
    inputs = [
        gr.Audio(
            label="Audio Input",
            type="filepath",
        ),
        # BUG FIX: bounds/step/value were string literals ("0", "65535", ...);
        # gr.Slider expects numbers, matching every other slider below.
        gr.Slider(
            label="seed",
            minimum=0,
            maximum=65535,
            value=43534,
            step=1,
        ),
        gr.Slider(
            minimum=0,
            maximum=10,
            step=0.1,
            value=2.5,
            label="Guidance Scale",
        ),
        gr.Slider(
            minimum=1,
            maximum=500,
            step=1,
            value=200,
            label="Inference Steps",
        ),
        gr.Slider(
            minimum=1,
            maximum=10,
            step=1,
            value=1,
            label="Candidates",
        ),
        gr.Slider(
            minimum=2.5,
            maximum=10.0,
            step=2.5,
            value=5,
            label="Duration",
        ),
    ]
    output = gr.Audio(label="Audio Output", type="filepath")

    # Wire the HARP endpoint controls to process_fn.
    ctrls_data, ctrls_button, process_button, cancel_button = build_endpoint(
        inputs, output, process_fn, card
    )

# queue the webapp: https://www.gradio.app/guides/setting-up-a-demo-for-maximum-performance
# webapp.queue()

webapp.launch(share=True)