File size: 1,014 Bytes
a37d72d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import gradio as gr
from audioldm import text_to_audio, build_model

# Hub identifier of an AudioLDM checkpoint; not referenced by text2audio below.
model_id = "haoheliu/AudioLDM-S-Full"

# Lazily-built model shared across calls to text2audio; None until first use.
audioldm = None
# Name the cached model was built with, so a different request triggers a rebuild.
current_model_name = None

def text2audio(text, duration, guidance_scale, random_seed, n_candidates, model_name="audioldm-m-text-ft"):
    """Generate audio for a text prompt and render each candidate with Gradio.

    The model is built on first use and rebuilt whenever ``model_name``
    differs from the name used for the previous build. It is cached in the
    module-level ``audioldm`` / ``current_model_name`` globals.

    Args:
        text: Prompt forwarded to ``text_to_audio`` as ``text``.
        duration: Forwarded to ``text_to_audio`` as ``duration``.
        guidance_scale: Forwarded to ``text_to_audio`` as ``guidance_scale``.
        random_seed: Forwarded to ``text_to_audio`` as ``seed``.
        n_candidates: Converted with ``int()`` and forwarded as
            ``n_candidate_gen_per_text``.
        model_name: Name passed to ``build_model``.

    Returns:
        The single ``gr.make_waveform`` result when exactly one candidate was
        produced, otherwise a list with one result per candidate.
    """
    global audioldm, current_model_name

    # Rebuild only when nothing is cached yet or a different model is requested.
    model_is_stale = audioldm is None or model_name != current_model_name
    if model_is_stale:
        audioldm = build_model(model_name=model_name)
        current_model_name = model_name

    # NOTE(review): upstream comment describes this as [bs, 1, samples] - confirm.
    candidates = text_to_audio(
        latent_diffusion=audioldm,
        text=text,
        seed=random_seed,
        duration=duration,
        guidance_scale=guidance_scale,
        n_candidate_gen_per_text=int(n_candidates),
    )

    rendered = []
    for candidate in candidates:
        # 16000 is presumably the sample rate in Hz - verify against the model.
        clip = gr.make_waveform((16000, candidate[0]), bg_image="bg.png")
        rendered.append(clip)

    # A lone result is unwrapped; several results are returned as a list.
    return rendered[0] if len(rendered) == 1 else rendered