import os

import torch
import gradio
from omegaconf import OmegaConf

# Project-local imports from the AudioLCM codebase. The exact module paths
# below are assumptions and may need adjusting to match the repo layout.
from ldm.util import instantiate_from_config  # used by the quick-debug branch
from ldm.models.diffusion.lcm import LCMSampler  # assumed path
from vocoder.bigvgan.models import VocoderBigVGAN  # assumed path
from scripts.txt2audio import load_model_from_config, GenSamples  # assumed paths

def infer(prompt):
    # Note: the model, sampler, and vocoder are rebuilt on every request. For a
    # long-running demo, load them once at module level instead.
    config = OmegaConf.load("configs/audiolcm.yaml")

    # print("-------quick debug no load ckpt---------")
    # model = instantiate_from_config(config['model'])  # for quick debugging without a checkpoint
    model = load_model_from_config(config,
                                   "../logs/2024-04-21T14-50-11_text2music-audioset-nonoverlap/epoch=000184.ckpt")

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model = model.to(device)

    sampler = LCMSampler(model)

    os.makedirs("results/test", exist_ok=True)

    vocoder = VocoderBigVGAN("../vocoder/logs/bigvnat16k93.5w", device)

    generator = GenSamples(sampler, model, "results/test", vocoder, save_mel=False, save_wav=True,
                           original_inference_steps=config.model.params.num_ddim_timesteps)

    with torch.no_grad():
        with model.ema_scope():
            # `prompt` is a caption dict (see my_inference_function below), so the
            # output filename is derived from its 'ori_caption' field.
            wav_name = prompt['ori_caption'].strip().replace(" ", "-")
            generator.gen_test_sample(prompt, wav_name=wav_name)

    print("Your samples are ready and waiting for you here: \nresults/test \nEnjoy.")
    # Assumes GenSamples writes "<wav_name>.wav" into the output directory.
    return os.path.join("results/test", f"{wav_name}.wav")


def my_inference_function(prompt_ori):
    # The AudioLCM pipeline expects a caption dict; reuse the raw prompt for both fields.
    prompt = {'ori_caption': prompt_ori, 'struct_caption': prompt_ori}
    return infer(prompt)


gradio_interface = gradio.Interface(
    fn=my_inference_function,
    inputs="text",
    outputs="audio",
)
gradio_interface.launch()
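
# Usage note (assumption: this script is saved as app.py at the repo root, with
# the checkpoint and vocoder paths above in place):
#   python app.py
# Gradio prints a local URL (by default http://127.0.0.1:7860); open it and enter
# a caption such as "a dog barking in the distance" to generate audio.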