File size: 2,212 Bytes
0ea9241
842b913
 
 
0ea9241
842b913
 
 
 
 
f40c31f
0ea9241
842b913
 
 
 
 
0ea9241
 
f40c31f
842b913
0ea9241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import gradio as gr
import torch
import numpy as np
from transformers import MusicgenForConditionalGeneration, AutoProcessor
import scipy.io.wavfile

def generate_music(prompt, unconditional=False):
    model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    model.to(device)

    # Generate music
    if unconditional:
        unconditional_inputs = model.get_unconditional_inputs(num_samples=1)
        audio_values = model.generate(**unconditional_inputs, do_sample=True, max_new_tokens=256)
    else:
        processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
        inputs = processor(text=prompt, padding=True, return_tensors="pt")
        audio_values = model.generate(**inputs.to(device), do_sample=True, guidance_scale=3, max_new_tokens=256)

    sampling_rate = model.config.audio_encoder.sampling_rate
    audio_file = "musicgen_out.wav"
    
    # Ensure audio_values is 1D and scale if necessary
    audio_data = audio_values[0].cpu().numpy()
    
    # Check if audio_data is in the correct format
    if audio_data.ndim > 1:
        audio_data = audio_data[0]  # Take the first channel if stereo

    # Scale audio data to 16-bit PCM format
    audio_data = np.clip(audio_data, -1.0, 1.0)  # Ensure values are in the range [-1, 1]
    audio_data = (audio_data * 32767).astype(np.int16)  # Scale to int16

    # Save the generated audio
    scipy.io.wavfile.write(audio_file, sampling_rate, audio_data)
    
    return audio_file

def interface(prompt, unconditional):
    audio_file = generate_music(prompt, unconditional)
    return audio_file

with gr.Blocks() as demo:
    gr.Markdown("# AI-Powered Music Generation")
    
    with gr.Row():
        prompt_input = gr.Textbox(label="Enter the Music Prompt")
        unconditional_checkbox = gr.Checkbox(label="Generate Unconditional Music")

    generate_button = gr.Button("Generate Music")
    output_audio = gr.Audio(label="Output Music")

    generate_button.click(
        interface,
        inputs=[prompt_input, unconditional_checkbox],
        outputs=output_audio,
        show_progress=True
    )

demo.launch(share=True)