# MSP / app.py
# Last commit by Ken Lin: "delete model" (4c5a4b0)
# Page links: raw, history, blame
# File size: 1.28 kB
import gradio as gr
from transformers import AutoProcessor, MusicgenForConditionalGeneration
import numpy as np
# Text passed as the `title` argument of the Gradio interface.
title: str = "Musicalization System of Painting Demo"

# Text passed as the `description` argument of the Gradio interface.
description: str = (
    "Pui Ching Middle School: Musicalization System of Painting Demo"
)
# Hugging Face checkpoint used for text-to-music generation.
_MUSICGEN_CHECKPOINT = "facebook/musicgen-small"

# Holds the loaded (processor, model) pair so the checkpoint is read once
# instead of on every request.
_musicgen_cache = {}


def _load_musicgen():
    """Return the MusicGen ``(processor, model)`` pair, loading it on first use."""
    if "pair" not in _musicgen_cache:
        processor = AutoProcessor.from_pretrained(_MUSICGEN_CHECKPOINT)
        model = MusicgenForConditionalGeneration.from_pretrained(_MUSICGEN_CHECKPOINT)
        # Store both together so a failed load never leaves a half-filled cache.
        _musicgen_cache["pair"] = (processor, model)
    return _musicgen_cache["pair"]


def _to_int16_pcm(waveform):
    """Convert a float waveform to int16 samples, clipping to [-1, 1] first."""
    max_range = np.iinfo(np.int16).max
    # Without clipping, samples outside [-1, 1] would wrap around when cast
    # to int16 and come out as loud clicks.
    clipped = np.clip(waveform, -1.0, 1.0)
    return (clipped * max_range).astype(np.int16)


def generate_music(text):
    """Generate a short music clip from a text prompt with MusicGen.

    Args:
        text: Prompt describing the music to generate.

    Returns:
        A ``(sampling_rate, samples)`` tuple, where ``sampling_rate`` is read
        from the model's audio encoder config and ``samples`` is a NumPy
        ``int16`` array.
    """
    # The earlier tag2text calls were removed: they referenced names that are
    # not defined or imported in this file (NameError on every call) and their
    # result was never used.
    processor, model = _load_musicgen()
    inputs = processor(
        text=[text],
        padding=True,
        return_tensors="pt",
    )
    # NOTE(review): max_new_tokens bounds the clip length; the MusicGen docs
    # equate 256 tokens to roughly 5 s of audio. Confirm if another duration
    # is wanted.
    audio_values = model.generate(**inputs, max_new_tokens=256)
    sampling_rate = model.audio_encoder.config.sampling_rate
    # Take index 0 on the first two axes (presumably batch item 0, channel 0,
    # per the MusicGen docs).
    waveform = audio_values[0, 0].cpu().numpy()
    return sampling_rate, _to_int16_pcm(waveform)
# Build the web UI: one text box in, one audio player out, backed by
# generate_music.
prompt_box = gr.Text(label="Content")
audio_player = gr.Audio(label='Generated Music')

iface = gr.Interface(
    fn=generate_music,
    inputs=prompt_box,
    outputs=audio_player,
    title=title,
    description=description,
)

# Start serving the interface.
iface.launch()