File size: 1,643 Bytes
a158204
 
 
 
 
42de6a5
 
a158204
 
 
 
 
 
 
 
 
 
 
42de6a5
 
 
 
 
a32e57e
42de6a5
d3d41db
 
 
a158204
 
d3d41db
 
 
 
 
 
42de6a5
f075301
2b0ff08
42de6a5
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import streamlit as st
import os
import requests
import io
from PIL import Image
from IPython.display import Audio, display
from freeGPT import Client

# Access token for the inference API, read from the environment (None when unset).
api_token = os.environ.get("API_TOKEN")
API_URL = "https://api-inference.huggingface.co/models/facebook/musicgen-small"
API_URL_IMG = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
# Only attach an Authorization header when a token is actually configured;
# otherwise the literal string "Bearer None" would be sent with every request.
headers = {"Authorization": f"Bearer {api_token}"} if api_token else {}

# Page header and input widgets. The uploader value and the button state are
# read further down to decide whether to run the generation pipeline.
st.title("✨ Image2Music Generator")
st.write("Music generator using Facebook MusicGen, ChatGPT3 and Blip image captioning large.")
img_prompt = st.file_uploader("Upload Image", type=["jpeg", "jpg", "png"])
subm_btn = st.button("✨ Generate")

def musquery(payload):
    """POST ``payload`` as JSON to the MusicGen endpoint and return the response body.

    Args:
        payload: JSON-serialisable object sent as the request body
            (``generate_audio`` passes ``{"inputs": prompt}``).

    Returns:
        The raw response body as ``bytes``.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
        requests.RequestException: on connection failures or timeouts.
    """
    # A timeout keeps the script from hanging forever on a stalled connection;
    # the read limit is generous because generation can take a while.
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=(10, 300)
    )
    # Without this check an error body would be returned to the caller as if
    # it were a successful result.
    response.raise_for_status()
    return response.content

def generate_audio(prompt):
    """Wrap ``prompt`` in an ``{"inputs": ...}`` payload and pass it to ``musquery``.

    Returns whatever ``musquery`` returns for that payload.
    """
    request_body = {"inputs": prompt}
    return musquery(request_body)

if subm_btn and img_prompt is not None:
    def query(image_bytes):
        """POST the encoded image to the captioning endpoint and return the decoded JSON."""
        response = requests.post(
            API_URL_IMG, headers=headers, data=image_bytes, timeout=120
        )
        return response.json()

    # Decode the upload with PIL and re-encode it in its own format to get
    # the byte string that is both sent to the API and displayed below.
    image = Image.open(img_prompt)
    image_bytes = io.BytesIO()
    image.save(image_bytes, format=image.format)
    image_bytes = image_bytes.getvalue()

    try:
        output = query(image_bytes)
    except (requests.RequestException, ValueError) as err:
        # ValueError covers a response body that is not valid JSON.
        output = {"error": str(err)}

    # A successful caption response is indexed as output[0]['generated_text'];
    # anything else (e.g. an error object) must not be indexed blindly.
    caption_ok = (
        isinstance(output, list)
        and len(output) > 0
        and isinstance(output[0], dict)
        and "generated_text" in output[0]
    )

    if not caption_ok:
        detail = output.get("error", output) if isinstance(output, dict) else output
        st.error(f"Image captioning failed: {detail}")
    else:
        prompt_gpt = output[0]['generated_text']
        # NOTE(review): the instruction text contains a stray "a"; it is left
        # unchanged here so the text sent to the model stays the same.
        prompt = Client.create_completion("gpt3", "Write a prompt a to generate music from this: " + prompt_gpt)
        st.image(image_bytes, caption=f'Generated prompt for input image - "{prompt}"')
        try:
            music = generate_audio(prompt)
        except requests.RequestException as err:
            st.error(f"Music generation failed: {err}")
        else:
            st.success('Music Generated Successfully!', icon="✅")
            st.audio(music, format="audio/wav")
            st.download_button("Download", music)
elif subm_btn:
    # The button was pressed without an uploaded file: tell the user why
    # nothing happened instead of silently doing nothing.
    st.warning("Please upload an image before generating.")