Spaces:
Runtime error
Runtime error
import gradio as gr | |
from transformers import pipeline | |
from gradio_client import Client | |
# Load the image recognition pipeline.
# Created once at module import so every request reuses the same classifier.
image_model = pipeline(
    task="image-classification",
    model="google/vit-base-patch16-224",
)
def generate_music(prompt):
    """Generate audio for a text prompt through the hosted AudioLDM Space.

    Args:
        prompt: Text describing the audio to generate; sent as the
            'Input text' field of the remote ``/text2audio`` endpoint.

    Returns:
        The value returned by ``Client.predict`` for the ``/text2audio``
        endpoint, passed through unchanged.
        NOTE(review): presumably a path/URL to the generated media file --
        confirm against the Space's API page before relying on its shape.
    """
    client = Client("https://haoheliu-audioldm-48k-text-to-hifiaudio-generation.hf.space/")
    result = client.predict(
        prompt,  # input text (previously a hard-coded placeholder, so the caller's prompt was ignored)
        5,  # duration in seconds (slider accepts 5 to 15)
        0,  # guidance scale (slider accepts 0 to 6; larger favors quality/relevancy, smaller favors diversity)
        5,  # seed; changing this integer leads to a different generation result
        1,  # number of candidates for automatic quality control (slider accepts 1 to 3)
        api_name="/text2audio",
    )
    print(result)
    # The caller forwards this value to an audio output component, so it must
    # be returned rather than only printed.
    return result
def generate_voice(prompt):
    """Generate audio for ``prompt`` through the hosted Tango Space.

    Args:
        prompt: Text sent as the first argument of the remote ``/predict``
            endpoint (the caller builds it from the image classification
            result).

    Returns:
        The raw value returned by ``Client.predict``; no post-processing is
        applied here.
        NOTE(review): extracting an audio file URL or data from this value
        may still be needed -- confirm what the endpoint returns.
    """
    steps = 100
    guidance_scale = 1
    tango = Client("https://declare-lab-tango.hf.space/")
    return tango.predict(
        prompt,
        steps,
        guidance_scale,
        api_name="/predict",  # API endpoint path
    )
def classify_and_generate_voice(uploaded_image):
    """Classify an image, then request voice and music for its top label.

    Args:
        uploaded_image: The image handed over by the Gradio input component.

    Returns:
        A 3-tuple ``(label, voice_result, music_result)``: the label of the
        first classifier prediction followed by the values returned by
        ``generate_voice`` and ``generate_music``.
    """
    # The first entry of the classifier output is used as the best match.
    ranked = image_model(uploaded_image)
    label = ranked[0]['label']

    # Voice is requested before music, each with its own prompt text.
    speech = generate_voice("this is " + label)
    music = generate_music("The rnb beat of 85BPM drums." + label + ".")

    # All three values are passed on to the Gradio interface outputs.
    return label, speech, music
# Build the Gradio interface: one image input, and a label plus two audio
# outputs matching the 3-tuple returned by classify_and_generate_voice.
# NOTE(review): the non-ASCII part of the description string looks
# mis-encoded -- confirm against the original source file.
iface = gr.Interface(
    title="msVision_3",
    description="์ด๋ฏธ์ง๋ฅผ ์ ๋ก๋ํ๋ฉด, ์ฌ๋ฌผ์ ์ธ์ํ๊ณ ํด๋นํ๋ ์์ฑ ๋ฐ ์์ ์ ์์ฑํฉ๋๋ค.(recognizes object and generate Voice&Music)",
    fn=classify_and_generate_voice,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Label(),
        gr.Audio(),
        gr.Audio(),
    ],
    examples=["dog.jpg", "cafe.jpg", "seoul.png"],
)

# Start serving the interface.
iface.launch()