Music_LMMs / app_copy copy.py
fistyee
update
d0c2b7c
raw
history blame
6.86 kB
import gradio as gr
from PIL import Image
from io import BytesIO
import openai
import os
from dotenv import load_dotenv
from image_processor import ImageProcessor
from evaluation_processor import EvaluationProcessor
from zhipuai import ZhipuAI
# Load environment variables (os.getenv below also sees values from a .env file).
load_dotenv()

# Initialize OpenAI client
openai.api_key = os.getenv("OPENAI_API_KEY")
engine = "gpt-4o-mini"

# Initialize image and evaluation processors.
# NOTE(review): the literal below is a credential committed to source control.
# It is kept only as a fallback so existing deployments keep working; prefer
# setting PROCESSOR_API_KEY in the environment / .env and rotating this key.
api_key = os.getenv("PROCESSOR_API_KEY") or 'ddc85b14-bd83-4757-9bc4-8a11194da536'
image_processor = ImageProcessor(api_key)
evaluation_processor = EvaluationProcessor(api_key)
def process_input(text=None, images=None, audio=None, video=None):
    """Build one prompt from the supplied inputs and ask ZhipuAI to answer it.

    Args:
        text: Optional free-form text; appended to the prompt when truthy.
        images: Optional uploaded images; forwarded to ``process_images``.
        audio: Optional uploaded audio file; forwarded to ``process_audio``.
        video: Optional uploaded video file; forwarded to ``process_video``.

    Returns:
        A ``(response_text, result_path)`` tuple. ``result_path`` is the
        ``'result_path'`` entry of the title mapping returned by the audio or
        video evaluation (video wins when both succeed) and stays ``None``
        when no evaluation produced a title.
    """
    print("Starting process_input")
    system_prompt = (
        "1.你是一个音乐专家,只能回答音乐知识..."
    )
    messages = [{"role": "system", "content": system_prompt}]
    prompt = ""
    result_path = None

    if text:
        print("Processing text input")
        prompt += f"\nText input: {text}"

    if images:
        print("Processing images")
        prompt += process_images(images)

    if audio:
        print("Processing audio")
        result, title = process_audio(audio)
        prompt += result
        # process_audio returns None as the title when evaluation fails;
        # calling .get on it would raise AttributeError and abort the request.
        if title is not None:
            result_path = title.get('result_path', '')

    if video:
        print("Processing video")
        result, title = process_video(video)
        prompt += result
        # Same failure contract as process_audio: title may be None.
        if title is not None:
            result_path = title.get('result_path', '')

    return get_zhipuai_response(messages, prompt), result_path
def process_images(images):
    """Re-encode uploaded images as PNG bytes and run them through the image processor.

    Args:
        images: Iterable of uploads. Each item may be a filesystem path
            (``str`` / ``os.PathLike``) or an object exposing the path as
            ``.name``.

    Returns:
        A prompt fragment embedding the processor's result, or the string
        ``"Error processing image: <exception>"`` if the processor raises.
    """
    image_bytes_list = []
    for image in images:
        # The File input is declared with type="filepath", which delivers
        # plain path strings; file-wrapper objects expose the path as `.name`.
        image_path = image if isinstance(image, (str, os.PathLike)) else image.name
        # The context manager releases the underlying file handle promptly.
        with Image.open(image_path) as img:
            png_buffer = BytesIO()
            img.save(png_buffer, format="PNG")
        # getvalue() returns the whole buffer regardless of stream position.
        image_bytes_list.append(png_buffer.getvalue())

    try:
        processed_image_result = image_processor.process_images(image_bytes_list)
        return f"\n乐谱的内容如下,请你根据曲子的曲风回答问题: {processed_image_result}"
    except Exception as e:
        return f"Error processing image: {e}"
def process_audio(audio):
    """Evaluate an uploaded audio performance and build the review prompt.

    Args:
        audio: The uploaded audio, either a filesystem path (``str`` /
            ``os.PathLike``) or an object exposing the path as ``.name``.

    Returns:
        ``(prompt, title)`` on success, where ``title`` is the second value
        returned by ``evaluation_processor.process_evaluation``; on any
        exception, ``("Error processing audio: <exception>", None)``.
    """
    # The File input is declared with type="filepath", which delivers a plain
    # path string; file-wrapper objects expose the path as `.name`.
    audio_path = audio if isinstance(audio, (str, os.PathLike)) else audio.name
    try:
        result, title = evaluation_processor.process_evaluation(audio_path, is_video=False)
        prompt = (
            f"如果有曲名{title},请你根据这首歌的名字作者,并且"
            "1. 请你从\n"
            '"eva_all":综合得分\n'
            '"eva_completion":完整性\n'
            '"eva_note":按键\n'
            '"eva_stability":稳定性\n'
            '"eva_tempo_sync":节奏\n'
            f"几个方面评价一下下面这首曲子演奏的结果, 不用提及键的英文,只使用中文,曲子为 {result}"
        )
        return prompt, title
    except Exception as e:
        return f"Error processing audio: {e}", None
def process_video(video):
    """Evaluate an uploaded video performance and build the review prompt.

    Args:
        video: The uploaded video, either a filesystem path (``str`` /
            ``os.PathLike``) or an object exposing the path as ``.name``.

    Returns:
        ``(prompt, title)`` on success, where ``title`` is the second value
        returned by ``evaluation_processor.process_evaluation``; on any
        exception, ``("Error processing video: <exception>", None)``.
    """
    # The File input is declared with type="filepath", which delivers a plain
    # path string; file-wrapper objects expose the path as `.name`.
    video_path = video if isinstance(video, (str, os.PathLike)) else video.name
    try:
        result, title = evaluation_processor.process_evaluation(video_path, is_video=True)
        prompt = (
            f"如果有曲名{title},请你根据这首歌的名字作者,并且"
            "1.请你从\n"
            '"eva_all":综合得分\n'
            '"eva_completion":完整性\n'
            '"eva_note":按键\n'
            '"eva_stability":稳定性\n'
            '"eva_tempo_sync":节奏\n'
            f"几个方面评价一下下面这首曲子演奏的结果, 不用提及键的英文,只使用中文,曲子为 {result}"
        )
        return prompt, title
    except Exception as e:
        return f"Error processing video: {e}", None
def get_gpt_response(messages, prompt):
    """Stream an OpenAI chat completion, yielding the text accumulated so far.

    The user prompt is appended to ``messages`` in place before the request.

    Args:
        messages: Chat history list; mutated by appending the user turn.
        prompt: Text of the user turn.

    Yields:
        The cumulative response text after every chunk that carries content,
        or a single ``"Error: <exception>"`` string if anything raises.
    """
    messages.append({"role": "user", "content": prompt})
    accumulated = ""
    try:
        # Streaming request through the OpenAI API.
        stream = openai.ChatCompletion.create(
            model=engine,
            messages=messages,
            temperature=0.2,
            max_tokens=4096,
            top_p=0.95,
            frequency_penalty=0,
            presence_penalty=0,
            stream=True,
        )
        for event in stream:
            delta = event['choices'][0]['delta']
            if 'content' not in delta:
                continue
            accumulated += delta['content']
            yield accumulated  # hand back the running total each time
    except Exception as e:
        yield f"Error: {e}"
def get_zhipuai_response_stream(messages, prompt):
    """Stream a ZhipuAI chat completion, yielding the text accumulated so far.

    The user prompt is appended to ``messages`` in place before the request.

    Args:
        messages: Chat history list; mutated by appending the user turn.
        prompt: Text of the user turn.

    Yields:
        The cumulative response text after every chunk that carries text
        (matching ``get_gpt_response``), or a single ``"Error: <exception>"``
        string if the request or iteration raises.
    """
    print("Inside get_zhipuai_response_stream")
    # NOTE(review): the literal is a credential committed to source control.
    # It is kept only as a fallback; prefer ZHIPUAI_API_KEY in the environment.
    client = ZhipuAI(
        api_key=os.getenv("ZHIPUAI_API_KEY") or "423ca4c1f712621a4a1740bb6008673b.81aM7DNo2Ssn8FPA"
    )
    messages.append({"role": "user", "content": prompt})
    response_text = ""
    # Use ZhipuAI API for streaming response
    try:
        response = client.chat.completions.create(
            model="glm-4-flash",
            messages=messages,
            stream=True,  # Enable streaming
        )
        print("Response received from ZhipuAI")
        print(response)
        for chunk in response:
            print(f"Chunk received: {chunk}")  # Log each chunk
            delta_text = chunk.choices[0].delta.content
            if not delta_text:
                # Chunks without text would otherwise yield None / "".
                continue
            # Accumulate instead of overwriting, so each yield is the full
            # text so far rather than just the latest fragment.
            response_text += delta_text
            print(response_text)
            yield response_text
    except Exception as e:
        print(f"Error in get_zhipuai_response_stream: {e}")
        yield f"Error: {e}"
def get_zhipuai_response(messages, prompt):
    """Send the conversation to ZhipuAI (non-streaming) and return the reply text.

    The user prompt is appended to ``messages`` in place before the request.

    Args:
        messages: Chat history list; mutated by appending the user turn.
        prompt: Text of the user turn.

    Returns:
        The content of the first choice's message, or the string
        ``"Error: <exception>"`` if the request raises.
    """
    print("Inside get_zhipuai_response")  # Confirming entry into the function
    # NOTE(review): the literal is a credential committed to source control.
    # It is kept only as a fallback; prefer ZHIPUAI_API_KEY in the environment.
    client = ZhipuAI(
        api_key=os.getenv("ZHIPUAI_API_KEY") or "423ca4c1f712621a4a1740bb6008673b.81aM7DNo2Ssn8FPA"
    )
    messages.append({"role": "user", "content": prompt})
    print("Messages prepared:", messages)  # Log messages
    try:
        print("Calling ZhipuAI API...")  # Log before API call
        response = client.chat.completions.create(
            model="glm-4-flash",
            messages=messages,
            stream=False,  # Whole reply in one response object
        )
        print("Response received from ZhipuAI")  # Log response retrieval
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error in get_zhipuai_response: {e}")  # More informative error message
        return f"Error: {e}"
# Gradio UI: four inputs mapped positionally onto
# process_input(text, images, audio, video) and two outputs receiving its
# (response_text, result_path) return value.
_text_input = gr.Textbox(
    label="Input Text",
    placeholder="我是音乐多模态大模型,您可以上传需要分析的曲谱,音频和视频",
    lines=2,
)
_images_input = gr.File(label="Input Images", file_count="multiple", type="filepath")
_audio_input = gr.File(label="Input Audio, mp3", type="filepath")
_video_input = gr.File(label="Input Video, mp4", type="filepath")

_text_output = gr.Textbox(label="Output Text", interactive=True)
_page_output = gr.HTML(label="Webpage")

iface = gr.Interface(
    fn=process_input,
    inputs=[_text_input, _images_input, _audio_input, _video_input],
    outputs=[_text_output, _page_output],
    live=False,
)

# Start serving the application.
iface.launch()