Final_Assignment_AWorld

Sleeping

App Files Files Community

Final_Assignment_AWorld / mcp_servers /audio_server.py

Duibonduil

Upload 17 files

3e11f9b verified about 1 month ago

raw

history blame contribute delete

4.45 kB

	import base64
	import json
	import os
	import traceback
	from typing import List

	from mcp.server.fastmcp import FastMCP
	from openai import OpenAI
	from pydantic import Field

	from aworld.logs.util import logger
	from mcp_servers.utils import get_file_from_source

	# MCP server instance; the tool below registers itself on it via @mcp.tool.
	mcp = FastMCP("audio-server")


	# OpenAI client configured from the AUDIO_LLM_API_KEY / AUDIO_LLM_BASE_URL
	# environment variables.
	# NOTE(review): these variables are read when the module is imported, i.e.
	# before main() calls load_dotenv(); values that exist only in a .env file
	# are therefore not visible here -- confirm this is intended.
	client = OpenAI(
	    base_url=os.environ.get("AUDIO_LLM_BASE_URL"),
	    api_key=os.environ.get("AUDIO_LLM_API_KEY"),
	)

	# Prompt text describing an audio-transcription task and the JSON shape of the
	# expected reply. It is not referenced elsewhere in the visible part of this file.
	AUDIO_TRANSCRIBE = "".join(
	    [
	        "Input is a base64 encoded audio. ",
	        "Transcribe the audio content. ",
	        "Return a json string with the following format: ",
	        '{"audio_text": "transcribed text from audio"}',
	    ]
	)


	def encode_audio(audio_source: str, with_header: bool = True) -> str:
	    """
	    Encode audio to base64 format with robust file handling

	    The audio is fetched through get_file_from_source (restricted to "audio/"
	    MIME types and 50 MB), base64-encoded, and any file that the helper
	    created at a path different from ``audio_source`` is removed afterwards,
	    whether or not encoding succeeded.

	    Args:
	        audio_source: URL or local file path of the audio
	        with_header: Whether to include MIME type header

	    Returns:
	        str: Base64 encoded audio string, with MIME type prefix
	        (``data:<mime>;base64,``) if with_header is True

	    Raises:
	        ValueError: When audio source is empty
	        Exception: Any error raised while fetching or encoding the audio is
	            logged with its traceback and re-raised unchanged
	    """
	    if not audio_source:
	        raise ValueError("Audio source cannot be empty")

	    # Stays None until the helper has returned, so the cleanup below knows
	    # whether there is anything to remove.
	    file_path = None
	    try:
	        # Get file with validation (only audio files allowed)
	        file_path, mime_type, content = get_file_from_source(
	            audio_source,
	            allowed_mime_prefixes=["audio/"],
	            max_size_mb=50.0,  # 50MB limit for audio files
	            type="audio",  # Specify type as audio to handle audio files
	        )

	        # Encode to base64
	        audio_base64 = base64.b64encode(content).decode()

	        # Format with header if requested
	        if with_header:
	            return f"data:{mime_type};base64,{audio_base64}"
	        return audio_base64

	    except Exception:
	        logger.error(
	            f"Error encoding audio from {audio_source}: {traceback.format_exc()}"
	        )
	        raise

	    finally:
	        # Clean up the temporary file if one was created for a URL. Running
	        # this in `finally` means a failure after the download no longer
	        # leaks the file. Both sides are normalised so that a relative path
	        # pointing at the caller's own local file is never mistaken for a
	        # temporary copy (assumes the helper returns the source path itself
	        # for local files -- TODO confirm against get_file_from_source).
	        if file_path and os.path.abspath(file_path) != os.path.abspath(
	            audio_source
	        ):
	            try:
	                os.unlink(file_path)
	            except FileNotFoundError:
	                # Already gone; nothing to clean up.
	                pass
	            except OSError:
	                # A failed cleanup must not mask the result or the
	                # original error, so it is only logged.
	                logger.error(
	                    f"Error removing temporary file {file_path}: "
	                    f"{traceback.format_exc()}"
	                )


	@mcp.tool(description="Transcribe the given audio in a list of filepaths or urls.")
	async def mcp_transcribe_audio(
	    audio_urls: List[str] = Field(
	        description="The input audio in given a list of filepaths or urls."
	    ),
	) -> str:
	    """
	    Transcribe the given audio in a list of filepaths or urls.

	    Each source is fetched through get_file_from_source (restricted to
	    "audio/" MIME types and 50 MB) and sent to the transcription endpoint of
	    the module-level client, using the model named by the
	    AUDIO_LLM_MODEL_NAME environment variable. A failure on one source does
	    not abort the others.

	    Args:
	        audio_urls: List of audio file paths or URLs

	    Returns:
	        str: JSON array string with one entry per input, in input order.
	        An entry is either the transcription or an ``"Error: <message>"``
	        string for a source that failed.
	    """
	    # NOTE(review): `client` is the synchronous OpenAI client, so the request
	    # below blocks the event loop while it is in flight.
	    transcriptions = []
	    for audio_url in audio_urls:
	        # Stays None until the helper has returned, so the cleanup below
	        # knows whether there is anything to remove.
	        file_path = None
	        try:
	            # Get file with validation (only audio files allowed)
	            file_path, _, _ = get_file_from_source(
	                audio_url,
	                allowed_mime_prefixes=["audio/"],
	                max_size_mb=50.0,  # 50MB limit for audio files
	                type="audio",  # Specify type as audio to handle audio files
	            )

	            # Use the file for transcription
	            with open(file_path, "rb") as audio_file:
	                transcription = client.audio.transcriptions.create(
	                    file=audio_file,
	                    model=os.getenv("AUDIO_LLM_MODEL_NAME"),
	                    response_format="text",
	                )
	            transcriptions.append(transcription)

	        except Exception as e:
	            logger.error(f"Error transcribing {audio_url}: {traceback.format_exc()}")
	            transcriptions.append(f"Error: {str(e)}")

	        finally:
	            # Clean up the temporary file if one was created for a URL. Doing
	            # it in `finally` removes the file even when the transcription
	            # request failed. Both sides are normalised so that a relative
	            # path pointing at the caller's own local file is never mistaken
	            # for a temporary copy (assumes the helper returns the source
	            # path itself for local files -- TODO confirm).
	            if file_path and os.path.abspath(file_path) != os.path.abspath(
	                audio_url
	            ):
	                try:
	                    os.unlink(file_path)
	                except FileNotFoundError:
	                    # Already gone; nothing to clean up.
	                    pass
	                except OSError:
	                    # Only logged: a cleanup failure must not add a second
	                    # entry to the result list for this input.
	                    logger.error(
	                        f"Error removing temporary file {file_path}: "
	                        f"{traceback.format_exc()}"
	                    )

	    logger.info(f"---get_text_by_transcribe-transcription:{transcriptions}")
	    return json.dumps(transcriptions, ensure_ascii=False)


	def main():
	    """Load .env settings, announce start-up on stderr, and serve MCP over stdio."""
	    # NOTE(review): the module-level `client` was already built from
	    # os.getenv() at import time, so variables loaded here only affect
	    # lookups made later (e.g. AUDIO_LLM_MODEL_NAME) -- confirm intended.
	    import dotenv

	    dotenv.load_dotenv()

	    # The banner goes to stderr, not stdout.
	    sys.stderr.write("Starting Audio MCP Server...\n")
	    mcp.run(transport="stdio")


	# Entry hook intended for uvx
	def __call__():
	    """
	    Start the server by delegating to main().

	    Provided so that tooling such as uvx has a callable entry point on
	    this module.
	    """
	    main()


	# uvx compatibility: publish __call__ as an attribute of this module object.
	# NOTE(review): Python looks up special methods on the type, not the
	# instance, so this makes `module.__call__()` available but does not by
	# itself make `module()` work -- confirm what uvx actually needs.
	import sys

	setattr(sys.modules[__name__], "__call__", __call__)

	# Script entry point: start the server when this file is run directly.
	if __name__ == "__main__":
	    main()