agent-course-final-assessment

Running

agent-course-final-assessment / tools /openai_speech_to_text_tool.py

Daniil Bogdanov

Release v5

a225ae4 18 days ago

928 Bytes

	import os

	import whisper
	from smolagents import Tool


	class OpenAISpeechToTextTool(Tool):
	    """
	    Tool to convert speech to text using OpenAI's Whisper model.

	    The Whisper "small" model is loaded lazily on the first transcription
	    and reused for later calls on the same tool instance, so repeated
	    transcriptions do not reload the model each time.
	    """

	    name = "transcribe_audio"
	    description = "Transcribes audio to text and returns the text"
	    inputs = {
	        "audio_path": {"type": "string", "description": "Path to the audio file"},
	    }
	    output_type = "string"

	    def _get_model(self):
	        """Return the Whisper model, loading it on first use and caching it."""
	        # Stored as an instance attribute (not a class attribute) so the
	        # tool's declared class-level metadata stays exactly as before.
	        model = getattr(self, "_whisper_model", None)
	        if model is None:
	            model = whisper.load_model("small")
	            self._whisper_model = model
	        return model

	    def forward(self, audio_path: str) -> str:
	        """
	        Transcribe the audio file at ``audio_path``.

	        Failures are reported as text rather than raised: the returned
	        string starts with "Error" when the file is missing or when
	        loading the model / transcribing fails.

	        Args:
	            audio_path (str): Path to the audio file.

	        Returns:
	            str: Transcribed text from the audio file, or an error message.
	        """
	        try:
	            # Validate the path before touching the model so a bad path
	            # does not pay for an expensive model load.
	            if not os.path.exists(audio_path):
	                return f"Error: Audio file not found at {audio_path}"

	            result = self._get_model().transcribe(audio_path)
	            return result["text"]
	        except Exception as e:
	            # Deliberate catch-all: the caller receives the failure as the
	            # tool's string output instead of an exception.
	            return f"Error transcribing audio: {str(e)}"