# agent_course_final / audio_analyzer.py
# Author: George Sergia
# Last commit: "Add new tools. Fix bugs" (b11304e)
from typing import Optional
from google import genai
from google.genai import types
import requests
import os
# File extensions mapped to audio MIME types; anything not listed keeps the
# historical "audio/mpeg" label so existing callers see no change.
_AUDIO_MIME_BY_EXTENSION = {
    ".mp3": "audio/mpeg",
    ".wav": "audio/wav",
    ".aif": "audio/aiff",
    ".aiff": "audio/aiff",
    ".aac": "audio/aac",
    ".ogg": "audio/ogg",
    ".flac": "audio/flac",
}
_DEFAULT_AUDIO_MIME_TYPE = "audio/mpeg"

# Upper bound (seconds) on the audio download so a stalled server cannot hang
# the caller forever; requests has no default timeout.
_DOWNLOAD_TIMEOUT_SECONDS = 30


def _guess_audio_mime_type(audio_url: str) -> str:
    """Return the audio MIME type implied by the file extension in the URL path."""
    from urllib.parse import urlparse

    # Only the path component is inspected so query strings and fragments
    # (e.g. "?token=...") do not hide the extension.
    extension = os.path.splitext(urlparse(audio_url).path)[1].lower()
    return _AUDIO_MIME_BY_EXTENSION.get(extension, _DEFAULT_AUDIO_MIME_TYPE)


def analyze_audio(audio_url: str, analysis_prompt: Optional[str] = None) -> str:
    """
    Download an audio file and analyze it with the Google Gemini model.

    The file is fetched over HTTP and sent to "gemini-2.0-flash" together with
    a text prompt. The MIME type is derived from the URL's file extension
    (MP3, WAV, AIFF, AAC, OGG, FLAC); unrecognized extensions are sent as
    "audio/mpeg".

    Args:
        audio_url (str): URL of the audio file to analyze.
        analysis_prompt (Optional[str]): Optional prompt for a specific analysis
            focus. When omitted or empty, a detailed transcription is requested.

    Returns:
        str: Text of the model response on success.
        On any failure (invalid URL, download error, HTTP error status, API
        error) a dict of the form {"error": "Error analyzing audio: ..."} is
        returned instead of raising; callers must check for it.
    """
    # Reject unusable input up front instead of creating a client and
    # attempting a request that cannot succeed.
    if not isinstance(audio_url, str) or not audio_url.strip():
        return {"error": "Error analyzing audio: audio_url must be a non-empty string"}

    try:
        # Initialize the Google Gen AI client
        gemini_llm = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
        print(f"Analyzing audio from URL {audio_url}")
        prompt = analysis_prompt or "Provide a detailed transcription of this audio."

        download = requests.get(audio_url, timeout=_DOWNLOAD_TIMEOUT_SECONDS)
        # Fail here on 4xx/5xx so an HTML error page is never sent as audio.
        download.raise_for_status()

        audio = types.Part.from_bytes(
            data=download.content,
            mime_type=_guess_audio_mime_type(audio_url),
        )

        # Get response from Gemini 2.0 Flash
        response = gemini_llm.models.generate_content(
            model="gemini-2.0-flash",
            contents=[prompt, audio],
        )
        print(response.text)
        return response.text
    except Exception as e:
        # Deliberately broad: errors are reported to the caller as data
        # rather than propagated as exceptions.
        return {"error": f"Error analyzing audio: {e}"}