Spaces:
Sleeping
Sleeping
from typing import Optional | |
from google import genai | |
from google.genai import types | |
import requests | |
import os | |
# Upper bound (seconds) for connecting to / reading from the audio host, so a
# stalled server cannot block the caller forever.
_AUDIO_DOWNLOAD_TIMEOUT_SECONDS = 30

# MIME type used when nothing better can be determined; this is the value the
# function always sent before format detection was added.
_DEFAULT_AUDIO_MIME_TYPE = "audio/mpeg"

# Non-canonical spellings servers and `mimetypes` commonly produce, mapped to
# the spelling forwarded to the model.
_AUDIO_MIME_ALIASES = {
    "audio/x-wav": "audio/wav",
    "audio/wave": "audio/wav",
    "audio/vnd.wave": "audio/wav",
    "audio/x-aiff": "audio/aiff",
    "audio/x-flac": "audio/flac",
    "audio/x-aac": "audio/aac",
    "audio/x-m4a": "audio/mp4",
    "audio/m4a": "audio/mp4",
    "audio/mp4a-latm": "audio/mp4",
}

# Audio types that are forwarded as detected; anything else falls back to the
# default. NOTE(review): list taken from the Gemini audio documentation --
# confirm it still matches the model in use.
_FORWARDABLE_AUDIO_MIME_TYPES = frozenset(
    {
        "audio/mpeg",
        "audio/mp3",
        "audio/wav",
        "audio/aiff",
        "audio/aac",
        "audio/ogg",
        "audio/flac",
        "audio/mp4",
        "audio/webm",
    }
)


def _pick_audio_mime_type(content_type, audio_url):
    """Choose the MIME type to label the audio with.

    The server's Content-Type header is tried first, then a guess from the
    URL's file extension. A candidate is used only if it is in the allow-list;
    otherwise the historical default ("audio/mpeg") is returned.
    """
    import mimetypes
    from urllib.parse import urlparse

    # Drop parameters such as "; charset=..." and normalize case.
    header_type = (content_type or "").split(";", 1)[0].strip().lower()
    # Guess from the path only, so query strings do not hide the extension.
    guessed_type, _ = mimetypes.guess_type(urlparse(audio_url).path)

    for candidate in (header_type, (guessed_type or "").lower()):
        candidate = _AUDIO_MIME_ALIASES.get(candidate, candidate)
        if candidate in _FORWARDABLE_AUDIO_MIME_TYPES:
            return candidate
    return _DEFAULT_AUDIO_MIME_TYPE


def analyze_audio(audio_url: str, analysis_prompt: Optional[str] = None) -> str:
    """
    Download an audio file and have a Google Gemini model transcribe or analyze it.

    The audio is fetched over HTTP and sent inline, together with the prompt,
    to the "gemini-2.0-flash" model. The audio format is taken from the
    response's Content-Type header or the URL's file extension, falling back
    to "audio/mpeg".

    Args:
        audio_url (str): URL of the audio file to analyze.
        analysis_prompt (Optional[str]): Optional prompt for a specific analysis
            focus. When omitted or empty, a detailed transcription is requested.

    Returns:
        str: Text containing the analysis results.

        On any failure (invalid URL, download error, non-2xx HTTP status,
        Gemini API error) the function does not raise; it returns a dict of
        the form {"error": "Error analyzing audio: <details>"} instead. This
        differs from the annotated return type and is kept for compatibility
        with existing callers.
    """
    # Reject unusable URLs before building a client or touching the network.
    if not isinstance(audio_url, str) or not audio_url.strip():
        return {"error": "Error analyzing audio: audio_url must be a non-empty string"}

    try:
        # Initialize the Google Gen AI client
        gemini_llm = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
        print(f"Analyzing audio from URL {audio_url}")
        text = analysis_prompt or "Provide a detailed transcription of this audio."

        download = requests.get(audio_url, timeout=_AUDIO_DOWNLOAD_TIMEOUT_SECONDS)
        # Without this, an HTML error page would be sent to the model as audio.
        download.raise_for_status()

        mime_type = _pick_audio_mime_type(
            download.headers.get("Content-Type"), audio_url
        )
        audio = types.Part.from_bytes(data=download.content, mime_type=mime_type)

        # Get the response from the gemini-2.0-flash model
        response = gemini_llm.models.generate_content(
            model="gemini-2.0-flash",
            contents=[text, audio],
        )
        print(response.text)
        return response.text
    except Exception as e:
        # Errors are reported to the caller as a value rather than raised.
        return {"error": f"Error analyzing audio: {e}"}