|
import speech_recognition as sr |
|
import os |
|
from pydub import AudioSegment |
|
from smolagents import tool |
|
|
|
|
|
@tool
def transcribe_audio(mp3_path: str) -> str:
    """
    Transcribes text from an MP3 audio file using speech recognition.

    The MP3 is decoded with pydub, converted to a temporary WAV file (the
    format read here through ``sr.AudioFile``), and the recorded audio is sent
    to the Google Web Speech API via ``recognize_google``. The temporary WAV
    lives in a private temporary directory, so the input file and its
    directory are never written to.

    Args:
        mp3_path (str): Path to the MP3 file to be transcribed.

    Returns:
        str: The transcribed text from the audio file.

    Raises:
        FileNotFoundError: If the MP3 file does not exist at the specified path.
        ValueError: If the file cannot be decoded as audio, the speech is
            unintelligible, or the recognition request fails.
        Exception: For other unexpected errors during transcription.

    Example:
        >>> text = transcribe_audio("sample.mp3")
        >>> print(text)
        Hello, this is a sample audio.
    """
    # Imported locally so the block brings its own extra dependencies into
    # scope without touching the module-level import list.
    import tempfile

    from pydub.exceptions import CouldntDecodeError

    if not os.path.exists(mp3_path):
        raise FileNotFoundError(f"The file {mp3_path} does not exist.")

    recognizer = sr.Recognizer()

    try:
        audio = AudioSegment.from_mp3(mp3_path)

        # Convert inside a throwaway directory. Deriving the WAV name from the
        # input with str.replace(".mp3", ".wav") is unsafe: for names without a
        # lowercase ".mp3" (e.g. "clip.MP3") the result equals the input path,
        # so the export would overwrite the source and the cleanup delete it.
        # The context manager also removes the WAV when any step below raises.
        with tempfile.TemporaryDirectory() as tmp_dir:
            wav_path = os.path.join(tmp_dir, "audio.wav")

            # export() hands back the output file handle; close it explicitly
            # so the file is flushed and can be deleted on every platform.
            audio.export(wav_path, format="wav").close()

            with sr.AudioFile(wav_path) as source:
                # NOTE(review): calibration reads from the beginning of the
                # stream (library default: up to 1 second), so that portion is
                # presumably not part of the recorded data - confirm this is
                # acceptable for short clips.
                recognizer.adjust_for_ambient_noise(source)
                audio_data = recognizer.record(source)

        return recognizer.recognize_google(audio_data)

    except sr.UnknownValueError as e:
        raise ValueError("Could not understand the audio.") from e
    except sr.RequestError as e:
        raise ValueError(f"Could not process audio; {e}") from e
    except CouldntDecodeError as e:
        # Matches the documented contract: an undecodable file is a ValueError.
        raise ValueError(f"Could not decode {mp3_path} as MP3 audio: {e}") from e
    except Exception as e:
        raise Exception(f"An error occurred during transcription: {e}") from e
|
|