File size: 2,024 Bytes
e5c9daf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from src.utils.tooling import tool
from pydub import AudioSegment
import os

@tool
def transcribe_audio(file_path: str, language: str = None) -> str:
    """
    Transcribes the content of an audio file into text.
    MP3 input is converted to a temporary WAV file before recognition; that temporary file is removed afterwards. The file at `file_path` itself is never modified or deleted.
    Args:
        file_path (str): The path to the audio file to transcribe.
        language (str, optional): The language code of the audio content (for example "en-US" or "fr-FR"). If None, the recognizer's own default language is used (en-US for the Google recognizer). Defaults to None.
    Returns:
        str: The transcribed text from the audio file.
    Raises:
        ImportError: If the `SpeechRecognition` package is not installed.
        Exception: If the audio could not be understood or the recognition service could not be reached.
    """
    try:
        import speech_recognition as sr
    except ImportError as e:
        raise ImportError(
            "You must install the package `SpeechRecognition` to run this tool. For instance, run `pip install SpeechRecognition`."
        ) from e

    import tempfile

    # Path of the WAV file created by this call, if any. Only this file is
    # ever deleted during cleanup -- never the caller's input.
    temp_wav_path = None

    try:
        if file_path.lower().endswith('.mp3'):
            # Convert into a fresh temp file instead of deriving a sibling
            # path from `file_path`: that avoids clobbering an existing
            # `.wav` next to the source and works for any casing of `.MP3`.
            fd, temp_wav_path = tempfile.mkstemp(suffix=".wav")
            os.close(fd)  # pydub reopens the path itself when exporting
            AudioSegment.from_mp3(file_path).export(temp_wav_path, format="wav")
            wav_path = temp_wav_path
        else:
            wav_path = file_path

        recognizer = sr.Recognizer()
        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)

        if language is None:
            # No language given: rely on the recognizer's default.
            text = recognizer.recognize_google(audio_data)
        else:
            text = recognizer.recognize_google(audio_data, language=language)

        return f"Transcribed text: '{text}'"

    except sr.UnknownValueError as e:
        raise Exception("Speech Recognition could not understand the audio.") from e

    except sr.RequestError as e:
        raise Exception(f"Could not request results from Speech Recognition service; {e}") from e

    finally:
        # Remove only the temporary conversion output created above.
        if temp_wav_path is not None and os.path.exists(temp_wav_path):
            os.remove(temp_wav_path)