import base64
import os
import tempfile

import yt_dlp
from langchain.tools import tool
from openai import OpenAI

from constants import OPENAI_KEY
from utils import get_bytes, get_text_file_contents, get_base64

# Initialize the OpenAI client explicitly with the key from constants
client = OpenAI(api_key=OPENAI_KEY)
def audio_to_text(base64_audio_path: str) -> str:
    """
    Transcribes an audio file (base64-encoded text stored in a file) using OpenAI's Whisper API.

    Args:
        base64_audio_path (str): Path to a file containing base64-encoded audio as text.

    Returns:
        str: The transcribed text.
    """
    try:
        # Read the base64 string
        with open(base64_audio_path, "r") as f:
            base64_str = f.read()

        # Decode base64 to raw audio bytes
        audio_bytes = base64.b64decode(base64_str)

        # Save audio bytes to a temp file (must be a supported format: mp3, m4a, wav, etc.)
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
            temp_audio.write(audio_bytes)
            temp_audio_path = temp_audio.name

        try:
            # Transcribe using the OpenAI Whisper API
            with open(temp_audio_path, "rb") as audio_file:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="text",
                )
            return transcript.strip()
        finally:
            # Remove the temporary audio file once transcription is done
            os.remove(temp_audio_path)
    except Exception as e:
        return f"An error occurred during transcription: {str(e)}"
def audio_to_text_from_youtube(youtube_url: str) -> str:
    """
    Downloads audio from a YouTube video and transcribes it using the OpenAI Whisper API.

    Args:
        youtube_url (str): URL of the YouTube video.

    Returns:
        str: Transcribed text.
    """
    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            audio_stem = os.path.join(tmpdir, "audio")
            audio_output_path = audio_stem + ".mp3"

            BASE_DIR = os.path.dirname(os.path.abspath(__file__))
            cookies_path = os.path.join(BASE_DIR, "files", "cookies.txt")
            print("cookies: \n" + get_text_file_contents(cookies_path))

            ydl_opts = {
                "format": "bestaudio/best",
                "outtmpl": audio_stem,
                "quiet": True,
                "cookiefile": cookies_path,
                "postprocessors": [{
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "mp3",
                    "preferredquality": "192",
                }],
            }

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([youtube_url])

            # Ensure the extracted file exists
            if not os.path.exists(audio_output_path):
                raise FileNotFoundError(f"Audio file not created: {audio_output_path}")

            # Transcribe with OpenAI Whisper
            with open(audio_output_path, "rb") as audio_file:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="text",
                )
            return transcript.strip()
    except Exception as e:
        return f"An error occurred during YouTube transcription: {str(e)}"
if __name__ == "__main__":
    # Example: path to a text file that contains base64-encoded audio (e.g., base64_audio.txt)
    base64_audio_file_path = r"C:\tmp\ibm\99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3.b64"

    # Call the tool function
    transcription = audio_to_text(base64_audio_file_path)

    # Print the result
    print("Transcription result:")
    print(transcription)
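
    # Illustrative usage of the YouTube variant (the URL below is a placeholder,
    # not from the original code); it requires ffmpeg on PATH and a valid
    # files/cookies.txt next to this module.
    # youtube_transcription = audio_to_text_from_youtube("https://www.youtube.com/watch?v=VIDEO_ID")
    # print(youtube_transcription)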