import base64
import os
import tempfile

import yt_dlp
from langchain.tools import tool
from openai import OpenAI

from constants import OPENAI_KEY
from utils import get_bytes, get_text_file_contents, get_base64

# Initialize the OpenAI client explicitly with the key from constants
client = OpenAI(api_key=OPENAI_KEY)
def audio_to_text(base64_audio_path: str) -> str:
    """
    Transcribes an audio file (base64-encoded text stored in a file) using OpenAI's Whisper API.

    Args:
        base64_audio_path (str): Path to a file containing base64-encoded audio as text.

    Returns:
        str: The transcribed text.
    """
    try:
        # Read the base64 string
        with open(base64_audio_path, "r") as f:
            base64_str = f.read()

        # Decode base64 to raw audio bytes
        audio_bytes = base64.b64decode(base64_str)

        # Save audio bytes to a temp file (must be a supported format: mp3, m4a, wav, etc.)
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
            temp_audio.write(audio_bytes)
            temp_audio_path = temp_audio.name

        try:
            # Transcribe using the OpenAI Whisper API
            with open(temp_audio_path, "rb") as audio_file:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="text",
                )
            return transcript.strip()
        finally:
            # Remove the temporary audio file once transcription is done
            os.remove(temp_audio_path)
    except Exception as e:
        return f"An error occurred during transcription: {str(e)}"
def audio_to_text_from_youtube(youtube_url: str) -> str:
    """
    Downloads audio from a YouTube video and transcribes it using the OpenAI Whisper API.

    Args:
        youtube_url (str): URL of the YouTube video.

    Returns:
        str: Transcribed text.
    """
    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            audio_stem = os.path.join(tmpdir, "audio")
            audio_output_path = audio_stem + ".mp3"

            BASE_DIR = os.path.dirname(os.path.abspath(__file__))
            cookies_path = os.path.join(BASE_DIR, "files", "cookies.txt")
            print("cookies: \n" + get_text_file_contents(cookies_path))

            ydl_opts = {
                "format": "bestaudio/best",
                "outtmpl": audio_stem,
                "quiet": True,
                "cookiefile": cookies_path,
                "postprocessors": [{
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "mp3",
                    "preferredquality": "192",
                }],
            }

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([youtube_url])

            # Ensure the extracted file exists
            if not os.path.exists(audio_output_path):
                raise FileNotFoundError(f"Audio file not created: {audio_output_path}")

            # Transcribe with OpenAI Whisper
            with open(audio_output_path, "rb") as audio_file:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="text",
                )
            return transcript.strip()
    except Exception as e:
        return f"An error occurred during YouTube transcription: {str(e)}"
if __name__ == "__main__":
    # Example: path to a text file that contains base64-encoded audio (e.g., base64_audio.txt)
    base64_audio_file_path = r"C:\tmp\ibm\99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3.b64"

    # Call the tool function
    transcription = audio_to_text(base64_audio_file_path)

    # Print the result
    print("Transcription result:")
    print(transcription)
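
    # Illustrative usage of the YouTube variant (the URL below is a placeholder,
    # not from the original code); it requires ffmpeg on PATH and a valid
    # files/cookies.txt next to this module.
    # youtube_transcription = audio_to_text_from_youtube("https://www.youtube.com/watch?v=VIDEO_ID")
    # print(youtube_transcription)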