# Abc / app.py
# grayphite's picture
# minor change
# 7033d59
import json
import os
import subprocess
import threading
import time
from typing import Iterator

import gradio as gr
from elevenlabs import play, stream
from elevenlabs.client import AsyncElevenLabs, ElevenLabs
from groq import Groq

from utils.mapper import AVAILABLE_FUNCTIONS, COMMON_VARS, TOOLS
# Initialize API clients.
# SECURITY: these API keys were previously hard-coded in source control and must
# be treated as compromised — revoke them and supply GROQ_API_KEY /
# ELEVENLABS_API_KEY via the environment. The literals remain only as a
# backward-compatible fallback until the keys are rotated.
client = Groq(
    api_key=os.environ.get(
        "GROQ_API_KEY", "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"
    )
)
elevenlabs_client = ElevenLabs(
    api_key=os.environ.get(
        "ELEVENLABS_API_KEY", "sk_16d08614d675e9de0a89bdbff094c6332fceaafbb280f4b3"
    )
)
# elevenlabs_client = AsyncElevenLabs(
#     api_key="73d9a4f6d777e9224641e79aeb39dc50"
# )
def text_to_speech_file(text: str, play_audio: bool) -> Iterator[bytes]:
    """Convert *text* to speech with ElevenLabs and optionally stream it aloud.

    Parameters
    ----------
    text : str
        The text to synthesize.
    play_audio : bool
        When True, the audio stream is played through `elevenlabs.stream`
        (which consumes the iterator as it plays).

    Returns
    -------
    Iterator[bytes]
        The audio chunk iterator returned by the ElevenLabs client
        (possibly already consumed if it was played).
    """
    audio_chunks = elevenlabs_client.generate(
        text=text,
        voice="Adam",
        model="eleven_turbo_v2_5",
        stream=True,
        optimize_streaming_latency=3,
    )
    if play_audio:
        print("streaming")
        stream(audio_chunks)
    return audio_chunks
def create_content(result):
    """Build the system prompt for the tool-selection chat completion.

    Parameters
    ----------
    result : str
        Unused; kept for backward compatibility with existing callers.

    Returns
    -------
    str
        A system prompt instructing the model to pick and call one of the
        provided tools, with default parameter values interpolated from
        ``COMMON_VARS`` (imported from ``utils.mapper``).
    """
    # NOTE(review): a step-by-step "additional_text" string was previously
    # built here but never used anywhere; that dead code has been removed.
    content = (
        f"""You are an AI assistant that will suggest and call the functions provided in the tools based on the
user's request. You need to analyze the user's request and select the function from the provided tools that
best matches the request and provide the results by calling the appropriate function.
Expect all parameters from user request, Consider the dates according to the user query for example if user
asking some operations for today, it should be understood to get today's date in YYYY-MM-DD format for date
parameters.
If required parameters are not in the user request these default can be used {COMMON_VARS}
"""
    )
    return content
def background_task(audio_file_path, language, additional_text):
    """Transcribe audio, answer via LLM tool-calling, and speak the reply.

    Mirrors ``transcribe_audio`` but speaks the final answer synchronously
    and raises instead of returning an error string.

    Parameters
    ----------
    audio_file_path : str
        Path to the audio file to transcribe.
    language : str
        Whisper language code for the transcription.
    additional_text : str
        Extra context appended to the transcription in the user message.

    Returns
    -------
    str
        The model's final natural-language answer.

    Raises
    ------
    Exception
        If the model does not select any tool for the query.
    """
    model = "llama3-groq-70b-8192-tool-use-preview"

    # Speech -> text via Whisper on Groq.
    with open(audio_file_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            file=audio_file,
            model="whisper-large-v3",
            language=language,
        )
    transcribed_text = transcription.text

    user_message = (
        f"Transcription: {transcribed_text}\n\n"
        f"Additional Context: {additional_text}\n\n"
    )
    messages = [
        {"role": "system", "content": create_content(user_message)},
        {"role": "user", "content": user_message},
    ]

    # First pass: let the model pick a tool for the request.
    first_pass = client.chat.completions.create(
        messages=messages,
        tools=TOOLS,
        tool_choice="auto",
        model=model,
        temperature=0.5,
        max_tokens=500,
    )
    assistant_message = first_pass.choices[0].message
    if not assistant_message.tool_calls:
        raise Exception(f"No Tool Found associated with query: {transcribed_text}")
    messages.append(assistant_message)

    # Execute every requested tool and feed each result back to the model.
    for call in assistant_message.tool_calls:
        handler = AVAILABLE_FUNCTIONS[call.function.name]
        outcome = handler(**json.loads(call.function.arguments))
        messages.append(
            {
                "tool_call_id": call.id,
                "role": "tool",
                "name": call.function.name,
                "content": outcome,
            }
        )

    # Second pass: turn the tool results into a natural-language answer.
    follow_up = client.chat.completions.create(model=model, messages=messages)
    final_response = follow_up.choices[0].message.content
    print("final: ", final_response)

    # Speak the answer (blocking) before returning it.
    text_to_speech_file(final_response, True)
    return final_response
def play_audio():
    """Play the canned greeting clip (``greetings.mp3``) aloud.

    Fix: the file handle was previously leaked (``open(...).read()`` with no
    close); a context manager now guarantees it is closed.
    """
    with open('greetings.mp3', 'rb') as audio_file:
        play(audio_file.read())
def transcribe_audio(audio_file_path, language, additional_text):
    """Gradio handler: transcribe audio, answer via tool-calling, speak reply.

    Flow: play a canned greeting, transcribe the uploaded audio with Whisper
    on Groq, ask the LLM to select and call one of the configured tools, then
    request a final natural-language answer, which is spoken on a background
    thread so the UI response is not blocked.

    Parameters
    ----------
    audio_file_path : str
        Path to the uploaded audio file (Gradio ``filepath`` input).
    language : str
        Whisper language code selected in the UI.
    additional_text : str
        Extra context appended to the transcription in the user message.

    Returns
    -------
    str
        The model's final answer, or an ``"An error occurred: ..."`` string
        that Gradio displays to the user.
    """
    try:
        # Play a canned greeting while the real work happens.
        # Fix: use a context manager so the file handle is closed.
        with open('greetings.mp3', 'rb') as greeting_file:
            play(greeting_file.read())
        # Speech -> text via Whisper on Groq.
        with open(audio_file_path, "rb") as audio_file:
            response = client.audio.transcriptions.create(
                file=audio_file,
                model="whisper-large-v3",
                language=language
            )
        transcribed_text = response.text
        result = f"Transcription: {transcribed_text}\n\nAdditional Context: {additional_text}\n\n"
        content = create_content(result)
        model = "llama3-groq-70b-8192-tool-use-preview"
        messages = [
            {
                "role": "system",
                "content": content
            },
            {
                "role": "user",
                "content": result
            }
        ]
        # First pass: let the model pick a tool for the request.
        chat_completion = client.chat.completions.create(
            messages=messages,
            tools=TOOLS,
            tool_choice="auto",
            model=model,
            temperature=0.5,
            max_tokens=500,
        )
        response_message = chat_completion.choices[0].message
        tool_calls = response_message.tool_calls
        if not tool_calls:
            raise Exception(f"No Tool Found associated with query: {transcribed_text}")
        messages.append(response_message)
        # Execute every requested tool and feed each result back to the model.
        for tool_call in tool_calls:
            function_name = tool_call.function.name
            function_to_call = AVAILABLE_FUNCTIONS[function_name]
            function_args = json.loads(tool_call.function.arguments)
            function_response = function_to_call(**function_args)
            messages.append(
                {
                    "tool_call_id": tool_call.id,
                    "role": "tool",
                    "name": function_name,
                    "content": function_response,
                }
            )
        # Second pass: turn the tool results into a natural-language answer.
        second_response = client.chat.completions.create(
            model=model,
            messages=messages
        )
        final_response = second_response.choices[0].message.content
        print("final: ", final_response)
        # Speak the answer without blocking the Gradio response.
        thread = threading.Thread(target=text_to_speech_file, args=(final_response, True))
        thread.start()
        return final_response
    except Exception as e:
        # Boundary handler: Gradio shows the returned string to the user.
        return f"An error occurred: {str(e)}"
def speach_to_text():
    """Build and launch the Gradio speech-to-text demo UI.

    (The misspelled name is kept as-is — it is the public entry point
    referenced by the ``__main__`` block.)
    """
    # Whisper language codes offered in the dropdown
    # (example subset; adjust to Groq's actual supported languages).
    supported_languages = ["en", "ba", "ms", "is", "no", "id"]
    demo = gr.Interface(
        fn=transcribe_audio,
        inputs=[
            gr.Audio(type="filepath", label="Upload Audio File"),
            gr.Dropdown(choices=supported_languages, label="Select Language", value="en"),
            gr.Textbox(label="Additional Text", placeholder="Enter any additional context or instructions here...")
        ],
        outputs="text",
        title="Groq Speech-to-Text Transcription",
        description="Upload an audio file, set parameters, and provide additional text for context in the "
                    "transcription process."
    )
    demo.launch()
# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    import platform
    print(f"platform: {platform.platform()}")
    if "Linux" in platform.platform():
        # Fix: platform.freedesktop_os_release() raises OSError on non-Linux
        # systems (and only exists on Python 3.10+), so it must only be
        # attempted once we know we are on Linux — previously it ran
        # unconditionally and crashed the script on macOS/Windows.
        try:
            print(f"platform: {platform.freedesktop_os_release()}")
        except (OSError, AttributeError):
            pass
        # mpv is needed by elevenlabs' stream()/play() audio helpers.
        subprocess.run(['apt', 'install', '-y', 'mpv'])
    speach_to_text()