# Xplayn voice assistant — Hugging Face Space
# Pipeline: record voice → transcribe (AssemblyAI) → respond (Cerebras Llama 3.3) → speak (gTTS)
| import os | |
| import gradio as gr | |
| import assemblyai as aai | |
| from cerebras.cloud.sdk import Cerebras | |
| from gtts import gTTS | |
| import tempfile | |
# API credentials are supplied via environment variables (Space secrets).
assembly_key = os.getenv("AssemblyVoice")
cerebras_key = os.getenv("CerebrasAI")

# Configure AssemblyAI globally and build the Cerebras client used below.
aai.settings.api_key = assembly_key
client = Cerebras(api_key=cerebras_key)
def process_audio(audio):
    """Transcribe recorded audio, generate an LLM reply, and return it as speech.

    Parameters
    ----------
    audio : str | file-like | None
        Path to the recorded audio file (Gradio delivers a path when the
        input uses ``type="filepath"``), or a file-like object exposing
        ``.read()``. ``None`` when nothing was recorded.

    Returns
    -------
    str
        Path to an MP3 file containing the spoken response, or a plain
        error-message string when no audio was given or transcription fails.
    """
    # Check if audio is valid
    if audio is None:
        return "No audio file received."

    if isinstance(audio, str):  # If audio is passed as a file path (string)
        audio_file_path = audio
    else:
        # File-like object: persist the bytes to a temp file for upload.
        # NamedTemporaryFile replaces the original tempfile.mktemp, which is
        # deprecated and has a name-generation/creation race (insecure).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
            f.write(audio.read())  # Save audio data to the file
            audio_file_path = f.name

    # Upload audio to AssemblyAI for transcription
    transcriber = aai.Transcriber()
    transcript = transcriber.transcribe(audio_file_path)  # Transcribe the uploaded file
    if transcript.status == aai.TranscriptStatus.error:
        return f"Error transcribing audio: {transcript.error}"

    transcript_text = transcript.text
    print(f"Transcription: {transcript_text}")

    # Generate response using Cerebras Llama 3.3 (streamed chunks)
    stream = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "Conversation will be started in this chat. Try as much as possible to provide concise and informed responses to the prompt.",
            },
            {"role": "user", "content": transcript_text},
        ],
        model="llama-3.3-70b",
        stream=True,
        max_completion_tokens=1024,
        temperature=0.4,
        top_p=1,
    )
    # Concatenate the streamed deltas; chunks may carry None content.
    response_text = "".join(chunk.choices[0].delta.content or "" for chunk in stream)
    print(f"Response from LLM: {response_text}")

    # Generate speech using gTTS (Google Text-to-Speech)
    tts = gTTS(text=response_text, lang='en', slow=False)
    # Save the audio to a temporary file; delete=False so Gradio can read it
    # after this function returns.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tts.save(tmp_file.name)
        audio_path = tmp_file.name
    return audio_path
# Gradio Interface
# Microphone input is handed to process_audio as a file path
# (type="filepath"); the function returns an MP3 path for playback.
interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),  # Use 'file' to correctly handle the audio file
    outputs=gr.Audio(type="filepath", label="Generated Response Audio", show_download_button=True,
                     # Waveform styling for the response player.
                     waveform_options=gr.WaveformOptions(
                         waveform_color="#01C6FF",
                         waveform_progress_color="#0066B4",
                         skip_length=2,
                         show_controls=False,
                     )),
    title="Xplayn: Voice-to-Audio AI",
    description="Record your voice, and the system will transcribe it, generate a response using Llama 3.3, and return the response as audio."
)
# Blocking call: starts the local web server for the Space.
interface.launch()