# Xplayn voice assistant — Hugging Face Space
# Pipeline: record voice → transcribe (AssemblyAI) → respond (Cerebras Llama 3.3) → speak (gTTS)
| import os | |
| import gradio as gr | |
| import assemblyai as aai | |
| from cerebras.cloud.sdk import Cerebras | |
| from gtts import gTTS | |
| import tempfile | |
# API credentials are supplied via environment variables (Space secrets).
assembly_key = os.getenv("AssemblyVoice")
cerebras_key = os.getenv("CerebrasAI")

# Configure AssemblyAI globally and build the Cerebras client used below.
aai.settings.api_key = assembly_key
client = Cerebras(api_key=cerebras_key)
def process_audio(audio):
    """Transcribe recorded audio, generate an LLM reply, and return it as speech.

    Parameters
    ----------
    audio : str | file-like | None
        Path to the recorded audio file (Gradio delivers a path when the
        input uses ``type="filepath"``), or a file-like object exposing
        ``.read()``. ``None`` when nothing was recorded.

    Returns
    -------
    str
        Path to an MP3 file containing the spoken response, or a plain
        error-message string when no audio was given or transcription fails.
    """
    # Check if audio is valid
    if audio is None:
        return "No audio file received."

    if isinstance(audio, str):  # If audio is passed as a file path (string)
        audio_file_path = audio
    else:
        # File-like object: persist the bytes to a temp file for upload.
        # NamedTemporaryFile replaces the original tempfile.mktemp, which is
        # deprecated and has a name-generation/creation race (insecure).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
            f.write(audio.read())  # Save audio data to the file
            audio_file_path = f.name

    # Upload audio to AssemblyAI for transcription
    transcriber = aai.Transcriber()
    transcript = transcriber.transcribe(audio_file_path)  # Transcribe the uploaded file
    if transcript.status == aai.TranscriptStatus.error:
        return f"Error transcribing audio: {transcript.error}"

    transcript_text = transcript.text
    print(f"Transcription: {transcript_text}")

    # Generate response using Cerebras Llama 3.3 (streamed chunks)
    stream = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "Conversation will be started in this chat. Try as much as possible to provide concise and informed responses to the prompt.",
            },
            {"role": "user", "content": transcript_text},
        ],
        model="llama-3.3-70b",
        stream=True,
        max_completion_tokens=1024,
        temperature=0.4,
        top_p=1,
    )
    # Concatenate the streamed deltas; chunks may carry None content.
    response_text = "".join(chunk.choices[0].delta.content or "" for chunk in stream)
    print(f"Response from LLM: {response_text}")

    # Generate speech using gTTS (Google Text-to-Speech)
    tts = gTTS(text=response_text, lang='en', slow=False)
    # Save the audio to a temporary file; delete=False so Gradio can read it
    # after this function returns.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tts.save(tmp_file.name)
        audio_path = tmp_file.name
    return audio_path
# Gradio Interface
# Microphone input is handed to process_audio as a file path
# (type="filepath"); the function returns an MP3 path for playback.
interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),  # Use 'file' to correctly handle the audio file
    outputs=gr.Audio(type="filepath", label="Generated Response Audio", show_download_button=True,
                     # Waveform styling for the response player.
                     waveform_options=gr.WaveformOptions(
                         waveform_color="#01C6FF",
                         waveform_progress_color="#0066B4",
                         skip_length=2,
                         show_controls=False,
                     )),
    title="Xplayn: Voice-to-Audio AI",
    description="Record your voice, and the system will transcribe it, generate a response using Llama 3.3, and return the response as audio."
)
# Blocking call: starts the local web server for the Space.
interface.launch()