# NOTE: the lines "Spaces: Sleeping" were Hugging Face Spaces page residue
# captured during extraction — not part of the program.
import asyncio
import os
import tempfile
import wave

import gradio as gr
from google import genai
from google.genai import types
from groq import Groq
# Bot identity / branding constants used by the UI and (presumably) prompts.
BOT_NAME = "Nilla"
MOTOR_NAME = "Nilla-2026 GPT motor"
PROVIDER = "HumanV lab"

# Secrets and configuration read from the environment (set in the deployment).
# POR: passed below as the Gemini system instruction — presumably the system
# prompt text; confirm against the deployment's secret configuration.
POR = os.environ.get("POR")
# MODEL_VERSION: the Gemini Live model id used for the realtime session.
MODEL_ID = os.environ.get("MODEL_VERSION")
# UK_SERVER_API: used as the Google GenAI API key below.
UK_SERVER_API = os.environ.get("UK_SERVER_API")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Gemini client pinned to the v1alpha API surface (needed for the Live API
# options used in nilla_engine, e.g. enable_affective_dialog).
client_gemini = genai.Client(
    api_key=UK_SERVER_API,
    http_options={"api_version": "v1alpha"}
)
# Groq client, used only for Whisper speech-to-text transcription.
client_groq = Groq(api_key=GROQ_API_KEY)
async def nilla_engine(audio_path, text_input, chat_history):
    """Run one conversational turn against the Gemini Live API.

    Resolves the user's input (typed text takes priority; otherwise the
    recorded audio is transcribed with Groq Whisper), sends the accumulated
    history plus the new user turn to a live Gemini session, and collects the
    model's reply transcription. When the user spoke, the model's audio reply
    is additionally written to a temporary WAV file.

    Args:
        audio_path: Filesystem path of a recorded audio clip, or None.
        text_input: Typed user message, possibly empty/None.
        chat_history: Prior turns in Gemini "turns" format, or None.

    Returns:
        Tuple of (wav_path_or_None, user_text, model_text, new_history).
        On any engine failure returns (None, user_text, "Error", chat_history)
        so the Gradio callback never raises.
    """
    if chat_history is None:
        chat_history = []

    user_text = ""
    is_voice = False
    if text_input and text_input.strip():
        # Typed text takes priority over any recorded audio.
        user_text = text_input
        is_voice = False
    elif audio_path:
        # Transcribe the recording via Groq's Whisper endpoint.
        with open(audio_path, "rb") as file:
            transcription = client_groq.audio.transcriptions.create(
                file=(audio_path, file.read()),
                model="whisper-large-v3",
                temperature=0,
                response_format="verbose_json",
            )
        user_text = transcription.text
        is_voice = True
    else:
        # Nothing to process: echo the history back unchanged.
        return None, "", "", chat_history

    output_path = None
    model_response_text = ""
    current_turns = chat_history + [{"role": "user", "parts": [{"text": user_text}]}]
    config = {
        "response_modalities": ["AUDIO"],
        "system_instruction": POR,
        "enable_affective_dialog": True,
        "output_audio_transcription": {}
    }
    try:
        async with client_gemini.aio.live.connect(model=MODEL_ID, config=config) as session:
            await session.send_client_content(turns=current_turns, turn_complete=True)
            if is_voice:
                # FIX: close the NamedTemporaryFile handle before wave.open
                # re-opens the path. The original left the handle open, leaking
                # a file descriptor per voice turn and failing outright on
                # Windows, where an open temp file cannot be reopened by path.
                temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
                output_path = temp_file.name
                temp_file.close()
                # Mono, 16-bit samples, 24 kHz — matching the raw PCM chunks
                # streamed back in response.data.
                with wave.open(output_path, "wb") as wav:
                    wav.setnchannels(1)
                    wav.setsampwidth(2)
                    wav.setframerate(24000)
                    async for response in session.receive():
                        if response.data:
                            wav.writeframes(response.data)
                        if response.server_content and response.server_content.output_transcription:
                            model_response_text += response.server_content.output_transcription.text
            else:
                # Text-only turn: accumulate just the reply transcription and
                # discard the audio chunks.
                async for response in session.receive():
                    if response.server_content and response.server_content.output_transcription:
                        model_response_text += response.server_content.output_transcription.text
        new_history = current_turns + [{"role": "model", "parts": [{"text": model_response_text}]}]
        return output_path, user_text, model_response_text, new_history
    except Exception:
        # Best-effort degradation: surface a generic error string to the UI
        # rather than crashing the Gradio callback. NOTE(review): consider
        # logging the exception here — it is currently swallowed silently.
        return None, user_text, "Error", chat_history
def run_interface(audio_file, text_input, chat_history):
    """Synchronous Gradio entry point that bridges into the async engine.

    FIX: the original built a fresh event loop with asyncio.new_event_loop()
    on every click and never closed it, leaking a loop (and its selector/fd
    resources) per call. asyncio.run() creates, runs, and properly closes a
    new loop for each invocation.

    Args/Returns: identical to nilla_engine — the 4-tuple is unpacked by the
    Gradio click handler into (out_audio, out_user, out_nilla, history).
    """
    return asyncio.run(nilla_engine(audio_file, text_input, chat_history))
# --- Gradio UI wiring -------------------------------------------------------
with gr.Blocks(title=BOT_NAME) as demo:
    # Hidden JSON component that persists the Gemini-format chat history
    # between button clicks (Gradio state carried through inputs/outputs).
    history_component = gr.JSON(value=[], visible=False)
    with gr.Row():
        in_audio = gr.Audio(label="Audio", type="filepath")
        in_text = gr.Textbox(label="Text")
    with gr.Row():
        out_audio = gr.Audio(label="Voice Resp")
        out_user = gr.Textbox(label="User Text")
        out_nilla = gr.Textbox(label="Nilla Text")
    btn = gr.Button("Process")
    # One button drives the full turn: (audio, text, history) -> engine ->
    # (reply audio, user transcript, reply text, updated history).
    btn.click(
        fn=run_interface,
        inputs=[in_audio, in_text, history_component],
        outputs=[out_audio, out_user, out_nilla, history_component],
        api_name="run_interface"
    )

if __name__ == "__main__":
    demo.launch()