import asyncio
import base64
import json
from typing import AsyncIterator, Optional

import gradio as gr
import websockets

# Endpoint of the remote audio-processing WebSocket service.
WS_URL = "ws://nexa-omni.nexa4ai.com/ws/process-audio/"


def _encode_audio_file(audio_path: str) -> str:
    """Return the contents of *audio_path* as a base64-encoded ASCII string."""
    with open(audio_path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")


async def process_audio_stream(
    audio_path: Optional[str], max_tokens: int
) -> AsyncIterator[str]:
    """Stream the server's response for an audio file over a WebSocket.

    This is an asynchronous generator. Each yielded value is the full text
    accumulated so far (not just the newest token), so a consumer can simply
    display the latest value. Failures are reported by yielding a
    human-readable message instead of raising.

    Args:
        audio_path: Path of the audio file to send. A falsy value (``None``
            or ``""``) yields a prompt asking for a file and stops.
        max_tokens: Forwarded unchanged to the server as ``"max_tokens"``.

    Yields:
        The cumulative response text after each ``"generating"`` message, or
        a single error/notice string.
    """
    if not audio_path:
        yield "Please upload or record an audio file first."
        return

    # Report local file problems separately so they are not mistaken for a
    # server connection failure.
    try:
        audio_b64 = _encode_audio_file(audio_path)
    except OSError as exc:
        yield f"Error reading audio file: {exc}"
        return

    try:
        async with websockets.connect(WS_URL) as websocket:
            # First message: the base64-encoded audio, sent as a str.
            await websocket.send(audio_b64)

            # Second message: generation parameters as a JSON string.
            # The prompt is always sent empty.
            await websocket.send(json.dumps({
                "prompt": "",
                "max_tokens": max_tokens,
            }))

            response = ""
            async for message in websocket:
                try:
                    data = json.loads(message)
                except (json.JSONDecodeError, UnicodeDecodeError):
                    # Ignore frames that are not valid JSON.
                    continue

                # Ignore valid JSON that is not an object; without this a
                # stray list/number would surface as a bogus connection error.
                if not isinstance(data, dict):
                    continue

                status = data.get("status")
                if status == "generating":
                    response += data.get("token", "")
                    yield response
                elif status == "complete":
                    break
                elif status == "error":
                    yield f"Error: {data.get('error', 'unknown error')}"
                    break
    except Exception as e:
        # Top-level boundary: surface any remaining failure in the UI rather
        # than letting the generator raise.
        yield f"Error connecting to server: {str(e)}"


# Create Gradio interface
# NOTE(review): the description mentions an optional prompt, but no prompt
# input is defined below and process_audio_stream always sends an empty one.
demo = gr.Interface(
    fn=process_audio_stream,
    inputs=[
        gr.Audio(
            type="filepath",
            label="Upload or Record Audio",
            sources=["upload", "microphone"],
        ),
        gr.Slider(
            minimum=50,
            maximum=200,
            value=50,
            step=1,
            label="Max Tokens",
        ),
    ],
    outputs=gr.Textbox(label="Response", interactive=False),
    title="Nexa Omni",
    description="Upload an audio file and optionally provide a prompt to analyze the audio content.",
    examples=[
        ["example_audios/example_1.wav", 200],
    ],
)

if __name__ == "__main__":
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)