Spaces:
Running
Running
File size: 2,521 Bytes
cfb4e8c 22c5bdb 9759803 cfb4e8c 22c5bdb de76a17 22c5bdb de76a17 22c5bdb b97cf3c 1bccd9f de76a17 9759803 22c5bdb 9759803 22c5bdb 7fb94ea 9759803 22c5bdb 1bccd9f 22c5bdb 1bccd9f 9759803 22c5bdb 9759803 22c5bdb de76a17 9759803 ff9e518 de76a17 22c5bdb de76a17 1bccd9f de76a17 22c5bdb 7bd2b9b de76a17 502958f de76a17 9759803 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import gradio as gr
import websockets
import asyncio
import json
import base64
async def process_audio_stream(audio_path, max_tokens):
    """Stream a text response for an audio file via the WebSocket service.

    The file at ``audio_path`` is read and base64-encoded, sent to the
    server as a bytes payload, and followed by a JSON message carrying an
    empty prompt and ``max_tokens``. Server messages are then consumed
    until a ``"complete"`` or ``"error"`` status arrives or the connection
    ends.

    Args:
        audio_path: Path of the audio file to send. A falsy value (no file
            selected) yields a reminder message instead of contacting the
            server.
        max_tokens: Value forwarded unchanged to the server as
            ``max_tokens``.

    Yields:
        The accumulated response text after each ``"generating"`` message,
        or a single human-readable error string if reading the file,
        connecting, or generating fails.
    """
    if not audio_path:
        yield "Please upload or record an audio file first."
        return

    # Read the file outside the connection try-block so a local I/O failure
    # is not misreported as a server connection problem.
    try:
        with open(audio_path, "rb") as audio_file:
            payload = base64.b64encode(audio_file.read())
    except OSError as exc:
        yield f"Error reading audio file: {exc}"
        return

    try:
        async with websockets.connect(
            "ws://nexa-omni.nexa4ai.com/ws/process-audio/"
        ) as websocket:
            # First message: the base64-encoded audio (bytes).
            await websocket.send(payload)
            # Second message: generation parameters as JSON text.
            await websocket.send(
                json.dumps({"prompt": "", "max_tokens": max_tokens})
            )

            response = ""
            async for message in websocket:
                try:
                    data = json.loads(message)
                except json.JSONDecodeError:
                    # Ignore frames that are not valid JSON.
                    continue
                if not isinstance(data, dict):
                    # Valid JSON but not an object: nothing to act on.
                    continue

                status = data.get("status")
                if status == "generating":
                    # A missing/empty token leaves the text unchanged
                    # instead of aborting the whole stream.
                    response += data.get("token") or ""
                    yield response
                elif status == "complete":
                    break
                elif status == "error":
                    yield f"Error: {data.get('error', 'unknown error')}"
                    break
    except Exception as exc:
        # Top-level boundary for the UI: surface any connection/protocol
        # failure as text rather than raising into Gradio.
        yield f"Error connecting to server: {exc}"
# Build the Gradio UI: an audio input and a max-token slider feed
# process_audio_stream; its output is shown in a non-interactive textbox.
demo = gr.Interface(
    fn=process_audio_stream,
    inputs=[
        gr.Audio(
            type="filepath",
            label="Upload or Record Audio",
            sources=["upload", "microphone"],
        ),
        gr.Slider(
            minimum=50,
            maximum=200,
            value=50,
            step=1,
            label="Max Tokens",
        ),
    ],
    outputs=gr.Textbox(label="Response", interactive=False),
    title="Nexa Omni",
    # The interface exposes no prompt input (only audio and max tokens),
    # so the description must not advertise one.
    description="Upload or record an audio file to analyze its content.",
    examples=[
        ["example_audios/example_1.wav", 200],
    ],
)
if __name__ == "__main__":
    # Enable request queuing first, then serve on all interfaces at port 7860.
    queued_demo = demo.queue()
    queued_demo.launch(server_name="0.0.0.0", server_port=7860)
|