File size: 2,521 Bytes
cfb4e8c
22c5bdb
 
 
9759803
cfb4e8c
22c5bdb
de76a17
22c5bdb
de76a17
22c5bdb
b97cf3c
 
1bccd9f
de76a17
9759803
22c5bdb
9759803
 
 
 
 
22c5bdb
7fb94ea
9759803
 
22c5bdb
 
 
 
 
 
1bccd9f
22c5bdb
 
1bccd9f
9759803
22c5bdb
9759803
 
 
 
 
 
 
 
 
 
 
 
22c5bdb
de76a17
9759803
ff9e518
de76a17
 
22c5bdb
de76a17
 
 
 
 
 
1bccd9f
 
 
 
 
 
de76a17
 
22c5bdb
7bd2b9b
de76a17
 
502958f
de76a17
 
 
 
9759803
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import gradio as gr
import websockets
import asyncio
import json
import base64

async def process_audio_stream(audio_path, max_tokens):
    """
    Stream a model response for an audio file over a WebSocket.

    The audio file is read, base64-encoded and sent to the server, followed
    by a JSON message carrying the generation parameters. The server's JSON
    replies are then consumed until it reports completion or an error.

    Args:
        audio_path: Filesystem path of the audio file; a falsy value means
            no audio was provided.
        max_tokens: Value forwarded to the server as ``max_tokens``.

    Yields:
        str: The response accumulated so far (re-yielded after every
        received token), or a single human-readable error message.
        Failures are reported as yielded messages rather than raised.
    """
    if not audio_path:
        yield "Please upload or record an audio file first."
        return

    # Read and encode up front so a local file problem is reported as such
    # instead of being mislabelled as a server connection failure.
    try:
        with open(audio_path, 'rb') as f:
            # b64encode returns bytes; they are sent unchanged below.
            encoded_audio = base64.b64encode(f.read())
    except OSError as e:
        yield f"Error reading audio file: {e}"
        return

    try:
        # Connect to WebSocket
        async with websockets.connect('ws://nexa-omni.nexa4ai.com/ws/process-audio/') as websocket:
            # First message: base64 encoded audio data
            await websocket.send(encoded_audio)

            # Second message: generation parameters (the prompt is always empty)
            await websocket.send(json.dumps({
                "prompt": "",
                "max_tokens": max_tokens
            }))

            response = ""

            # Receive streaming response
            async for message in websocket:
                try:
                    data = json.loads(message)
                except (json.JSONDecodeError, UnicodeDecodeError):
                    # Ignore frames that are not valid JSON text.
                    continue
                if not isinstance(data, dict):
                    # Valid JSON but not an object: nothing to dispatch on.
                    continue

                # Use .get so a message with a missing field cannot abort
                # the stream with a KeyError.
                status = data.get("status")
                if status == "generating":
                    response += data.get("token", "")
                    yield response
                elif status == "complete":
                    break
                elif status == "error":
                    yield f"Error: {data.get('error', 'unknown error')}"
                    break

    except Exception as e:
        # Top-level boundary for the UI: surface the failure as text.
        yield f"Error connecting to server: {str(e)}"

# Create Gradio interface: one audio input and a max-token slider feed
# process_audio_stream, whose yielded strings are shown in a read-only textbox.
demo = gr.Interface(
    fn=process_audio_stream,
    inputs=[
        gr.Audio(
            type="filepath",  # the handler opens the audio from a file path
            label="Upload or Record Audio",
            sources=["upload", "microphone"]
        ),
        gr.Slider(
            minimum=50,
            maximum=200,
            value=50,
            step=1,
            label="Max Tokens"
        )
    ],
    outputs=gr.Textbox(label="Response", interactive=False),
    title="Nexa Omni",
    # The interface exposes no prompt field (the handler always sends an
    # empty prompt), so the description must not advertise one.
    description="Upload or record an audio file to analyze the audio content.",
    examples=[
        ["example_audios/example_1.wav", 200],
    ]
)

if __name__ == "__main__":
    # Enable the queue before launching, then listen on all interfaces, port 7860.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)