File size: 2,172 Bytes
cfb4e8c
de76a17
 
cfb4e8c
de76a17
 
cfb4e8c
1bccd9f
 
 
 
de76a17
 
 
1bccd9f
 
 
 
 
 
 
 
 
 
 
 
ff9e518
de76a17
1bccd9f
 
 
 
 
 
 
 
 
 
 
 
 
de76a17
 
ff9e518
de76a17
 
 
 
 
 
 
 
 
1bccd9f
 
 
 
 
 
de76a17
 
 
7bd2b9b
de76a17
 
1bccd9f
de76a17
 
 
1bccd9f
7bd2b9b
 
 
de76a17
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import gradio as gr
import requests
import os

# FastAPI endpoint that receives the uploaded audio file and the max_tokens
# form field via HTTP POST.
API_URL = "https://nexa-omni.nexa4ai.com/process-audio/"

# Path of the most recent audio file that existed on disk when it was
# submitted; reused when a later call arrives without a new file.
# NOTE(review): this is module-level state, so it is shared by every caller in
# the process -- confirm whether per-session state is wanted instead.
last_valid_audio = None


def process_audio(audio_path, max_tokens):
    """
    Send an audio file to the FastAPI backend and return its text response.

    Args:
        audio_path: Filesystem path of the uploaded/recorded audio, or a falsy
            value when nothing was provided. If it is falsy, the last valid
            path seen by this function is reused.
        max_tokens: Value forwarded to the backend as the ``max_tokens`` form
            field.

    Returns:
        The ``response`` field of the backend's JSON reply on success,
        otherwise a human-readable message describing why nothing was
        processed or what went wrong. Errors are returned as text rather than
        raised so they can be shown in the output textbox.
    """
    global last_valid_audio

    if audio_path and os.path.exists(audio_path):
        # New audio uploaded/recorded: remember it for subsequent calls.
        last_valid_audio = audio_path
    elif not audio_path and not last_valid_audio:
        # No audio provided and no previous valid audio to fall back on.
        return "Please upload or record an audio file first."

    # Prefer the remembered file; fall back to whatever was passed in.
    current_audio = last_valid_audio or audio_path

    if not (current_audio and os.path.exists(current_audio)):
        return "No valid audio file available."

    try:
        # The context manager guarantees the file handle is released even when
        # the request fails (the previous bare open() was never closed).
        with open(current_audio, 'rb') as audio_file:
            files = {'file': ('audio.wav', audio_file, 'audio/wav')}
            data = {'max_tokens': max_tokens}
            # NOTE(review): no timeout is passed, so a stalled server blocks
            # this call indefinitely -- consider adding one.
            response = requests.post(API_URL, files=files, data=data)

        response.raise_for_status()
        return response.json()['response']
    except Exception as e:
        # UI boundary: surface any failure (I/O, HTTP, malformed JSON) as text.
        return f"Error processing audio: {e}"

# Create the Gradio interface: an audio input (upload or microphone) and a
# "Max Tokens" slider feed process_audio(), whose returned text is shown in a
# single textbox.
demo = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(
            type="filepath",  # process_audio() expects a path on disk
            label="Upload or Record Audio",
            sources=["upload", "microphone"]
        ),
        gr.Slider(
            minimum=50,
            maximum=200,
            value=50,
            step=1,
            label="Max Tokens"
        )
    ],
    outputs=gr.Textbox(label="Response"),
    title="Nexa Omni",
    # The interface has no prompt input, so the description only mentions the
    # controls that actually exist.
    description="Upload or record an audio file to analyze the audio content, and use the slider to set the maximum number of tokens.",
    examples=[
        # One example row: [audio path, max tokens].
        ["example_audios/example_1.wav", 50],
    ]
)

def clear_output(audio, max_tokens):
    """Return an empty string, ignoring both arguments."""
    cleared_text = ""
    return cleared_text
# Attach clear_output to the interface object under the name `load_examples`.
# NOTE(review): nothing in this file reads `demo.load_examples`; whether Gradio
# itself consults this attribute should be confirmed against the Gradio docs.
demo.load_examples = clear_output

# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()