Spaces:

NexaAIDev
/

omni-audio-demo

Running

File size: 2,172 Bytes

cfb4e8c
de76a17
 
cfb4e8c
de76a17
 
cfb4e8c
1bccd9f
 
 
 
de76a17
 
 
1bccd9f
 
 
 
 
 
 
 
 
 
 
 
ff9e518
de76a17
1bccd9f
 
 
 
 
 
 
 
 
 
 
 
 
de76a17
 
ff9e518
de76a17
 
 
 
 
 
 
 
 
1bccd9f
 
 
 
 
 
de76a17
 
 
7bd2b9b
de76a17
 
1bccd9f
de76a17
 
 
1bccd9f
7bd2b9b
 
 
de76a17

import gradio as gr
import requests
import os

# FastAPI endpoint that process_audio posts the audio file and max_tokens to
API_URL = "https://nexa-omni.nexa4ai.com/process-audio/"

# Path of the most recent audio file that existed on disk when it was
# submitted. process_audio falls back to it when a later call arrives without
# a usable path. It is module-level state, so it is shared by every call made
# in this process.
last_valid_audio = None

def process_audio(audio_path, max_tokens):
    """
    Send an audio file to the FastAPI backend and return its text response.

    The most recent path that existed on disk is remembered in the
    module-level ``last_valid_audio`` and reused when a later call does not
    supply a usable path.

    Args:
        audio_path: Filesystem path of the uploaded/recorded audio, or a
            falsy value when no audio was provided.
        max_tokens: Value forwarded to the backend as the ``max_tokens``
            form field.

    Returns:
        The ``response`` field of the backend's JSON reply on success;
        otherwise a human-readable message describing why the audio could
        not be processed. Exceptions are never propagated to the caller.
    """
    global last_valid_audio

    if audio_path and os.path.exists(audio_path):
        # New audio uploaded/recorded: remember it for later calls.
        last_valid_audio = audio_path
    elif not audio_path and not last_valid_audio:
        # No audio provided and no previous valid audio to fall back on.
        return "Please upload or record an audio file first."

    # Prefer the remembered path; fall back to whatever was passed in.
    current_audio = last_valid_audio or audio_path

    try:
        # Only proceed if we have a file that still exists on disk.
        if not (current_audio and os.path.exists(current_audio)):
            return "No valid audio file available."

        # Open the file in a context manager so the handle is closed even
        # when the request fails; the original left it open on every call.
        with open(current_audio, 'rb') as audio_file:
            files = {'file': ('audio.wav', audio_file, 'audio/wav')}
            data = {'max_tokens': max_tokens}
            # NOTE(review): no timeout is passed, so a stalled backend blocks
            # this call indefinitely — consider adding one once the expected
            # processing time is known.
            response = requests.post(API_URL, files=files, data=data)

        response.raise_for_status()
        return response.json()['response']
    except Exception as e:
        # Top-level UI boundary: report the failure as text instead of
        # raising into the Gradio handler.
        return f"Error processing audio: {e}"

# Create the Gradio interface: an audio input (upload or microphone) that is
# handed to process_audio as a file path, plus a slider for max_tokens.
demo = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(
            type="filepath",
            label="Upload or Record Audio",
            sources=["upload", "microphone"]
        ),
        gr.Slider(
            minimum=50,
            maximum=200,
            value=50,
            step=1,
            label="Max Tokens"
        )
    ],
    outputs=gr.Textbox(label="Response"),
    title="Nexa Omni",
    # The interface exposes no prompt field, so the description only mentions
    # the inputs that actually exist (audio and the Max Tokens slider).
    description="Upload or record an audio file and set Max Tokens to analyze the audio content.",
    # Each example row supplies values in input order: audio path, max tokens.
    examples=[
        ["example_audios/example_1.wav", 50],
    ]
)

def clear_output(audio, max_tokens):
    """Return an empty string, ignoring both arguments."""
    blank_text = ""
    return blank_text
# NOTE(review): this stores clear_output as a `load_examples` attribute on the
# interface object. Whether Gradio ever reads or calls that attribute is not
# visible here — confirm this has the intended effect.
demo.load_examples = clear_output

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()