Spaces:
Running
Running
"""A simple web interactive chat demo based on gradio.""" | |
import os | |
import time | |
import gradio as gr | |
import numpy as np | |
import spaces | |
import torch | |
from inference import OmniInference | |
# Choose the compute device: CUDA when torch reports it as available,
# otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build the inference client from the local checkpoint directory and warm it
# up once at import time, before the UI below is launched.
omni_client = OmniInference('./checkpoint', device)
omni_client.warm_up()

# Output audio parameters. OUT_RATE and OUT_CHANNELS are used by
# process_audio; OUT_CHUNK is not referenced in the visible part of this file.
OUT_CHUNK = 4096
OUT_RATE = 24000
OUT_CHANNELS = 1
def process_audio(audio):
    """Stream the model's audio reply for a recorded clip.

    Generator used as the Gradio callback. Each chunk produced by
    ``omni_client.run_AT_batch_stream`` is decoded as 16-bit integer samples
    and yielded immediately, so playback can begin before the full reply
    has been generated.

    Args:
        audio: Path of the input audio file, or ``None`` when no recording
            is available (in which case nothing is yielded).

    Yields:
        ``(OUT_RATE, samples)`` tuples, where ``samples`` is an ``int16``
        array of shape ``(n, OUT_CHANNELS)``.
    """
    filepath = audio
    print(f"filepath: {filepath}")
    if filepath is None:
        return

    started = time.time()
    for index, chunk in enumerate(omni_client.run_AT_batch_stream(filepath)):
        if index == 0:
            # Report latency to the first chunk only.
            print(f"first chunk time cost: {time.time() - started:.3f}")
        # The buffer is already decoded as int16, so no further cast is
        # needed; reshape to (samples, channels) for the audio output.
        samples = np.frombuffer(chunk, dtype=np.int16).reshape(-1, OUT_CHANNELS)
        yield OUT_RATE, samples
# Input component: hands the callback a path to the recorded audio file.
_microphone = gr.Audio(type="filepath", label="Microphone")

# Output component: configured for streaming playback that starts
# automatically.
_response = gr.Audio(label="Response", streaming=True, autoplay=True)

# Wire the streaming callback between the two audio components.
demo = gr.Interface(
    fn=process_audio,
    inputs=_microphone,
    outputs=[_response],
    title="Chat Mini-Omni Demo",
    live=True,
)

# Enable the request queue, then start the web server.
demo.queue().launch()