| | import gradio as gr |
| | from pydub import AudioSegment |
| | import librosa |
| | import torch |
| | import soundfile as sf |
| | import numpy as np |
| | import os |
| |
|
| | |
| | from model import textonly, speechonly |
| |
|
| |
|
def text_interface(text):
    """Run the text-only model on *text* and return its response string."""
    return textonly(text)
| |
|
| |
|
def speech_interface(audio_file):
    """Process speech input and return the LLM response plus output audio path.

    Args:
        audio_file: Gradio ``type="numpy"`` audio value — a tuple of
            ``(sample_rate, audio_data)`` — or None if nothing was provided.

    Returns:
        A 2-tuple ``(llm_response, audio_path)`` matching the two Gradio
        output components wired to this handler; ``audio_path`` is None
        when no audio was supplied.
    """
    if audio_file is None:
        # The UI has two output components, so both slots must be filled.
        return "Please provide an audio file", None

    sr, audio_data = audio_file

    # Gradio delivers recorded/uploaded audio as integer PCM (commonly
    # int16); librosa.resample requires floating-point samples, so
    # normalize to [-1.0, 1.0] float32 first.
    if np.issubdtype(audio_data.dtype, np.integer):
        scale = np.iinfo(audio_data.dtype).max
        audio_data = audio_data.astype(np.float32) / scale

    # Down-mix multi-channel audio to mono by averaging channels.
    if len(audio_data.shape) > 1:
        audio_data = np.mean(audio_data, axis=1)

    # The speech model expects 16 kHz input.
    if sr != 16000:
        audio_data = librosa.resample(audio_data, orig_sr=sr, target_sr=16000)

    # speechonly writes the synthesized reply to output_wav_path and
    # returns the LLM's text response.
    llm_response = speechonly(audio_data, output_wav_path="output.wav")

    # Return both the text and the generated audio file path — one value
    # per wired output component (the original returned only the text,
    # leaving the audio output unfilled).
    return llm_response, "output.wav"
| |
|
| |
|
| | |
# Gradio UI: one tab for text-only interaction, one for speech in/out.
with gr.Blocks(title="Hamid AI Speech API") as app:
    gr.Markdown("# Hamid AI Speech Interface")
    gr.Markdown("Choose between text-only or speech-based interaction")

    with gr.Tab("Text Only"):
        txt_in = gr.Textbox(label="Enter your text", placeholder="Type something...")
        txt_out = gr.Textbox(label="Response", interactive=False)
        run_text = gr.Button("Process Text")
        run_text.click(fn=text_interface, inputs=txt_in, outputs=txt_out)

    with gr.Tab("Speech Only"):
        mic_in = gr.Audio(label="Upload or record audio", type="numpy")
        llm_out = gr.Textbox(label="LLM Response", interactive=False)
        wav_out = gr.Audio(label="Output Audio", type="filepath")
        run_speech = gr.Button("Process Speech")
        run_speech.click(fn=speech_interface, inputs=mic_in, outputs=[llm_out, wav_out])
| |
|
| |
|
if __name__ == "__main__":
    # Local-only launch; set share=True to expose a public Gradio link.
    app.launch(share=False)
| |
|