File size: 1,149 Bytes
a93487d
aaf1ba9
fb3c8c4
4f7064a
a93487d
fb3c8c4
 
7bbe6db
a93487d
fb3c8c4
 
a93487d
fb3c8c4
 
 
 
 
4f7064a
fb3c8c4
 
a93487d
fb3c8c4
a93487d
fb3c8c4
96439ed
fb3c8c4
 
a93487d
 
fb3c8c4
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import gradio as gr
from asr import transcribe_audio  # Import your ASR function
from lid import detect_language  # Import your Language Detection function
from tts import synthesize  # Import the correct TTS function

def process_audio(audio_data):
    """Transcribe an audio clip, detect its language, and speak a summary.

    Args:
        audio_data: Audio payload handed over by the Gradio audio input. It
            is passed unchanged to ``transcribe_audio`` and
            ``detect_language``. May be ``None`` when the input holds no
            audio (for example after the user clears it).

    Returns:
        A ``(generated_text, speech_output)`` pair: the text summary shown
        to the user and the first element returned by ``synthesize``.
        Returns ``("", None)`` when ``audio_data`` is ``None``.
    """
    # An empty audio input arrives as None; there is nothing to process, so
    # return empty outputs instead of handing None to the models.
    if audio_data is None:
        return "", None

    # Step 1: Perform ASR (Audio-to-Text)
    transcription = transcribe_audio(audio_data)

    # Step 2: Detect language
    language = detect_language(audio_data)

    # Step 3: Build the text response from the ASR result.
    # Placeholder for future model inference logic.
    generated_text = f"Detected Language: {language}\n\nTranscription: {transcription}"

    # Step 4: Convert generated text into speech using TTS.
    # synthesize returns a pair; only its first element is used here.
    speech_output, _ = synthesize(text=generated_text, lang=language, speed=1.0)

    return generated_text, speech_output

# Build the Gradio UI: one audio input feeding process_audio, plus a text box
# and an audio player that display its two return values.
_audio_input = gr.Audio(type="numpy")
_result_views = [
    gr.Textbox(label="Generated Text"),
    gr.Audio(label="Generated Speech"),
]
interface = gr.Interface(
    fn=process_audio,
    inputs=_audio_input,
    outputs=_result_views,
    live=True,  # re-run process_audio automatically when the input changes
)

if __name__ == "__main__":
    # Start the web app only when executed as a script, not on import.
    interface.launch()