Spaces:
Sleeping
Sleeping
File size: 1,149 Bytes
a93487d aaf1ba9 fb3c8c4 4f7064a a93487d fb3c8c4 7bbe6db a93487d fb3c8c4 a93487d fb3c8c4 4f7064a fb3c8c4 a93487d fb3c8c4 a93487d fb3c8c4 96439ed fb3c8c4 a93487d fb3c8c4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
import gradio as gr
from asr import transcribe_audio # Import your ASR function
from lid import detect_language # Import your Language Detection function
from tts import synthesize # Import the correct TTS function
def process_audio(audio_data):
    """Run the ASR -> language detection -> TTS pipeline on one audio input.

    Args:
        audio_data: The audio value delivered by the Gradio audio input. It
            is forwarded unchanged to ``transcribe_audio`` and
            ``detect_language``. ``None`` means there is no audio to process.

    Returns:
        A ``(generated_text, speech_output)`` tuple. ``generated_text`` is a
        summary string containing the detected language and the
        transcription; ``speech_output`` is the first element of the value
        returned by ``synthesize`` for that text. When ``audio_data`` is
        ``None`` the result is ``("", None)`` and no model is invoked.
    """
    # The interface is built with live=True, so this callback can be invoked
    # while the audio widget is empty or has just been cleared; Gradio passes
    # None in that case. Bail out instead of feeding None to the models.
    if audio_data is None:
        return "", None

    # Step 1: speech recognition (audio -> text).
    transcription = transcribe_audio(audio_data)

    # Step 2: language identification, run on the same raw audio.
    language = detect_language(audio_data)

    # Step 3: build the text response. For now this is just a summary of the
    # two results above; replace with real model inference when available.
    generated_text = f"Detected Language: {language}\n\nTranscription: {transcription}"

    # Step 4: speak the response in the detected language. synthesize returns
    # a pair; only its first element is used as the audio output.
    speech_output, _ = synthesize(text=generated_text, lang=language, speed=1.0)

    return generated_text, speech_output
# Gradio UI wiring: a single audio input is handed to process_audio, whose
# two return values populate the text box and the audio player below.
interface = gr.Interface(
    fn=process_audio,
    # Only 'type' is configured on the input; no 'source' argument is passed.
    inputs=gr.Audio(type="numpy"),
    outputs=[
        gr.Textbox(label="Generated Text"),
        gr.Audio(label="Generated Speech"),
    ],
    # Live mode: per the Gradio docs, the interface re-runs automatically
    # whenever an input changes, without a submit click.
    live=True,
)

# Launch the app only when this file is executed directly, not on import.
if __name__ == "__main__":
    interface.launch()
|