File size: 1,504 Bytes
44da960
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20f17fc
44da960
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import os
import tempfile

import gradio as gr
import requests
import soundfile as sf
from deep_translator import GoogleTranslator

def speech_translation(audio, language):
    """Transcribe a recording with the ASR service and translate it to English.

    The audio file is posted to the ASR endpoint, the sentence terminators
    ("।" and ".") are stripped from the returned transcript, and the result
    is translated to English with ``GoogleTranslator``.

    Args:
        audio: Path to the recorded audio file, or ``None`` when nothing was
            recorded. Files that do not end in ``.wav`` are re-encoded to a
            temporary WAV file before upload.
        language: Language of the recording; sent to the ASR service and used
            as the translation source language.

    Returns:
        Exactly one string: the English translation, an empty string when the
        transcript is empty, or a short message explaining why no translation
        could be produced.
    """
    # The interface declares a single text output, so every path must return
    # one string (the original returned a 2-tuple here).
    if audio is None:
        return "No audio input provided!"
    if not language:
        return "No language selected!"

    temp_path = None
    try:
        # Convert audio to .wav format if not already.
        if audio.endswith(".wav"):
            audio_file = audio
        else:
            wav_data, samplerate = sf.read(audio)
            # A unique temp file avoids concurrent requests overwriting each
            # other's audio, which a fixed file name would allow.
            fd, temp_path = tempfile.mkstemp(suffix=".wav")
            os.close(fd)
            sf.write(temp_path, wav_data, samplerate)
            audio_file = temp_path

        # ASR processing. The context manager guarantees the upload handle is
        # closed even if the request fails.
        with open(audio_file, "rb") as audio_handle:
            files = {
                'file': audio_handle,
                'language': (None, language),
                'vtt': (None, 'true'),
            }
            response = requests.post(
                'https://asr.iitm.ac.in/internal/asr/decode',
                files=files,
                # (connect, read) seconds: without a timeout a stalled
                # connection would block this worker indefinitely.
                timeout=(10, 300),
            )
    except requests.RequestException:
        return "Error in ASR processing"
    finally:
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file is not fatal.
                pass

    try:
        payload = response.json()
        print(payload)
        asr_output = payload['transcript']
    except (ValueError, KeyError, TypeError):
        # Non-JSON body, missing key, or a payload that is not a mapping.
        return "Error in ASR processing"
    if not isinstance(asr_output, str):
        return "Error in ASR processing"

    # Drop the Devanagari danda and the full stop in a single pass.
    asr_output = asr_output.translate(str.maketrans("", "", "।."))
    if not asr_output.strip():
        # Nothing to translate.
        return ""

    translator = GoogleTranslator(source=language, target='en')
    return translator.translate(asr_output)

# Input widgets: a recording delivered to the handler as a file path, and the
# language of that recording.
audio_input = gr.Audio(type="filepath", label="Record your speech")
language_input = gr.Dropdown(
    ["telugu", "hindi", "marathi", "bengali"],
    label="Select Language",
)

# Wire the widgets to the translation handler; the handler's result is shown
# in a single text output.
iface = gr.Interface(
    fn=speech_translation,
    inputs=[audio_input, language_input],
    outputs=["text"],
    title="Speech Translation",
    description="Record your speech and get the English translation.",
)

# Start the app and request a public share link.
iface.launch(share=True)