File size: 1,692 Bytes
d84f405
 
 
 
 
3420bec
 
d84f405
17c527a
87d303a
17c527a
c89a3ea
17c527a
30318a6
17c527a
30318a6
87d303a
d84f405
17c527a
bd97165
17c527a
30318a6
17c527a
 
d84f405
17c527a
 
d84f405
17c527a
 
 
 
d84f405
17c527a
 
 
d84f405
17c527a
d84f405
 
 
 
 
17c527a
4cec81b
d84f405
 
0123058
c89a3ea
 
 
d84f405
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import functools
import os
import tempfile
import threading

import gradio as gr
import whisper
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from gtts import gTTS
import sentencepiece


@functools.lru_cache(maxsize=None)
def _load_speech_model():
    """Load the Whisper "base" model in float32 once and reuse it afterwards."""
    return whisper.load_model("base").float()


@functools.lru_cache(maxsize=None)
def _load_translator():
    """Load the "alirezamsh/small100" tokenizer and model once and reuse them."""
    tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
    translator = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
    return tokenizer, translator


# The cached models and tokenizer are shared by every call, and the tokenizer
# is reconfigured per request, so inference must not interleave between calls.
_INFERENCE_LOCK = threading.Lock()


def translate_voice(file, target_lang):
    """Transcribe an audio file, translate the transcript and synthesize speech.

    Args:
        file: Object whose ``name`` attribute is the path of the audio file to
            transcribe (presumably the temp file handed over by the Gradio
            ``File`` input; verify against the caller).
        target_lang: Language code used both as the translation target and as
            the gTTS voice language (e.g. ``"en"``, ``"de"``).

    Returns:
        A 4-tuple ``(audio_path, text, translated_text, target_lang)`` where
        ``audio_path`` is the path of the generated MP3, or ``None`` when the
        translation is empty and nothing could be synthesized, ``text`` is the
        Whisper transcript and ``translated_text`` is its translation.
    """
    with _INFERENCE_LOCK:
        # Models are loaded lazily on first use, inside the lock, so two
        # simultaneous first requests do not both load them.
        speech_model = _load_speech_model()

        # Only a single fixed-length window of the recording is decoded.
        audio = whisper.pad_or_trim(whisper.load_audio(file.name))
        mel = whisper.log_mel_spectrogram(audio).to(speech_model.device).float()

        # No explicit detect_language() pass: decode() performs language
        # detection itself when DecodingOptions.language is left unset.
        options = whisper.DecodingOptions(fp16=False)
        text = whisper.decode(speech_model, mel, options).text

        tokenizer, translator = _load_translator()
        # NOTE(review): the *target* code is assigned to ``src_lang``; presumably
        # this checkpoint expects the target-language tag on the encoder input.
        # Confirm against the model card before changing it.
        tokenizer.src_lang = target_lang
        encoded = tokenizer(text, return_tensors="pt")
        generated_tokens = translator.generate(**encoded)
        translated_text = tokenizer.batch_decode(
            generated_tokens, skip_special_tokens=True
        )[0]

    # gTTS refuses empty text, so skip synthesis instead of failing the request.
    if not translated_text.strip():
        return None, text, translated_text, target_lang

    tts = gTTS(text=translated_text, lang=target_lang)

    # Write to a unique file per call so overlapping requests cannot overwrite
    # each other's audio. The file is not deleted here.
    fd, filename = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    tts.save(filename)

    return filename, text, translated_text, target_lang


# Input widgets, in the positional order translate_voice(file, target_lang)
# receives them.
_audio_upload = gr.inputs.File(label="Your Audio")
_language_choice = gr.inputs.Dropdown(
    choices=['en', 'ru', 'de', 'fr', 'bg'],
    label="Target Language",
)

# Output widgets, in the order of the tuple returned by translate_voice.
_result_widgets = [
    gr.outputs.Audio(type="filepath", label="Translated Audio"),
    gr.outputs.Textbox(label="Original Text"),
    gr.outputs.Textbox(label="Translated Text"),
    gr.outputs.Textbox(label="Target Language"),
]

# Build the web UI around translate_voice and start serving it.
iface = gr.Interface(
    fn=translate_voice,
    inputs=[_audio_upload, _language_choice],
    outputs=_result_widgets,
)
iface.launch()