frogcho123 committed on
Commit
8ba1b29
·
1 Parent(s): 3128259

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -19
app.py CHANGED
@@ -1,13 +1,9 @@
1
  import os
2
  import gradio as gr
3
- import whisper
4
- import IPython
5
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
6
  from gtts import gTTS
7
 
8
- # Load the ASR model
9
- asr_model = whisper.load_model("base")
10
-
11
  # Load the translation model
12
  translation_tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
13
  translation_model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
@@ -24,25 +20,19 @@ available_languages = {
24
  def translate_audio(audio_file, target_language):
25
  to_lang = available_languages[target_language]
26
 
27
- # Audio to text (ASR)
28
- audio = whisper.load_audio(audio_file.name)
29
- audio = whisper.pad_or_trim(audio)
30
- mel = whisper.log_mel_spectrogram(audio).to(asr_model.device)
31
- _, probs = asr_model.detect_language(mel)
32
- options = whisper.DecodingOptions()
33
- result = whisper.decode(asr_model, mel, options)
34
- text = result.text
35
 
36
  # Translate the text
37
  translation_tokenizer.src_lang = to_lang
38
- encoded_bg = translation_tokenizer(text, return_tensors="pt")
39
  generated_tokens = translation_model.generate(**encoded_bg)
40
- translated_text = translation_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
 
 
 
 
41
 
42
- # Text-to-audio (TTS)
43
- tts = gTTS(text=translated_text, lang=to_lang)
44
- output_file = "translated_audio.mp3"
45
- tts.save(output_file)
46
  return output_file
47
 
48
  # Gradio interface
 
1
  import os
2
  import gradio as gr
3
+ import soundfile as sf
 
4
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
5
  from gtts import gTTS
6
 
 
 
 
7
  # Load the translation model
8
  translation_tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
9
  translation_model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
 
20
  def translate_audio(audio_file, target_language):
21
  to_lang = available_languages[target_language]
22
 
23
+ # Load audio
24
+ audio, sample_rate = sf.read(audio_file.name)
 
 
 
 
 
 
25
 
26
  # Translate the text
27
  translation_tokenizer.src_lang = to_lang
28
+ encoded_bg = translation_tokenizer(audio, return_tensors="pt", padding=True, truncation=True)
29
  generated_tokens = translation_model.generate(**encoded_bg)
30
+ translated_audio = translation_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
31
+
32
+ # Save translated audio
33
+ output_file = "translated_audio.wav"
34
+ sf.write(output_file, translated_audio, sample_rate)
35
 
 
 
 
 
36
  return output_file
37
 
38
  # Gradio interface