frogcho123 committed
Commit
87d303a
1 Parent(s): bd97165

Update app.py

Files changed (1)
  1. app.py +2 -11
app.py CHANGED
@@ -7,20 +7,15 @@ import sentencepiece
 
 
 def translate_voice(file, target_lang):
-    try:
-        # Load the model and switch to float32
+
     model = whisper.load_model("base").float()
 
-        # Load the audio
     audio = whisper.load_audio(file.name)
 
-        # Pad or trim the audio
     audio = whisper.pad_or_trim(audio)
 
-        # Convert the audio to a log Mel spectrogram and move it to the same device as the model (CPU in your case)
-        mel = whisper.log_mel_spectrogram(audio).to(model.device).float()  # convert to full-precision float32
+    mel = whisper.log_mel_spectrogram(audio).to(model.device).float()
 
-        # Proceed with your language detection and decoding
     _, probs = model.detect_language(mel)
     options = whisper.DecodingOptions(fp16 = False)
     result = whisper.decode(model, mel, options)
@@ -28,7 +23,6 @@ def translate_voice(file, target_lang):
     text = result.text
     lang = max(probs, key=probs.get)
 
-    # Translate
     tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
     model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
 
@@ -37,15 +31,12 @@ def translate_voice(file, target_lang):
     generated_tokens = model.generate(**encoded_bg)
     translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
 
-    # Text-to-audio (TTS)
     tts = gTTS(text=translated_text, lang=target_lang)
     filename = "to_speech.mp3"
     tts.save(filename)
 
     return filename, text, translated_text, target_lang
 
-    except Exception as e:
-        return str(e), "", "", ""
 
 iface = gr.Interface(
     fn=translate_voice,
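
Note: the substance of this commit is the CPU full-precision path: the model stays in float32, the mel spectrogram is cast with .float(), decoding runs with fp16=False, and the try/except wrapper and its comments are removed. A minimal standalone sketch of that Whisper pattern, assuming a placeholder input file "sample.wav" (not part of this repo):

# Minimal sketch of the CPU-friendly Whisper path used in translate_voice above.
# "sample.wav" is a placeholder; the calls mirror the lines shown in the diff.
import whisper

model = whisper.load_model("base").float()                         # keep weights in float32
audio = whisper.pad_or_trim(whisper.load_audio("sample.wav"))      # load and fit to 30 s
mel = whisper.log_mel_spectrogram(audio).to(model.device).float()  # full-precision features

_, probs = model.detect_language(mel)                              # per-language probabilities
result = whisper.decode(model, mel, whisper.DecodingOptions(fp16=False))
print(max(probs, key=probs.get), result.text)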
 
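The hunks end right where the Gradio wiring starts; only fn=translate_voice is visible here. The sketch below is purely illustrative of how such a function is commonly exposed: the inputs and outputs are assumptions, not the arguments actually used in app.py, and it assumes Gradio 3.x-style components where gr.File hands the function an object with a .name attribute (matching file.name above).

# Illustrative wiring only: the real gr.Interface arguments are outside this diff.
# Assumes translate_voice from app.py above is in scope.
import gradio as gr

iface = gr.Interface(
    fn=translate_voice,                                         # function defined in app.py
    inputs=[
        gr.File(label="Audio file"),                            # assumed input component
        gr.Textbox(label="Target language code (e.g. 'es')"),   # assumed
    ],
    outputs=[
        gr.Audio(label="Translated speech"),                    # path to to_speech.mp3
        gr.Textbox(label="Transcript"),
        gr.Textbox(label="Translation"),
        gr.Textbox(label="Target language"),
    ],
)
iface.launch()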