arshsin committed on
Commit
8c8da6d
1 Parent(s): 83d9770

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -2,14 +2,15 @@ import gradio as gr
2
  import numpy as np
3
  import torch
4
  from datasets import load_dataset
5
- from transformers import VitsModel, VitsTokenizer
6
  from transformers import pipeline
 
7
 
8
 
9
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
10
 
11
  # load speech translation checkpoint
12
- asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=device)
13
 
14
 
15
 
@@ -27,8 +28,8 @@ def translate(audio):
27
  def synthesise(text):
28
  inputs = processor(text=text, return_tensors="pt")
29
  with torch.no_grad():
30
- output = model(**inputs)
31
- return output['audio']
32
 
33
 
34
  def speech_to_speech_translation(audio):
 
2
  import numpy as np
3
  import torch
4
  from datasets import load_dataset
5
+
6
  from transformers import pipeline
7
+ from transformers import VitsModel, VitsTokenizer
8
 
9
 
10
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
11
 
12
  # load speech translation checkpoint
13
+ asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device)
14
 
15
 
16
 
 
28
  def synthesise(text):
29
  inputs = processor(text=text, return_tensors="pt")
30
  with torch.no_grad():
31
+ speech = model(inputs["input_ids"].to(device))
32
+ return speech.audio[0]
33
 
34
 
35
  def speech_to_speech_translation(audio):