unijoh committed on
Commit
67f0a18
1 Parent(s): cfbbfed

Update tts.py

Browse files
Files changed (1) hide show
  1. tts.py +9 -12
tts.py CHANGED
@@ -1,21 +1,18 @@
1
  import torch
2
- from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor
3
- import sentencepiece
 
4
 
5
  MODEL_ID = "microsoft/speecht5_tts"
6
  processor = SpeechT5Processor.from_pretrained(MODEL_ID)
7
  model = SpeechT5ForTextToSpeech.from_pretrained(MODEL_ID)
 
8
 
9
- def synthesize_speech(text):
10
- if not text:
11
- return "ERROR: Please provide text for synthesis"
12
-
13
- inputs = processor(text, return_tensors="pt")
14
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
15
- model.to(device)
16
- inputs = inputs.to(device)
17
 
18
  with torch.no_grad():
19
- speech = model.generate(**inputs)
20
 
21
- return processor.decode(speech, skip_special_tokens=True)
 
 
1
  import torch
2
+ from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
3
+ from datasets import load_dataset
4
+ import soundfile as sf
5
 
6
  MODEL_ID = "microsoft/speecht5_tts"
7
  processor = SpeechT5Processor.from_pretrained(MODEL_ID)
8
  model = SpeechT5ForTextToSpeech.from_pretrained(MODEL_ID)
9
+ vocoder = torch.hub.load("snakers4/silero-vad", "silero_vad", force_reload=True)
10
 
11
+ def synthesize_speech(text_input):
12
+ inputs = processor(text=text_input, return_tensors="pt")
 
 
 
 
 
 
13
 
14
  with torch.no_grad():
15
+ speech = model.generate_speech(inputs["input_ids"], vocoder=vocoder)
16
 
17
+ sf.write("output.wav", speech.numpy(), 16000)
18
+ return "output.wav"