How can I pass the result to the microphone?

#31
by thunder-007 - opened
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
from datasets import load_dataset
import torch
import soundfile as sf

processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

inputs = processor(text="Hello, my dog is cute.", return_tensors="pt")

# Speaker embeddings (x-vectors) control the voice; pass them as the
# second argument of generate_speech to use them:
# embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
# speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

speech = model.generate_speech(inputs["input_ids"], vocoder=vocoder)

sf.write("speech.wav", speech.numpy(), samplerate=16000)

I'm planning to feed the output of an LLM into this model. When the LLM yields text in real time (streaming), how can we make it read the text aloud in real time?

Hi! Just use a library like playsound to play the .wav file.
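
If you want the audio to keep up with the LLM, one approach is to buffer the streamed text into sentences, synthesize each sentence with SpeechT5, and play the resulting waveform right away instead of writing a file first. Below is a minimal sketch of that idea, with assumptions: it uses sounddevice (not mentioned above) to play the numpy array directly, and llm_stream() is a hypothetical stand-in for your LLM's streaming output. playsound("speech.wav") also works if you prefer to keep writing .wav files.

import re
import torch
import sounddevice as sd  # pip install sounddevice
from datasets import load_dataset
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan

processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# Same x-vector speaker embedding as the commented-out lines above.
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

def speak(sentence):
    # Synthesize one sentence and play it through the default output device.
    inputs = processor(text=sentence, return_tensors="pt")
    speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
    sd.play(speech.numpy(), samplerate=16000)
    sd.wait()  # block until this chunk has finished playing

def llm_stream():
    # Hypothetical stand-in for a streaming LLM; yields text fragments.
    yield from ["Hello, my ", "dog is cute. ", "She loves ", "long walks."]

buffer = ""
for fragment in llm_stream():
    buffer += fragment
    # Flush a sentence to the TTS model every time a boundary appears.
    while True:
        match = re.search(r"[.!?]\s", buffer)
        if not match:
            break
        sentence, buffer = buffer[:match.end()], buffer[match.end():]
        speak(sentence.strip())

if buffer.strip():
    speak(buffer.strip())  # flush whatever is left when the stream ends

Playback here blocks until each chunk finishes (sd.wait()), so synthesis and playback alternate; for lower latency you could synthesize the next sentence in a background thread while the current one plays.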
