"""Text-to-speech helper built on the SpeechT5 checkpoint from Hugging Face."""

import torch
from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor

MODEL_ID = "microsoft/speecht5_tts"

# Loaded once at import time so every call reuses the same processor and weights.
processor = SpeechT5Processor.from_pretrained(MODEL_ID)
model = SpeechT5ForTextToSpeech.from_pretrained(MODEL_ID)


def synthesize_speech(text, speaker_embeddings=None, vocoder=None):
    """Generate speech for *text* with the module-level SpeechT5 model.

    Args:
        text: The string to synthesize.
        speaker_embeddings: Optional tensor describing the target voice. It is
            moved to the inference device and forwarded to ``model.generate``.
            NOTE(review): recent ``transformers`` releases appear to reject a
            missing speaker embedding -- confirm against the installed version
            and pass one explicitly if so.
        vocoder: Optional vocoder module (for example a ``SpeechT5HifiGan``
            instance). It is moved to the inference device and forwarded to
            ``model.generate``.

    Returns:
        The tensor returned by ``model.generate``, moved to the CPU. Per the
        SpeechT5 documentation this is a waveform when a vocoder is supplied
        and a spectrogram otherwise.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Module.to() is a no-op once the weights already live on `device`.
    model.to(device)

    # Pass the string through the documented `text=` keyword so that it is
    # routed to the tokenizer rather than being treated as another modality.
    inputs = processor(text=text, return_tensors="pt").to(device)

    # Forward the optional arguments only when the caller supplied them, so a
    # plain `synthesize_speech(text)` call reaches `generate` unchanged.
    generate_kwargs = {}
    if speaker_embeddings is not None:
        generate_kwargs["speaker_embeddings"] = speaker_embeddings.to(device)
    if vocoder is not None:
        generate_kwargs["vocoder"] = vocoder.to(device)

    with torch.no_grad():
        speech = model.generate(**inputs, **generate_kwargs)

    # The generated tensor holds audio features, not token ids, so it must not
    # be run through `processor.decode`; hand it back on the CPU instead.
    return speech.cpu()