"""Uyghur (Arabic-script) text-to-speech using Meta's MMS VITS model."""

from transformers import VitsModel, AutoTokenizer
import torch

# Registry of available TTS backends. Tokenizer and model weights are loaded
# once at import time and reused across synthesize() calls.
models_info = {
    "Meta-MMS": {
        "processor": AutoTokenizer.from_pretrained("facebook/mms-tts-uig-script_arabic"),
        "model": VitsModel.from_pretrained("facebook/mms-tts-uig-script_arabic"),
    },
}


def synthesize(text, model_id):
    """Synthesize speech for *text* with the model registered under *model_id*.

    Args:
        text: Input text to vocalize (Uyghur in Arabic script for "Meta-MMS").
        model_id: Key into ``models_info`` (e.g. ``"Meta-MMS"``).

    Returns:
        Tuple ``(waveform, sampling_rate)`` where ``waveform`` is a CPU
        ``torch.Tensor`` (batch, samples) and ``sampling_rate`` is in Hz.

    Raises:
        KeyError: If ``model_id`` is not present in ``models_info``.
    """
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    processor = models_info[model_id]["processor"]
    model = models_info[model_id]["model"].to(device)

    inputs = processor(text, return_tensors="pt").to(device)
    with torch.no_grad():
        output = model(**inputs).waveform

    # BUG FIX: the rate was hard-coded to 22050, but VITS models emit audio at
    # model.config.sampling_rate (16000 Hz for facebook/mms-tts-* checkpoints);
    # the wrong value made playback ~38% too fast. Read it from the config,
    # falling back to MMS's 16000 if the attribute is ever absent.
    sampling_rate = getattr(model.config, "sampling_rate", 16000)
    return (output.cpu(), sampling_rate)