"""Gradio demo that runs the `facebook/mms-tts-eng` text-to-speech model."""

import gradio as gr
import torch
from transformers import VitsModel, VitsTokenizer

# --- 1. Load Model and Tokenizer ---
# NOTE: Switched to a compatible model that has the correct file structure.
# Loading happens once at module import so every request reuses the same
# model and tokenizer objects.
print("Loading facebook/mms-tts-eng model and tokenizer...")
model_id = "facebook/mms-tts-eng"
model = VitsModel.from_pretrained(model_id)
tokenizer = VitsTokenizer.from_pretrained(model_id)
print("Model and tokenizer loaded successfully.")


# --- 2. Define the Speech Synthesis Function ---
def synthesize_speech(text: str):
    """Convert text to speech using the loaded TTS model.

    Args:
        text: The text to synthesize.

    Returns:
        A ``(sampling_rate, waveform)`` tuple, where ``sampling_rate`` comes
        from the model configuration and ``waveform`` is a NumPy array. This
        is the tuple form handed to the ``gr.Audio`` output component.

    Raises:
        gr.Error: If ``text`` is empty/whitespace-only, or if tokenization
            yields no tokens, so there is nothing to synthesize.
    """
    # Reject blank input up front instead of letting the model fail with an
    # opaque error; gr.Error is surfaced to the user in the Gradio UI.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    # Tokenize the input text. `return_tensors="pt"` formats it for PyTorch.
    inputs = tokenizer(text, return_tensors="pt")

    # Text can be non-blank and still produce zero tokens; guard that case
    # as well so the model is never called on an empty sequence.
    if inputs["input_ids"].shape[-1] == 0:
        raise gr.Error("The text contains no characters the model can synthesize.")

    # Generate the audio waveform.
    # torch.no_grad() disables gradient tracking, which inference does not
    # need since we aren't training the model.
    with torch.no_grad():
        waveform = model(**inputs).waveform

    # The output is a PyTorch tensor. Convert it to a NumPy array.
    # .squeeze() removes any extra single dimensions.
    waveform_numpy = waveform.cpu().numpy().squeeze()

    # Get the sampling rate from the model's configuration.
    sampling_rate = model.config.sampling_rate

    # Return the sampling rate and waveform as a tuple for the Gradio Audio
    # component.
    return (sampling_rate, waveform_numpy)


# --- 3. Build the Gradio Interface ---
demo = gr.Interface(
    fn=synthesize_speech,
    inputs=gr.Textbox(
        label="Text to Synthesize",
        info="Enter the text you want to convert to speech.",
        value="Hello, this is a demonstration of the Facebook MMS text to speech model.",
    ),
    outputs=gr.Audio(
        label="Synthesized Audio",
        type="numpy",  # The function returns a NumPy array
    ),
    title="🗣️ MMS Text-to-Speech (English)",
    description="A Gradio app to run the `facebook/mms-tts-eng` model for text-to-speech conversion.",
    examples=[
        ["The quick brown fox jumps over the lazy dog."],
        ["To be, or not to be, that is the question."],
        ["Artificial intelligence will shape our future in profound ways."],
    ],
    cache_examples=True,  # Cache results for faster demo
)

# --- 4. Launch the App ---
if __name__ == "__main__":
    demo.launch()