# app.py
"""SpeechT5 text-to-speech HTTP API.

Exposes a single ``GET /speak`` endpoint that converts the ``text`` query
parameter to speech with SpeechT5 + HiFi-GAN and returns the audio as WAV.
"""

import io

import soundfile as sf
import torch
from fastapi import FastAPI, Query
from fastapi.responses import StreamingResponse
from transformers import (
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
    SpeechT5Processor,
)

# Sample rate written into the WAV header.
# NOTE(review): must match the rate of the audio produced by the vocoder
# checkpoint loaded below (16 kHz per its model card) -- confirm if the
# checkpoint is ever swapped.
SAMPLE_RATE_HZ = 16000

app = FastAPI(title="SpeechT5 TTS API")

# Load models once at startup so requests do not pay the loading cost.
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# Dummy speaker embedding (flat, neutral voice).
speaker_embeddings = torch.zeros((1, 512))


@app.get("/speak")
def speak(
    text: str = Query(
        ...,
        min_length=1,  # reject empty text instead of running the model on it
        description="Text to convert to speech",
    ),
):
    """Convert text to speech using SpeechT5 + HiFi-GAN.

    Args:
        text: The text to synthesize. Required and must be non-empty;
            an empty value is rejected by request validation.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav`` carrying the
        synthesized audio.
    """
    # Tokenize the text into model inputs.
    inputs = processor(text=text, return_tensors="pt")

    # Generate the waveform. The speaker embedding is passed by keyword so
    # the call does not depend on the positional order of the parameters.
    speech = model.generate_speech(
        inputs["input_ids"],
        speaker_embeddings=speaker_embeddings,
        vocoder=vocoder,
    )

    # Encode the waveform as WAV into an in-memory buffer.
    buf = io.BytesIO()
    sf.write(buf, speech.numpy(), samplerate=SAMPLE_RATE_HZ, format="WAV")
    buf.seek(0)  # rewind so the response streams from the first byte

    return StreamingResponse(buf, media_type="audio/wav")


# Embeddings:
# NOTE(review): these look like alternative speaker embeddings that could
# replace the zero vector above; the file format is not shown here, so
# confirm it before loading any of them.
#
# US female 1: https://huggingface.co/datasets/Xenova/cmu-arctic-xvectors-extracted/resolve/main/cmu_us_slt_arctic-wav-arctic_a0001.bin
# US female 2: https://huggingface.co/datasets/Xenova/cmu-arctic-xvectors-extracted/resolve/main/cmu_us_clb_arctic-wav-arctic_a0001.bin
# US male 1: https://huggingface.co/datasets/Xenova/cmu-arctic-xvectors-extracted/resolve/main/cmu_us_bdl_arctic-wav-arctic_a0003.bin
# US male 2: https://huggingface.co/datasets/Xenova/cmu-arctic-xvectors-extracted/resolve/main/cmu_us_rms_arctic-wav-arctic_a0003.bin
# Canadian male: https://huggingface.co/datasets/Xenova/cmu-arctic-xvectors-extracted/resolve/main/cmu_us_jmk_arctic-wav-arctic_a0002.bin
# Scottish male: https://huggingface.co/datasets/Xenova/cmu-arctic-xvectors-extracted/resolve/main/cmu_us_awb_arctic-wav-arctic_b0002.bin
# Indian male: https://huggingface.co/datasets/Xenova/cmu-arctic-xvectors-extracted/resolve/main/cmu_us_ksp_arctic-wav-arctic_a0007.bin