MBZUAI
/

speecht5_tts_clartts_ar

Inference Endpoints

Model card Files Files and versions Community

herwoww committed on Feb 22

Commit

1648f5c

•

1 Parent(s): d99f46e

Update README.md

Files changed (1) hide show

README.md +5 -5

README.md CHANGED Viewed

@@ -31,8 +31,8 @@ import soundfile as sf
 synthesiser = pipeline("text-to-speech", "MBZUAI/speecht5_tts_clartts_ar")
-embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
-speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
 # You can replace this embedding with your own as well.
 speech = synthesiser("لأنه لا يرى أنه على السفه ثم من بعد ذلك حديث منتشر", forward_params={"speaker_embeddings": speaker_embedding})
@@ -56,8 +56,8 @@ vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
 inputs = processor(text="لأنه لا يرى أنه على السفه ثم من بعد ذلك حديث منتشر", return_tensors="pt")
 # load xvector containing speaker's voice characteristics from a dataset
-embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
-speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
 speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
@@ -65,7 +65,7 @@ sf.write("speech.wav", speech.numpy(), samplerate=16000)
 ```
-# Citation [optional]
 <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->

 synthesiser = pipeline("text-to-speech", "MBZUAI/speecht5_tts_clartts_ar")
+embeddings_dataset = load_dataset("herwoww/arabic_xvect_embeddings", split="validation")
+speaker_embedding = torch.tensor(embeddings_dataset[1]["speaker_embeddings"]).unsqueeze(0)
 # You can replace this embedding with your own as well.
 speech = synthesiser("لأنه لا يرى أنه على السفه ثم من بعد ذلك حديث منتشر", forward_params={"speaker_embeddings": speaker_embedding})
 inputs = processor(text="لأنه لا يرى أنه على السفه ثم من بعد ذلك حديث منتشر", return_tensors="pt")
 # load xvector containing speaker's voice characteristics from a dataset
+embeddings_dataset = load_dataset("herwoww/arabic_xvect_embeddings", split="validation")
+speaker_embeddings = torch.tensor(embeddings_dataset[1]["speaker_embeddings"]).unsqueeze(0)
 speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
 ```
+# Citation
 <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->