Xenova
/

speecht5_tts

Xenova HF staff committed on Dec 2, 2024

Commit

f4dbe6d

verified ·

1 Parent(s): fda0ee8

Update code snippets to use transformers.js v3

Files changed (1) hide show

README.md CHANGED Viewed

@@ -16,10 +16,10 @@ npm i @xenova/transformers
 **Example:** Text-to-speech pipeline.
 ```js
-import { pipeline } from '@xenova/transformers';
 // Create a text-to-speech pipeline
-const synthesizer = await pipeline('text-to-speech', 'Xenova/speecht5_tts', { quantized: false });
 // Generate speech
 const speaker_embeddings = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin';
@@ -45,7 +45,7 @@ fs.writeFileSync('result.wav', wav.toBuffer());
 **Example:** Load processor, tokenizer, and models separately.
 ```js
-import { AutoTokenizer, AutoProcessor, SpeechT5ForTextToSpeech, SpeechT5HifiGan, Tensor } from '@xenova/transformers';
 // Load the tokenizer and processor
 const tokenizer = await AutoTokenizer.from_pretrained('Xenova/speecht5_tts');
@@ -53,8 +53,8 @@ const processor = await AutoProcessor.from_pretrained('Xenova/speecht5_tts');
 // Load the models
 // NOTE: We use the unquantized versions as they are more accurate
-const model = await SpeechT5ForTextToSpeech.from_pretrained('Xenova/speecht5_tts', { quantized: false });
-const vocoder = await SpeechT5HifiGan.from_pretrained('Xenova/speecht5_hifigan', { quantized: false });
 // Load speaker embeddings from URL
 const speaker_embeddings_data = new Float32Array(

 **Example:** Text-to-speech pipeline.
 ```js
+import { pipeline } from '@huggingface/transformers';
 // Create a text-to-speech pipeline
+const synthesizer = await pipeline('text-to-speech', 'Xenova/speecht5_tts', { dtype: "fp32" });
 // Generate speech
 const speaker_embeddings = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin';
 **Example:** Load processor, tokenizer, and models separately.
 ```js
+import { AutoTokenizer, AutoProcessor, SpeechT5ForTextToSpeech, SpeechT5HifiGan, Tensor } from '@huggingface/transformers';
 // Load the tokenizer and processor
 const tokenizer = await AutoTokenizer.from_pretrained('Xenova/speecht5_tts');
 // Load the models
 // NOTE: We use the unquantized versions as they are more accurate
+const model = await SpeechT5ForTextToSpeech.from_pretrained('Xenova/speecht5_tts', { dtype: 'fp32' });
+const vocoder = await SpeechT5HifiGan.from_pretrained('Xenova/speecht5_hifigan', { dtype: 'fp32' });
 // Load speaker embeddings from URL
 const speaker_embeddings_data = new Float32Array(