jungjee committed on
Commit
f29f762
1 Parent(s): c59321c

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +20 -20
README.md CHANGED
@@ -27,6 +27,18 @@ pip install -e .
27
  cd egs2/voxceleb/spk1
28
  ./run.sh --skip_data_prep false --skip_train true --download_model espnet/voxcelebs12_ska_wavlm_frozen
29
  ```
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  <!-- Generated by scripts/utils/show_spk_result.py -->
32
  # RESULTS
@@ -260,32 +272,20 @@ distributed: true
260
  ### Citing ESPnet
261
 
262
  ```BibTex
 
 
 
 
 
 
 
263
  @inproceedings{watanabe2018espnet,
264
  author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
265
  title={{ESPnet}: End-to-End Speech Processing Toolkit},
266
  year={2018},
267
- booktitle={Proceedings of Interspeech},
268
  pages={2207--2211},
269
  doi={10.21437/Interspeech.2018-1456},
270
  url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
271
  }
272
-
273
-
274
-
275
-
276
-
277
-
278
- ```
279
-
280
- or arXiv:
281
-
282
- ```bibtex
283
- @misc{watanabe2018espnet,
284
- title={ESPnet: End-to-End Speech Processing Toolkit},
285
- author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
286
- year={2018},
287
- eprint={1804.00015},
288
- archivePrefix={arXiv},
289
- primaryClass={cs.CL}
290
- }
291
  ```
 
27
  cd egs2/voxceleb/spk1
28
  ./run.sh --skip_data_prep false --skip_train true --download_model espnet/voxcelebs12_ska_wavlm_frozen
29
  ```
30
+ ```python
31
+ import numpy as np
32
+ from espnet2.bin.spk_inference import Speech2Embedding
33
+
34
+ # from uploaded models
35
+ speech2spk_embed = Speech2Embedding.from_pretrained(model_tag="espnet/voxcelebs12_ska_wavlm_frozen")
36
+ embedding = speech2spk_embed(np.zeros(16500))
37
+
38
+ # from a checkpoint you trained yourself
39
+ speech2spk_embed = Speech2Embedding(model_file="model.pth", train_config="config.yaml")
40
+ embedding = speech2spk_embed(np.zeros(32000))
41
+ ```
42
 
43
  <!-- Generated by scripts/utils/show_spk_result.py -->
44
  # RESULTS
 
272
  ### Citing ESPnet
273
 
274
  ```BibTex
275
+ @article{jung2024espnet,
276
+ title={ESPnet-SPK: full pipeline speaker embedding toolkit with reproducible recipes, self-supervised front-ends, and off-the-shelf models},
277
+ author={Jung, Jee-weon and Zhang, Wangyou and Shi, Jiatong and Aldeneh, Zakaria and Higuchi, Takuya and Theobald, Barry-John and Abdelaziz, Ahmed Hussen and Watanabe, Shinji},
278
+ journal={arXiv preprint arXiv:2401.17230},
279
+ year={2024}
280
+ }
281
+
282
  @inproceedings{watanabe2018espnet,
283
  author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
284
  title={{ESPnet}: End-to-End Speech Processing Toolkit},
285
  year={2018},
286
+ booktitle={Proc. Interspeech},
287
  pages={2207--2211},
288
  doi={10.21437/Interspeech.2018-1456},
289
  url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
290
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  ```