jungjee committed on
Commit
cb5d475
1 Parent(s): 00597f6

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +28 -18
README.md CHANGED
@@ -27,6 +27,18 @@ pip install -e .
27
  cd egs2/voxceleb/spk1
28
  ./run.sh --skip_data_prep false --skip_train true --download_model espnet/voxcelebs12_rawnet3
29
  ```
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  <!-- Generated by scripts/utils/show_spk_result.py -->
32
  # RESULTS
@@ -250,33 +262,31 @@ distributed: true
250
 
251
 
252
 
253
- ### Citing ESPnet
254
 
255
  ```BibTex
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  @inproceedings{watanabe2018espnet,
257
  author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
258
  title={{ESPnet}: End-to-End Speech Processing Toolkit},
259
  year={2018},
260
- booktitle={Proceedings of Interspeech},
261
  pages={2207--2211},
262
  doi={10.21437/Interspeech.2018-1456},
263
  url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
264
  }
265
-
266
-
267
-
268
-
269
-
270
-
271
  ```
272
 
273
- RawNet3:
274
-
275
- ```bibtex
276
- @article{jung2022pushing,
277
- title={Pushing the limits of raw waveform speaker recognition},
278
- author={Jung, Jee-weon and Kim, You Jin and Heo, Hee-Soo and Lee, Bong-Jin and Kwon, Youngki and Chung, Joon Son},
279
- journal={Proc. Interspeech},
280
- year={2022}
281
- }
282
- ```
 
27
  cd egs2/voxceleb/spk1
28
  ./run.sh --skip_data_prep false --skip_train true --download_model espnet/voxcelebs12_rawnet3
29
  ```
30
+ ```python
31
+ import numpy as np
32
+ from espnet2.bin.spk_inference import Speech2Embedding
33
+
34
+ # from uploaded models
35
+ speech2spk_embed = Speech2Embedding.from_pretrained(model_tag="espnet/voxcelebs12_rawnet3")
36
+ embedding = speech2spk_embed(np.zeros(16500))
37
+
38
+ # from a checkpoint you trained yourself
39
+ speech2spk_embed = Speech2Embedding(model_file="model.pth", train_config="config.yaml")
40
+ embedding = speech2spk_embed(np.zeros(32000))
41
+ ```
42
 
43
  <!-- Generated by scripts/utils/show_spk_result.py -->
44
  # RESULTS
 
262
 
263
 
264
 
265
+ ### Citing
266
 
267
  ```BibTex
268
+ @article{jung2024espnet,
269
+ title={ESPnet-SPK: full pipeline speaker embedding toolkit with reproducible recipes, self-supervised front-ends, and off-the-shelf models},
270
+ author={Jung, Jee-weon and Zhang, Wangyou and Shi, Jiatong and Aldeneh, Zakaria and Higuchi, Takuya and Theobald, Barry-John and Abdelaziz, Ahmed Hussen and Watanabe, Shinji},
271
+ journal={arXiv preprint arXiv:2401.17230},
272
+ year={2024}
273
+ }
274
+ @article{jung2022pushing,
275
+ title={Pushing the limits of raw waveform speaker recognition},
276
+ author={Jung, Jee-weon and Kim, You Jin and Heo, Hee-Soo and Lee, Bong-Jin and Kwon, Youngki and Chung, Joon Son},
277
+ journal={Proc. Interspeech},
278
+ year={2022}
279
+ }
280
+ ```
281
+ ```BibTex
282
  @inproceedings{watanabe2018espnet,
283
  author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
284
  title={{ESPnet}: End-to-End Speech Processing Toolkit},
285
  year={2018},
286
+ booktitle={Proc. Interspeech},
287
  pages={2207--2211},
288
  doi={10.21437/Interspeech.2018-1456},
289
  url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
290
  }
 
 
 
 
 
 
291
  ```
292