File size: 2,277 Bytes
b386992
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
% Jukic, Balam, Ginsburg (WASPAA 2023): flexible multichannel speech enhancement.
% The accented surname is written as a brace-wrapped LaTeX escape so that
% 8-bit BibTeX treats it as a single letter when sorting and labelling.
@inproceedings{jukic2023flexible,
  author    = {Juki{\'c}, Ante and Balam, Jagadeesh and Ginsburg, Boris},
  title     = {Flexible multichannel speech enhancement for noise-robust frontend},
  booktitle = {Proc. WASPAA},
  year      = {2023}
}

% Ito, Araki, Nakatani (EUSIPCO 2016): complex angular central Gaussian mixture model.
% "Gaussian" is brace-protected so sentence-casing styles keep its capital letter.
@inproceedings{ito2016directional,
  author    = {Ito, Nobutaka and Araki, Shoko and Nakatani, Tomohiro},
  title     = {Complex angular central {Gaussian} mixture model for directional statistics in mask-based microphone array signal processing},
  booktitle = {Proc. EUSIPCO},
  year      = {2016}
}

% Jukic, Korostik, Balam, Ginsburg (Interspeech 2024): Schroedinger bridge for speech enhancement.
% The proper noun in the title is brace-protected and its umlaut escaped; accents use
% the brace-wrapped escape form; the page range uses an en dash (double hyphen).
@inproceedings{jukic2024sb,
  author    = {Juki{\'c}, Ante and Korostik, Roman and Balam, Jagadeesh and Ginsburg, Boris},
  title     = {{Schr{\"o}dinger} Bridge for Generative Speech Enhancement},
  booktitle = {Proc. Interspeech},
  pages     = {1175--1179},
  year      = {2024}
}

% Welker, Richter, Gerkmann (Interspeech 2022): score-based speech enhancement in the complex STFT domain.
% The page range uses an en dash (double hyphen); names are in unambiguous "Last, First" form.
@inproceedings{welker2022speech,
  author    = {Welker, Simon and Richter, Julius and Gerkmann, Timo},
  title     = {Speech Enhancement with Score-Based Generative Models in the Complex {STFT} Domain},
  booktitle = {Proc. Interspeech},
  pages     = {2928--2932},
  year      = {2022}
}

% Richter, Welker, Lemercier, Lay, Gerkmann (2023): diffusion-based speech enhancement and dereverberation.
% The title is single-braced so the bibliography style controls its casing, as for the
% other entries in this file; the page range uses an en dash (double hyphen).
@article{richter2023sgmse,
  author  = {Richter, Julius and Welker, Simon and Lemercier, Jean-Marie and Lay, Bunlong and Gerkmann, Timo},
  title   = {Speech Enhancement and Dereverberation with Diffusion-Based Generative Models},
  journal = {IEEE/ACM Trans. on Audio, Speech, and Language Process.},
  volume  = {31},
  pages   = {2351--2364},
  year    = {2023}
}

% Ku, Liu, Korostik, Huang, Fu, Jukic (2024): generative speech foundation model pretraining (arXiv preprint).
% The accented surname uses the brace-wrapped escape form, matching the other entries by this author.
% NOTE(review): the arXiv identifier is kept in the journal field so that classic styles
% still print it; move it to eprint/archiveprefix fields if the target style supports them.
@article{ku2024generative,
  author  = {Ku, Pin-Jui and Liu, Alexander H. and Korostik, Roman and Huang, Sung-Feng and Fu, Szu-Wei and Juki{\'c}, Ante},
  title   = {Generative Speech Foundation Model Pretraining for High-Quality Speech Extraction and Restoration},
  journal = {arXiv preprint arXiv:2409.16117},
  year    = {2024}
}

% Souden, Benesty, Affes (2010): optimal frequency-domain multichannel linear filtering.
% The entry type is article (the entry carries journal, volume, number and pages);
% the mis-encoded given name is restored with a grave-accent escape; the page range
% uses an en dash (double hyphen).
@article{souden2010,
  author  = {Souden, Mehrez and Benesty, Jacob and Affes, Sofi{\`e}ne},
  title   = {On Optimal Frequency-Domain Multichannel Linear Filtering for Noise Reduction},
  journal = {IEEE Transactions on Audio, Speech, and Language Processing},
  volume  = {18},
  number  = {2},
  pages   = {260--276},
  year    = {2010}
}

% Lipman, Chen, Ben-Hamu, Nickel, Le (ICLR 2023): flow matching for generative modeling.
@inproceedings{lipman2023flow,
  author    = {Lipman, Yaron and Chen, Ricky T. Q. and Ben-Hamu, Heli and Nickel, Maximilian and Le, Matthew},
  title     = {Flow Matching for Generative Modeling},
  booktitle = {Proc. ICLR},
  year      = {2023},
  url       = {https://openreview.net/forum?id=PqvMRDCJT9t}
}