% SonicVisionLM preprint, arXiv identifier 2401.04394, dated 2024.
% The camelCase system name in the title is brace-protected so that
% sentence-casing bibliography styles do not lowercase it.
% NOTE(review): the arXiv identifier is still carried in the journal field,
% exactly as before, so rendering under the current style is unchanged. If the
% bibliography style in use supports eprint fields, consider moving the
% identifier to eprint/archiveprefix instead -- confirm against the style.
@article{xie2024sonicvisionlm,
  author  = {Xie, Zhifeng and Yu, Shengye and Li, Mengtian and He, Qile and Chen, Chaofeng and Jiang, Yu-Gang},
  title   = {{SonicVisionLM}: Playing Sound with Vision Language Models},
  journal = {arXiv preprint arXiv:2401.04394},
  year    = {2024},
}