% Devlin et al. 2018 (BERT), arXiv preprint. The arXiv identifier is kept in
% eprint/archivePrefix, as in this file's other arXiv entries, instead of a
% made-up journal name. The acronym is braced so styles cannot lowercase it.
@misc{devlin2018bert,
  author        = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  title         = {{BERT}: Pre-training of deep bidirectional transformers for language understanding},
  year          = {2018},
  eprint        = {1810.04805},
  archivePrefix = {arXiv},
}
% Shoeybi et al. 2019 (Megatron-LM), arXiv preprint. Identifier stored in
% eprint/archivePrefix; the model name is braced to keep its capitalisation.
@misc{shoeybi2019megatron,
  author        = {Shoeybi, Mohammad and Patwary, Mostofa and Puri, Raul and LeGresley, Patrick and Casper, Jared and Catanzaro, Bryan},
  title         = {{Megatron-LM}: Training multi-billion parameter language models using model parallelism},
  year          = {2019},
  eprint        = {1909.08053},
  archivePrefix = {arXiv},
}
% Maas et al. 2011, ACL-HLT proceedings paper. The month uses the standard
% unquoted three-letter macro so styles can localise or abbreviate it.
@inproceedings{maas2011,
  author    = {Maas, Andrew L. and Daly, Raymond E. and Pham, Peter T. and Huang, Dan and Ng, Andrew Y. and Potts, Christopher},
  title     = {Learning Word Vectors for Sentiment Analysis},
  booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},
  month     = jun,
  year      = {2011},
  address   = {Portland, Oregon, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {142--150},
  url       = {http://www.aclweb.org/anthology/P11-1015},
}
% Socher et al. 2013, EMNLP proceedings paper.
% Values use brace delimiters, which nest safely (quotes do not).
@inproceedings{socher2013,
  author    = {Socher, Richard and Perelygin, Alex and Wu, Jean and Chuang, Jason and Manning, Christopher D. and Ng, Andrew and Potts, Christopher},
  title     = {Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank},
  booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing},
  month     = oct,
  year      = {2013},
  address   = {Seattle, Washington, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1631--1642},
  url       = {https://www.aclweb.org/anthology/D13-1170},
}
% Lim and Kang 2018, journal article in Database (volume 2018).
% NOTE(review): no pages/article number or DOI is recorded here; add them if known.
@article{lim2018chemical,
  author    = {Lim, Sangrak and Kang, Jaewoo},
  title     = {Chemical--gene relation extraction using recursive neural network},
  journal   = {Database},
  volume    = {2018},
  year      = {2018},
  publisher = {Oxford Academic},
}
% Li and Sun 2007, EMNLP-CoNLL proceedings paper.
% The venue acronym is braced so no style can change its case.
@inproceedings{li2007scalable,
  author    = {Li, Jingyang and Sun, Maosong},
  title     = {Scalable term selection for text categorization},
  booktitle = {Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning ({EMNLP-CoNLL})},
  pages     = {774--782},
  year      = {2007},
}
% Lee et al. 2019 (BioBERT), arXiv preprint.
% Authors use the unambiguous "Last, First" form; the model name is braced
% because sentence-casing styles would otherwise turn it into "Biobert".
@misc{lee2019biobert,
  author        = {Lee, Jinhyuk and Yoon, Wonjin and Kim, Sungdong and Kim, Donghyeon and Kim, Sunkyu and So, Chan Ho and Kang, Jaewoo},
  title         = {{BioBERT}: a pre-trained biomedical language representation model for biomedical text mining},
  year          = {2019},
  eprint        = {1901.08746},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
% Shin et al. 2020 (BioMegatron), arXiv preprint.
% Authors use "Last, First"; the camel-case model name is brace-protected.
@misc{shin2020biomegatron,
  author        = {Shin, Hoo-Chang and Zhang, Yang and Bakhturina, Evelina and Puri, Raul and Patwary, Mostofa and Shoeybi, Mohammad and Mani, Raghav},
  title         = {{BioMegatron}: Larger Biomedical Domain Language Model},
  year          = {2020},
  eprint        = {2010.06060},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
% Vaswani et al. 2017, NeurIPS proceedings paper.
% The middle initial carries its period so name formatting stays correct.
@inproceedings{vaswani2017attention,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title     = {Attention is all you need},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {6000--6010},
  year      = {2017},
}
% Sennrich, Haddow and Birch 2015, arXiv preprint. The arXiv identifier is
% stored in eprint/archivePrefix rather than in a journal field.
@misc{sennrich2015neural,
  author        = {Sennrich, Rico and Haddow, Barry and Birch, Alexandra},
  title         = {Neural machine translation of rare words with subword units},
  year          = {2015},
  eprint        = {1508.07909},
  archivePrefix = {arXiv},
}
% Provilkov, Emelianenko and Voita 2019 (BPE-dropout), arXiv preprint.
% The acronym is braced to survive sentence-casing; the arXiv identifier is
% stored in eprint/archivePrefix.
@misc{provilkov2019bpe,
  author        = {Provilkov, Ivan and Emelianenko, Dmitrii and Voita, Elena},
  title         = {{BPE}-dropout: Simple and effective subword regularization},
  year          = {2019},
  eprint        = {1910.13267},
  archivePrefix = {arXiv},
}
% Post 2018, arXiv preprint. The metric name BLEU is braced so styles cannot
% lowercase it; the arXiv identifier is stored in eprint/archivePrefix.
@misc{post2018call,
  author        = {Post, Matt},
  title         = {A call for clarity in reporting {BLEU} scores},
  year          = {2018},
  eprint        = {1804.08771},
  archivePrefix = {arXiv},
}
% Zhang et al. 2021 (SGD-QA), arXiv preprint.
% Authors use "Last, First"; the acronym in the title is brace-protected.
@misc{zhang2021sgdqa,
  author        = {Zhang, Yang and Noroozi, Vahid and Bakhturina, Evelina and Ginsburg, Boris},
  title         = {{SGD-QA}: Fast Schema-Guided Dialogue State Tracking for Unseen Services},
  year          = {2021},
  eprint        = {2105.08049},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
% Zhang et al. 2019, article in Computational Linguistics.
% The page range uses the double hyphen; authors use "Last, First".
% NOTE(review): two given names are only initials and volume/number are
% missing; fill these in from the publisher record if available.
@article{zhang2019neural,
  author  = {Zhang, Hao and Sproat, R. and Ng, Axel H. and Stahlberg, Felix and Peng, Xiaochang and Gorman, Kyle and Roark, B.},
  title   = {Neural Models of Text Normalization for Speech Applications},
  journal = {Computational Linguistics},
  year    = {2019},
  pages   = {293--338},
}
% Liu et al. 2021, arXiv preprint.
% Authors use the unambiguous "Last, First" form.
@misc{liu2021selfalignment,
  author        = {Liu, Fangyu and Shareghi, Ehsan and Meng, Zaiqiao and Basaldella, Marco and Collier, Nigel},
  title         = {Self-Alignment Pretraining for Biomedical Entity Representations},
  year          = {2021},
  eprint        = {2010.11784},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
% Gulcehre et al. 2015, arXiv preprint. The arXiv identifier is stored in
% eprint/archivePrefix rather than in a journal field.
@misc{gulcehre2015using,
  author        = {Gulcehre, Caglar and Firat, Orhan and Xu, Kelvin and Cho, Kyunghyun and Barrault, Loic and Lin, Huei-Chi and Bougares, Fethi and Schwenk, Holger and Bengio, Yoshua},
  title         = {On using monolingual corpora in neural machine translation},
  year          = {2015},
  eprint        = {1503.03535},
  archivePrefix = {arXiv},
}
% Yee et al. 2019, arXiv preprint. The middle initial carries its period; the
% arXiv identifier is stored in eprint/archivePrefix.
@misc{yee2019simple,
  author        = {Yee, Kyra and Ng, Nathan and Dauphin, Yann N. and Auli, Michael},
  title         = {Simple and effective noisy channel modeling for neural machine translation},
  year          = {2019},
  eprint        = {1908.05731},
  archivePrefix = {arXiv},
}
% Koehn et al. 2007 (Moses toolkit), ACL demo/poster proceedings paper.
% The toolkit name is braced as a whole word; values use brace delimiters.
@inproceedings{koehnetal2007moses,
  author    = {Koehn, Philipp and
               Hoang, Hieu and
               Birch, Alexandra and
               Callison-Burch, Chris and
               Federico, Marcello and
               Bertoldi, Nicola and
               Cowan, Brooke and
               Shen, Wade and
               Moran, Christine and
               Zens, Richard and
               Dyer, Chris and
               Bojar, Ond{\v{r}}ej and
               Constantin, Alexandra and
               Herbst, Evan},
  title     = {{Moses}: Open Source Toolkit for Statistical Machine Translation},
  booktitle = {Proceedings of the 45th Annual Meeting of the Association for Computational Linguistics Companion Volume Proceedings of the Demo and Poster Sessions},
  month     = jun,
  year      = {2007},
  address   = {Prague, Czech Republic},
  publisher = {Association for Computational Linguistics},
  pages     = {177--180},
  url       = {https://aclanthology.org/P07-2045},
}
% Sunkara et al. 2020, Interspeech proceedings paper.
% The title is no longer wrapped in a second pair of braces, so bibliography
% styles can apply their own casing; authors use "Last, First".
@inproceedings{sunkara20_interspeech,
  author    = {Sunkara, Monica and Ronanki, Srikanth and Bekal, Dhanush and Bodapati, Sravan and Kirchhoff, Katrin},
  title     = {Multimodal Semi-Supervised Learning Framework for Punctuation Prediction in Conversational Speech},
  booktitle = {Proc. Interspeech 2020},
  year      = {2020},
  pages     = {4911--4915},
  doi       = {10.21437/Interspeech.2020-3074},
}
% Chen, Zhuo and Wang 2019, arXiv preprint. The acronym BERT is braced to keep
% its capitalisation; the arXiv identifier is stored in eprint/archivePrefix.
@misc{chen2019bert,
  author        = {Chen, Qian and Zhuo, Zhu and Wang, Wen},
  title         = {{BERT} for joint intent classification and slot filling},
  year          = {2019},
  eprint        = {1902.10909},
  archivePrefix = {arXiv},
}
% Borgeaud et al. 2021, arXiv preprint. The particle "van den" is placed
% before the surname so BibTeX parses it as the von part instead of as given
% names. "and others" is the literal token that styles render as "et al.".
@misc{borgeaud2021improving,
  author        = {Borgeaud, Sebastian and Mensch, Arthur and Hoffmann, Jordan and Cai, Trevor and Rutherford, Eliza and Millican, Katie and van den Driessche, George and Lespiau, Jean-Baptiste and Damoc, Bogdan and Clark, Aidan and others},
  title         = {Improving language models by retrieving from trillions of tokens},
  year          = {2021},
  eprint        = {2112.04426},
  archivePrefix = {arXiv},
}
% Su et al. 2021 (RoFormer), arXiv preprint. The camel-case model name is
% braced; the arXiv identifier is stored in eprint/archivePrefix.
@misc{su2021roformer,
  author        = {Su, Jianlin and Lu, Yu and Pan, Shengfeng and Wen, Bo and Liu, Yunfeng},
  title         = {{RoFormer}: Enhanced transformer with rotary position embedding},
  year          = {2021},
  eprint        = {2104.09864},
  archivePrefix = {arXiv},
}
% Reimers and Gurevych 2019 (Sentence-BERT), arXiv preprint. Acronyms and the
% proper adjective are braced so sentence-casing styles leave them intact; the
% arXiv identifier is stored in eprint/archivePrefix.
@misc{reimers2019sentence,
  author        = {Reimers, Nils and Gurevych, Iryna},
  title         = {{Sentence-BERT}: Sentence embeddings using {Siamese} {BERT}-networks},
  year          = {2019},
  eprint        = {1908.10084},
  archivePrefix = {arXiv},
}
% Yang et al. 2022, arXiv preprint. The series name with its roman numeral is
% braced so "V" is never lowercased; the middle initial carries its period;
% the arXiv identifier is stored in eprint/archivePrefix.
@misc{yang2022tensor,
  author        = {Yang, Greg and Hu, Edward J. and Babuschkin, Igor and Sidor, Szymon and Liu, Xiaodong and Farhi, David and Ryder, Nick and Pachocki, Jakub and Chen, Weizhu and Gao, Jianfeng},
  title         = {{Tensor Programs V}: Tuning Large Neural Networks via Zero-Shot Hyperparameter Transfer},
  year          = {2022},
  eprint        = {2203.03466},
  archivePrefix = {arXiv},
}
% Jegou et al. 2022 (Faiss), Astrophysics Source Code Library record.
% NOTE(review): the pages value is not a page range; it looks like a truncated
% ASCL record identifier picked up by an automatic export. Confirm the real
% identifier and move it to a more suitable field (e.g. eprint or note).
@article{jegou2022faiss,
  author  = {J{\'e}gou, Herv{\'e} and Douze, Matthijs and Johnson, Jeff and Hosseini, Lucas and Deng, Chengqi},
  title   = {{Faiss}: Similarity search and clustering of dense vectors library},
  journal = {Astrophysics Source Code Library},
  pages   = {ascl--2210},
  year    = {2022},
}