thanks to NVIDIA ❤

7934b29 about 2 years ago

8.43 kB

	@article{devlin2018bert,
	  author  = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
	  title   = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
	  journal = {arXiv preprint arXiv:1810.04805},
	  year    = {2018},
	}

	@article{shoeybi2019megatron,
	  author  = {Shoeybi, Mohammad and Patwary, Mostofa and Puri, Raul and LeGresley, Patrick and Casper, Jared and Catanzaro, Bryan},
	  title   = {{Megatron-LM}: Training Multi-Billion Parameter Language Models Using Model Parallelism},
	  journal = {arXiv preprint arXiv:1909.08053},
	  year    = {2019},
	}

	@inproceedings{maas2011,
	  author    = {Maas, Andrew L. and Daly, Raymond E. and Pham, Peter T. and Huang, Dan and Ng, Andrew Y. and Potts, Christopher},
	  title     = {Learning Word Vectors for Sentiment Analysis},
	  booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},
	  month     = jun,
	  year      = {2011},
	  address   = {Portland, Oregon, USA},
	  publisher = {Association for Computational Linguistics},
	  pages     = {142--150},
	  url       = {http://www.aclweb.org/anthology/P11-1015},
	}

	@inproceedings{socher2013,
	  author    = {Socher, Richard and Perelygin, Alex and Wu, Jean and Chuang, Jason and Manning, Christopher D. and Ng, Andrew and Potts, Christopher},
	  title     = {Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank},
	  booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing},
	  publisher = {Association for Computational Linguistics},
	  address   = {Seattle, Washington, USA},
	  month     = oct,
	  year      = {2013},
	  pages     = {1631--1642},
	  url       = {https://www.aclweb.org/anthology/D13-1170},
	}

	@article{lim2018chemical,
	  author    = {Lim, Sangrak and Kang, Jaewoo},
	  title     = {Chemical--gene relation extraction using recursive neural network},
	  journal   = {Database},
	  publisher = {Oxford Academic},
	  volume    = {2018},
	  year      = {2018},
	}

	@inproceedings{li2007scalable,
	  author    = {Li, Jingyang and Sun, Maosong},
	  title     = {Scalable term selection for text categorization},
	  booktitle = {Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning (EMNLP-CoNLL)},
	  year      = {2007},
	  pages     = {774--782},
	}

	@misc{lee2019biobert,
	  author        = {Lee, Jinhyuk and Yoon, Wonjin and Kim, Sungdong and Kim, Donghyeon and Kim, Sunkyu and So, Chan Ho and Kang, Jaewoo},
	  title         = {{BioBERT}: A Pre-trained Biomedical Language Representation Model for Biomedical Text Mining},
	  year          = {2019},
	  eprint        = {1901.08746},
	  archivePrefix = {arXiv},
	  primaryClass  = {cs.CL},
	}

	@misc{shin2020biomegatron,
	  author        = {Shin, Hoo-Chang and Zhang, Yang and Bakhturina, Evelina and Puri, Raul and Patwary, Mostofa and Shoeybi, Mohammad and Mani, Raghav},
	  title         = {{BioMegatron}: Larger Biomedical Domain Language Model},
	  year          = {2020},
	  eprint        = {2010.06060},
	  archivePrefix = {arXiv},
	  primaryClass  = {cs.CL},
	}

	@inproceedings{vaswani2017attention,
	  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
	  title     = {Attention is all you need},
	  booktitle = {Advances in Neural Information Processing Systems},
	  year      = {2017},
	  pages     = {6000--6010},
	}

	@article{sennrich2015neural,
	  author  = {Sennrich, Rico and Haddow, Barry and Birch, Alexandra},
	  title   = {Neural machine translation of rare words with subword units},
	  year    = {2015},
	  journal = {arXiv preprint arXiv:1508.07909},
	}

	@article{provilkov2019bpe,
	  author  = {Provilkov, Ivan and Emelianenko, Dmitrii and Voita, Elena},
	  title   = {{BPE}-Dropout: Simple and Effective Subword Regularization},
	  journal = {arXiv preprint arXiv:1910.13267},
	  year    = {2019},
	}

	@article{post2018call,
	  author  = {Post, Matt},
	  title   = {A Call for Clarity in Reporting {BLEU} Scores},
	  journal = {arXiv preprint arXiv:1804.08771},
	  year    = {2018},
	}

	@misc{zhang2021sgdqa,
	  author        = {Zhang, Yang and Noroozi, Vahid and Bakhturina, Evelina and Ginsburg, Boris},
	  title         = {{SGD-QA}: Fast Schema-Guided Dialogue State Tracking for Unseen Services},
	  year          = {2021},
	  eprint        = {2105.08049},
	  archivePrefix = {arXiv},
	  primaryClass  = {cs.CL},
	}

	@article{zhang2019neural,
	  author  = {Zhang, Hao and Sproat, R. and Ng, Axel H. and Stahlberg, Felix and Peng, Xiaochang and Gorman, Kyle and Roark, B.},
	  title   = {Neural Models of Text Normalization for Speech Applications},
	  journal = {Computational Linguistics},
	  year    = {2019},
	  pages   = {293--338},
	}

	@misc{liu2021selfalignment,
	  author        = {Liu, Fangyu and Shareghi, Ehsan and Meng, Zaiqiao and Basaldella, Marco and Collier, Nigel},
	  title         = {Self-Alignment Pretraining for Biomedical Entity Representations},
	  eprint        = {2010.11784},
	  archivePrefix = {arXiv},
	  primaryClass  = {cs.CL},
	  year          = {2021},
	}

	@article{gulcehre2015using,
	  author  = {Gulcehre, Caglar and Firat, Orhan and Xu, Kelvin and Cho, Kyunghyun and Barrault, Loic and Lin, Huei-Chi and Bougares, Fethi and Schwenk, Holger and Bengio, Yoshua},
	  title   = {On using monolingual corpora in neural machine translation},
	  year    = {2015},
	  journal = {arXiv preprint arXiv:1503.03535},
	}

	@article{yee2019simple,
	  author  = {Yee, Kyra and Ng, Nathan and Dauphin, Yann N and Auli, Michael},
	  title   = {Simple and effective noisy channel modeling for neural machine translation},
	  year    = {2019},
	  journal = {arXiv preprint arXiv:1908.05731},
	}

	@inproceedings{koehnetal2007moses,
	  author    = {Koehn, Philipp and
	               Hoang, Hieu and
	               Birch, Alexandra and
	               Callison-Burch, Chris and
	               Federico, Marcello and
	               Bertoldi, Nicola and
	               Cowan, Brooke and
	               Shen, Wade and
	               Moran, Christine and
	               Zens, Richard and
	               Dyer, Chris and
	               Bojar, Ond{\v{r}}ej and
	               Constantin, Alexandra and
	               Herbst, Evan},
	  title     = {{Moses}: Open Source Toolkit for Statistical Machine Translation},
	  booktitle = {Proceedings of the 45th Annual Meeting of the Association for Computational Linguistics Companion Volume Proceedings of the Demo and Poster Sessions},
	  month     = jun,
	  year      = {2007},
	  address   = {Prague, Czech Republic},
	  publisher = {Association for Computational Linguistics},
	  pages     = {177--180},
	  url       = {https://aclanthology.org/P07-2045},
	}

	@inproceedings{sunkara20_interspeech,
	  author    = {Sunkara, Monica and Ronanki, Srikanth and Bekal, Dhanush and Bodapati, Sravan and Kirchhoff, Katrin},
	  title     = {Multimodal Semi-Supervised Learning Framework for Punctuation Prediction in Conversational Speech},
	  booktitle = {Proc. Interspeech 2020},
	  year      = {2020},
	  pages     = {4911--4915},
	  doi       = {10.21437/Interspeech.2020-3074},
	}

	@article{chen2019bert,
	  author  = {Chen, Qian and Zhuo, Zhu and Wang, Wen},
	  title   = {{BERT} for Joint Intent Classification and Slot Filling},
	  journal = {arXiv preprint arXiv:1902.10909},
	  year    = {2019},
	}

	@article{borgeaud2021improving,
	  author  = {Borgeaud, Sebastian and Mensch, Arthur and Hoffmann, Jordan and Cai, Trevor and Rutherford, Eliza and Millican, Katie and van den Driessche, George and Lespiau, Jean-Baptiste and Damoc, Bogdan and Clark, Aidan and others},
	  title   = {Improving language models by retrieving from trillions of tokens},
	  journal = {arXiv preprint arXiv:2112.04426},
	  year    = {2021},
	}

	@article{su2021roformer,
	  author  = {Su, Jianlin and Lu, Yu and Pan, Shengfeng and Wen, Bo and Liu, Yunfeng},
	  title   = {{RoFormer}: Enhanced Transformer with Rotary Position Embedding},
	  journal = {arXiv preprint arXiv:2104.09864},
	  year    = {2021},
	}

	@article{reimers2019sentence,
	  author  = {Reimers, Nils and Gurevych, Iryna},
	  title   = {Sentence-{BERT}: Sentence Embeddings using {Siamese} {BERT}-Networks},
	  journal = {arXiv preprint arXiv:1908.10084},
	  year    = {2019},
	}

	@article{yang2022tensor,
	  author  = {Yang, Greg and Hu, Edward J and Babuschkin, Igor and Sidor, Szymon and Liu, Xiaodong and Farhi, David and Ryder, Nick and Pachocki, Jakub and Chen, Weizhu and Gao, Jianfeng},
	  title   = {{Tensor Programs V}: Tuning Large Neural Networks via Zero-Shot Hyperparameter Transfer},
	  journal = {arXiv preprint arXiv:2203.03466},
	  year    = {2022},
	}

	@article{jegou2022faiss,
	  author  = {J{\'e}gou, Herv{\'e} and Douze, Matthijs and Johnson, Jeff and Hosseini, Lucas and Deng, Chengqi},
	  title   = {Faiss: Similarity search and clustering of dense vectors library},
	  journal = {Astrophysics Source Code Library},
	  year    = {2022},
	  pages   = {ascl--2210},
	}