rntc committed
Commit 8e2645c
1 Parent(s): 86cc3e7

update citation

Files changed (1): README.md (+20 -7)
README.md CHANGED
@@ -129,15 +129,28 @@ To ensure reliability, we averaged over 10 evaluations with different seeds.
 ## Citation information
 
 ```bibtex
-@misc{touchent2023camembertbio,
-      title={CamemBERT-bio: a Tasty French Language Model Better for your Health},
-      author={Rian Touchent and Laurent Romary and Eric de la Clergerie},
-      year={2023},
-      eprint={2306.15550},
-      archivePrefix={arXiv},
-      primaryClass={cs.CL}
+
+@inproceedings{touchent-de-la-clergerie-2024-camembert-bio,
+    title = "{C}amem{BERT}-bio: Leveraging Continual Pre-training for Cost-Effective Models on {F}rench Biomedical Data",
+    author = "Touchent, Rian and
+      de la Clergerie, {\'E}ric",
+    editor = "Calzolari, Nicoletta and
+      Kan, Min-Yen and
+      Hoste, Veronique and
+      Lenci, Alessandro and
+      Sakti, Sakriani and
+      Xue, Nianwen",
+    booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
+    month = may,
+    year = "2024",
+    address = "Torino, Italia",
+    publisher = "ELRA and ICCL",
+    url = "https://aclanthology.org/2024.lrec-main.241",
+    pages = "2692--2701",
+    abstract = "Clinical data in hospitals are increasingly accessible for research through clinical data warehouses. However these documents are unstructured and it is therefore necessary to extract information from medical reports to conduct clinical studies. Transfer learning with BERT-like models such as CamemBERT has allowed major advances for French, especially for named entity recognition. However, these models are trained for plain language and are less efficient on biomedical data. Addressing this gap, we introduce CamemBERT-bio, a dedicated French biomedical model derived from a new public French biomedical dataset. Through continual pre-training of the original CamemBERT, CamemBERT-bio achieves an improvement of 2.54 points of F1-score on average across various biomedical named entity recognition tasks, reinforcing the potential of continual pre-training as an equally proficient yet less computationally intensive alternative to training from scratch. Additionally, we highlight the importance of using a standard evaluation protocol that provides a clear view of the current state-of-the-art for French biomedical models.",
 }
 
+
 @inproceedings{touchent:hal-04130187,
   TITLE = {{CamemBERT-bio : Un mod{\`e}le de langue fran{\c c}ais savoureux et meilleur pour la sant{\'e}}},
   AUTHOR = {Touchent, Rian and Romary, Laurent and De La Clergerie, Eric},
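To cite the model from a LaTeX document using the updated key, a minimal sketch (the `main.tex` and `references.bib` file names are illustrative assumptions, not part of this commit):

```latex
% main.tex -- minimal sketch; assumes the BibTeX entries above were
% copied into a hypothetical references.bib next to this file.
\documentclass{article}
\begin{document}
CamemBERT-bio~\cite{touchent-de-la-clergerie-2024-camembert-bio} adapts
CamemBERT to French biomedical text via continual pre-training; the
French-language companion paper is \cite{touchent:hal-04130187}.
\bibliographystyle{plain}
\bibliography{references}
\end{document}
```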