sultan committed on
Commit
970b113
1 Parent(s): 94c9987

Initial commit

Browse files
Files changed (4) hide show
  1. README.md +88 -0
  2. config.json +28 -0
  3. pytorch_model.bin +3 -0
  4. vocab.txt +0 -0
README.md ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # BioM-Transformers: Building Large Biomedical Language Models with BERT, ALBERT and ELECTRA
2
+
3
+ # Abstract
4
+
5
+
6
+ The impact of design choices on the performance
7
+ of biomedical language models recently
8
+ has been a subject for investigation. In
9
+ this paper, we empirically study biomedical
10
+ domain adaptation with large transformer models
11
+ using different design choices. We evaluate
12
+ the performance of our pretrained models
13
+ against other existing biomedical language
14
+ models in the literature. Our results show that
15
+ we achieve state-of-the-art results on several
16
+ biomedical domain tasks despite using similar
17
+ or less computational cost compared to other
18
+ models in the literature. Our findings highlight
19
+ the significant effect of design choices on
20
+ improving the performance of biomedical language
21
+ models.
22
+
23
+ # Model Description
24
+ - This model is fine-tuned on the SQuAD2.0 dataset and then on the BioASQ8B-Factoid training dataset. We convert the BioASQ8B-Factoid training dataset to SQuAD1.1 format, and then train and evaluate our model (BioM-ELECTRA-Large-SQuAD2) on this dataset.
25
+
26
+ - Please note that this version (PyTorch) is different from the one we used in our participation in BioASQ9B (TensorFlow with Layer-Wise Decay). We combine all five batches of the BioASQ8B testing dataset into one dev.json file.
27
+
28
+ - Below are the unofficial results of our models compared with the original ELECTRA Base and Large models:
29
+
30
+
31
+ | Model | Exact Match (EM) | F1 Score |
32
+ | --- | --- | --- |
33
+ | ELECTRA-Base-SQuAD2 | 61.89 | 74.39 |
34
+ | BioM-ELECTRA-Base-SQuAD2-BioASQ8B | 70.31 | 80.90 |
35
+ | ELECTRA-Large-SQuAD2 | 67.36 | 78.90 |
36
+ | BioM-ELECTRA-Large-SQuAD2-BioASQ8B | 74.31 | 84.72 |
37
+
38
+
39
+
40
+ Training script
41
+
42
+ ```bash
43
+ python3 run_squad.py --model_type electra --model_name_or_path sultan/BioM-ELECTRA-Large-SQuAD2 \
44
+ --train_file BioASQ8B/train.json \
45
+ --predict_file BioASQ8B/dev.json \
46
+ --do_lower_case \
47
+ --do_train \
48
+ --do_eval \
49
+ --threads 20 \
50
+ --version_2_with_negative \
51
+ --num_train_epochs 3 \
52
+ --learning_rate 3e-5 \
53
+ --max_seq_length 512 \
54
+ --doc_stride 128 \
55
+ --per_gpu_train_batch_size 8 \
56
+ --gradient_accumulation_steps 2 \
57
+ --per_gpu_eval_batch_size 128 \
58
+ --logging_steps 50 \
59
+ --save_steps 5000 \
60
+ --fp16 \
61
+ --fp16_opt_level O1 \
62
+ --overwrite_output_dir \
63
+ --output_dir BioM-ELECTRA-Base-SQuAD-BioASQ \
64
+ --overwrite_cache
65
+ ```
66
+
67
+ # Acknowledgment
68
+
69
+ We would like to acknowledge the support of the TensorFlow Research Cloud (TFRC) team, which granted us access to TPUv3 units.
70
+
71
+ # Citation
72
+
73
+
74
+ ```bibtex
75
+ @inproceedings{alrowili-shanker-2021-biom,
76
+ title = "{B}io{M}-Transformers: Building Large Biomedical Language Models with {BERT}, {ALBERT} and {ELECTRA}",
77
+ author = "Alrowili, Sultan and
78
+ Shanker, Vijay",
79
+ booktitle = "Proceedings of the 20th Workshop on Biomedical Language Processing",
80
+ month = jun,
81
+ year = "2021",
82
+ address = "Online",
83
+ publisher = "Association for Computational Linguistics",
84
+ url = "https://www.aclweb.org/anthology/2021.bionlp-1.24",
85
+ pages = "221--227",
86
+ abstract = "The impact of design choices on the performance of biomedical language models recently has been a subject for investigation. In this paper, we empirically study biomedical domain adaptation with large transformer models using different design choices. We evaluate the performance of our pretrained models against other existing biomedical language models in the literature. Our results show that we achieve state-of-the-art results on several biomedical domain tasks despite using similar or less computational cost compared to other models in the literature. Our findings highlight the significant effect of design choices on improving the performance of biomedical language models.",
87
+ }
88
+ ```
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "sultan/BioM-ELECTRA-Large-SQuAD2",
3
+ "architectures": [
4
+ "ElectraForQuestionAnswering"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "embedding_size": 1024,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 1024,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4096,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "electra",
16
+ "num_attention_heads": 16,
17
+ "num_hidden_layers": 24,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "summary_activation": "gelu",
21
+ "summary_last_dropout": 0.1,
22
+ "summary_type": "first",
23
+ "summary_use_proj": true,
24
+ "torch_dtype": "float32",
25
+ "transformers_version": "4.9.0.dev0",
26
+ "type_vocab_size": 2,
27
+ "vocab_size": 28895
28
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a735bb061833678abb0c0be4c4a6d2db8c75c8b66fd77e138f3d8679d99602f
3
+ size 1329876465
vocab.txt ADDED
The diff for this file is too large to render. See raw diff