ti250 committed on
Commit
b6676cd
1 Parent(s): 084f39f

Upload 15 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ eval_nbest_predictions.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,41 @@
  ---
- license: mit
+ tags:
+ - generated_from_trainer
+ datasets:
+ - squad_v2
+ model-index:
+ - name: opticalbert_cased-squad2
+   results: []
  ---
+
+ # opticalbert_cased-squad2
+
+ This model is a fine-tuned version of [opticalmaterials/opticalbert_cased](https://huggingface.co/opticalmaterials/opticalbert_cased) on the squad_v2 dataset.
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 3e-05
+ - train_batch_size: 12
+ - eval_batch_size: 8
+ - seed: 0
+ - distributed_type: multi-GPU
+ - num_devices: 20
+ - total_train_batch_size: 240
+ - total_eval_batch_size: 160
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - num_epochs: 10.0
+
+ ### Framework versions
+
+ - Transformers 4.25.1
+ - Pytorch 1.12.0a0+git664058f
+ - Datasets 2.7.1
+ - Tokenizers 0.12.1
+
+ ## Acknowledgements
+
+ This model was trained for the paper "How beneficial is pre-training on a narrow domain-specific corpus for information extraction about photocatalytic water splitting?" by Taketomo Isazawa and Jacqueline M. Cole. J.M.C. is grateful for the BASF/Royal Academy of Engineering Research Chair in Data-Driven Molecular Engineering of Functional Materials, which includes PhD studentship support (for T.I.). This Chair is also partly supported by the Science and Technology Facilities Council. The authors are also indebted to the Argonne Leadership Computing Facility, which is a DOE Office of Science User Facility, for use of its research resources, under contract No. DE-AC02-06CH11357.
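The card stops short of a usage snippet. As a minimal sketch, the fine-tuned checkpoint can be queried through the `question-answering` pipeline; the repo id `opticalmaterials/opticalbert_cased-squad2` below is an assumption inferred from the model name in the card, and the question/context pair is illustrative only:

```python
from transformers import pipeline

# Assumed repo id, inferred from the model name in the card above.
model_id = "opticalmaterials/opticalbert_cased-squad2"

# SQuAD v2-style extractive QA. handle_impossible_answer=True lets the
# pipeline return an empty answer when the context contains no answer,
# mirroring the NoAns half of the squad_v2 evaluation.
qa = pipeline("question-answering", model=model_id, tokenizer=model_id)

result = qa(
    question="What is the band gap of anatase TiO2?",
    context=(
        "Anatase TiO2 has a band gap of about 3.2 eV, so it absorbs "
        "only in the ultraviolet region of the solar spectrum."
    ),
    handle_impossible_answer=True,
)
print(result["answer"], result["score"])
```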
all_results.json ADDED
@@ -0,0 +1,22 @@
+ {
+     "epoch": 10.0,
+     "eval_HasAns_exact": 71.693657219973,
+     "eval_HasAns_f1": 79.04795108852238,
+     "eval_HasAns_total": 5928,
+     "eval_NoAns_exact": 76.9049621530698,
+     "eval_NoAns_f1": 76.9049621530698,
+     "eval_NoAns_total": 5945,
+     "eval_best_exact": 74.30304051208624,
+     "eval_best_exact_thresh": 0.0,
+     "eval_best_f1": 77.97492243348442,
+     "eval_best_f1_thresh": 0.0,
+     "eval_exact": 74.30304051208624,
+     "eval_f1": 77.97492243348445,
+     "eval_samples": 11974,
+     "eval_total": 11873,
+     "train_loss": 0.6307362275965074,
+     "train_runtime": 1688.4541,
+     "train_samples": 130544,
+     "train_samples_per_second": 773.157,
+     "train_steps_per_second": 3.222
+ }
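The overall SQuAD v2 scores are sample-weighted averages of the answerable (HasAns) and unanswerable (NoAns) splits; a quick check against the numbers above:

```python
# Overall exact match = count-weighted average of the two splits.
has_exact, has_n = 71.693657219973, 5928   # eval_HasAns_exact / _total
no_exact, no_n = 76.9049621530698, 5945    # eval_NoAns_exact / _total

overall = (has_exact * has_n + no_exact * no_n) / (has_n + no_n)
print(overall)  # 74.30304051208624, matching "eval_exact"
```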
config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "_name_or_path": "opticalmaterials/opticalbert_cased",
+   "architectures": [
+     "BertForQuestionAnswering"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.25.1",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 28996
+ }
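This config describes a standard BERT-base cased encoder (12 layers, hidden size 768, 28,996-token vocabulary) topped with a span-prediction QA head. As a sanity-check sketch, the architecture can be instantiated from the config alone to confirm the parameter count is consistent with the ~431 MB float32 checkpoint uploaded below:

```python
from transformers import BertConfig, BertForQuestionAnswering

# Rebuild the architecture from the key fields of config.json; weights are
# randomly initialised, so nothing needs to be downloaded for this check.
config = BertConfig(
    vocab_size=28996,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    max_position_embeddings=512,
    type_vocab_size=2,
)
model = BertForQuestionAnswering(config)

n_params = sum(p.numel() for p in model.parameters())
print(f"{n_params:,} parameters")               # ~108M: BERT-base + QA head
print(f"~{4 * n_params / 1e6:.0f} MB in fp32")  # ~431 MB vs. 430,955,313 bytes
```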
eval_nbest_predictions.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ab5d7be22dd6abdc6bc41a2e3bbd23b333a41ee172db79cbaae2509163b614b0
+ size 56055087
eval_null_odds.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_predictions.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_results.json ADDED
@@ -0,0 +1,17 @@
+ {
+     "epoch": 10.0,
+     "eval_HasAns_exact": 71.693657219973,
+     "eval_HasAns_f1": 79.04795108852238,
+     "eval_HasAns_total": 5928,
+     "eval_NoAns_exact": 76.9049621530698,
+     "eval_NoAns_f1": 76.9049621530698,
+     "eval_NoAns_total": 5945,
+     "eval_best_exact": 74.30304051208624,
+     "eval_best_exact_thresh": 0.0,
+     "eval_best_f1": 77.97492243348442,
+     "eval_best_f1_thresh": 0.0,
+     "eval_exact": 74.30304051208624,
+     "eval_f1": 77.97492243348445,
+     "eval_samples": 11974,
+     "eval_total": 11873
+ }
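The `eval_best_*_thresh` values of 0.0 tie into SQuAD v2's no-answer mechanism: `eval_null_odds.json` (added above) stores, per question id, the margin of the null ("no answer") score over the best span score, and a question is predicted unanswerable when that margin exceeds the threshold. A sketch of the decision rule, assuming the usual qid-to-value layout of those files:

```python
import json

# eval_predictions.json: qid -> best non-null span text (assumed layout)
# eval_null_odds.json:   qid -> null score minus best span score (assumed)
with open("eval_predictions.json") as f:
    predictions = json.load(f)
with open("eval_null_odds.json") as f:
    null_odds = json.load(f)

THRESH = 0.0  # eval_best_exact_thresh / eval_best_f1_thresh above
final = {
    qid: "" if null_odds[qid] > THRESH else answer
    for qid, answer in predictions.items()
}
```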
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e1d1205148d1a2e34cc1922d743eb9fd7de68cd812be0d75c5d4f7ba3d187c7e
+ size 430955313
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "cls_token": "[CLS]",
+   "do_lower_case": false,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "name_or_path": "opticalmaterials/opticalbert_cased",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "special_tokens_map_file": null,
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]",
+   "use_fast": true
+ }
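Since `do_lower_case` is false, the tokenizer preserves case, which matters for chemical formulae in this domain. A small sketch (repo id again assumed from the model name):

```python
from transformers import AutoTokenizer

# Assumed repo id; the tokenizer files are the ones added in this commit.
tok = AutoTokenizer.from_pretrained("opticalmaterials/opticalbert_cased-squad2")

# With a cased vocab, "TiO2" is WordPiece-split without being lower-cased
# first, so the Ti/O capitalisation survives tokenization.
print(tok.tokenize("TiO2 photocatalytic water splitting"))
```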
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 10.0,
+     "train_loss": 0.6307362275965074,
+     "train_runtime": 1688.4541,
+     "train_samples": 130544,
+     "train_samples_per_second": 773.157,
+     "train_steps_per_second": 3.222
+ }
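These throughput figures are internally consistent with the hyperparameters in the card; a quick cross-check:

```python
import math

samples, total_batch, epochs, runtime = 130544, 240, 10, 1688.4541

# 130,544 samples / 240 per step -> 544 optimiser steps per epoch,
# 5,440 in total, matching global_step in trainer_state.json below.
steps = math.ceil(samples / total_batch) * epochs
print(steps)                       # 5440
print(samples * epochs / runtime)  # ~773.157 train_samples_per_second
print(steps / runtime)             # ~3.222 train_steps_per_second
```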
trainer_state.json ADDED
@@ -0,0 +1,85 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 10.0,
+   "global_step": 5440,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.92,
+       "learning_rate": 2.724264705882353e-05,
+       "loss": 1.6219,
+       "step": 500
+     },
+     {
+       "epoch": 1.84,
+       "learning_rate": 2.448529411764706e-05,
+       "loss": 1.0069,
+       "step": 1000
+     },
+     {
+       "epoch": 2.76,
+       "learning_rate": 2.1727941176470588e-05,
+       "loss": 0.8043,
+       "step": 1500
+     },
+     {
+       "epoch": 3.68,
+       "learning_rate": 1.8970588235294116e-05,
+       "loss": 0.6756,
+       "step": 2000
+     },
+     {
+       "epoch": 4.6,
+       "learning_rate": 1.6213235294117647e-05,
+       "loss": 0.5709,
+       "step": 2500
+     },
+     {
+       "epoch": 5.51,
+       "learning_rate": 1.3455882352941177e-05,
+       "loss": 0.4862,
+       "step": 3000
+     },
+     {
+       "epoch": 6.43,
+       "learning_rate": 1.0698529411764707e-05,
+       "loss": 0.4233,
+       "step": 3500
+     },
+     {
+       "epoch": 7.35,
+       "learning_rate": 7.941176470588236e-06,
+       "loss": 0.3724,
+       "step": 4000
+     },
+     {
+       "epoch": 8.27,
+       "learning_rate": 5.183823529411765e-06,
+       "loss": 0.3353,
+       "step": 4500
+     },
+     {
+       "epoch": 9.19,
+       "learning_rate": 2.4264705882352943e-06,
+       "loss": 0.3038,
+       "step": 5000
+     },
+     {
+       "epoch": 10.0,
+       "step": 5440,
+       "total_flos": 2.0576085260160205e+17,
+       "train_loss": 0.6307362275965074,
+       "train_runtime": 1688.4541,
+       "train_samples_per_second": 773.157,
+       "train_steps_per_second": 3.222
+     }
+   ],
+   "max_steps": 5440,
+   "num_train_epochs": 10,
+   "total_flos": 2.0576085260160205e+17,
+   "trial_name": null,
+   "trial_params": null
+ }
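The logged learning rates match a linear decay from the initial 3e-05 to zero over `max_steps` = 5440 with no warmup; a one-line check:

```python
# Linear schedule, zero warmup: lr(step) = base_lr * (1 - step / max_steps)
def linear_lr(step, base_lr=3e-05, max_steps=5440):
    return base_lr * (1 - step / max_steps)

print(linear_lr(500))   # ~2.724264705882353e-05, as logged at step 500
print(linear_lr(5000))  # ~2.4264705882352943e-06, as logged at step 5000
```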
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:29f7396818ebacd379214c25c8f96df8bf8deb231006ecf201c21040b0b20281
+ size 4207
vocab.txt ADDED
The diff for this file is too large to render. See raw diff