Alireza1044 committed on
Commit
1fb5f36
1 Parent(s): 77084b9
.gitignore ADDED
@@ -0,0 +1 @@
 
1
+ checkpoint-*/
README.md ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - generated_from_trainer
7
+ datasets:
8
+ - glue
9
+ metrics:
10
+ - spearmanr
11
+ model_index:
12
+ - name: stsb
13
+ results:
14
+ - task:
15
+ name: Text Classification
16
+ type: text-classification
17
+ dataset:
18
+ name: GLUE STSB
19
+ type: glue
20
+ args: stsb
21
+ metric:
22
+ name: Spearmanr
23
+ type: spearmanr
24
+ value: 0.9050744778895732
25
+ ---
26
+
27
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
28
+ should probably proofread and complete it, then remove this comment. -->
29
+
30
+ # stsb
31
+
32
+ This model is a fine-tuned version of [albert-base-v2](https://huggingface.co/albert-base-v2) on the GLUE STSB dataset.
33
+ It achieves the following results on the evaluation set:
34
+ - Loss: 0.3978
35
+ - Pearson: 0.9090
36
+ - Spearmanr: 0.9051
37
+ - Combined Score: 0.9071
38
+
39
+ ## Model description
40
+
41
+ More information needed
42
+
43
+ ## Intended uses & limitations
44
+
45
+ More information needed
46
+
47
+ ## Training and evaluation data
48
+
49
+ More information needed
50
+
51
+ ## Training procedure
52
+
53
+ ### Training hyperparameters
54
+
55
+ The following hyperparameters were used during training:
56
+ - learning_rate: 3e-05
57
+ - train_batch_size: 64
58
+ - eval_batch_size: 8
59
+ - seed: 42
60
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
61
+ - lr_scheduler_type: linear
62
+ - num_epochs: 4.0
63
+
64
+ ### Training results
65
+
66
+
67
+
68
+ ### Framework versions
69
+
70
+ - Transformers 4.9.0
71
+ - PyTorch 1.9.0+cu102
72
+ - Datasets 1.10.2
73
+ - Tokenizers 0.10.3
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "eval_combined_score": 0.9070512696741792,
4
+ "eval_loss": 0.3978178799152374,
5
+ "eval_pearson": 0.9090280614587851,
6
+ "eval_runtime": 13.6802,
7
+ "eval_samples": 1500,
8
+ "eval_samples_per_second": 109.648,
9
+ "eval_spearmanr": 0.9050744778895732,
10
+ "eval_steps_per_second": 13.743,
11
+ "train_loss": 0.43474773830837676,
12
+ "train_runtime": 506.7423,
13
+ "train_samples": 5749,
14
+ "train_samples_per_second": 45.38,
15
+ "train_steps_per_second": 0.71
16
+ }
config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "albert-base-v2",
3
+ "architectures": [
4
+ "AlbertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0,
7
+ "bos_token_id": 2,
8
+ "classifier_dropout_prob": 0.1,
9
+ "down_scale_factor": 1,
10
+ "embedding_size": 128,
11
+ "eos_token_id": 3,
12
+ "finetuning_task": "stsb",
13
+ "gap_size": 0,
14
+ "hidden_act": "gelu_new",
15
+ "hidden_dropout_prob": 0,
16
+ "hidden_size": 768,
17
+ "id2label": {
18
+ "0": "LABEL_0"
19
+ },
20
+ "initializer_range": 0.02,
21
+ "inner_group_num": 1,
22
+ "intermediate_size": 3072,
23
+ "label2id": {
24
+ "LABEL_0": 0
25
+ },
26
+ "layer_norm_eps": 1e-12,
27
+ "max_position_embeddings": 512,
28
+ "model_type": "albert",
29
+ "net_structure_type": 0,
30
+ "num_attention_heads": 12,
31
+ "num_hidden_groups": 1,
32
+ "num_hidden_layers": 12,
33
+ "num_memory_blocks": 0,
34
+ "pad_token_id": 0,
35
+ "position_embedding_type": "absolute",
36
+ "problem_type": "regression",
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.9.0",
39
+ "type_vocab_size": 2,
40
+ "vocab_size": 30000
41
+ }
eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "eval_combined_score": 0.9070512696741792,
4
+ "eval_loss": 0.3978178799152374,
5
+ "eval_pearson": 0.9090280614587851,
6
+ "eval_runtime": 13.6802,
7
+ "eval_samples": 1500,
8
+ "eval_samples_per_second": 109.648,
9
+ "eval_spearmanr": 0.9050744778895732,
10
+ "eval_steps_per_second": 13.743
11
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6fc22d84559cadd2b5bce10cc8caac94c27da6d99c529d76bf426f3179425fd
3
+ size 46752465
runs/Jul26_10-48-21_87e5b6406975/1627296525.161824/events.out.tfevents.1627296525.87e5b6406975.485.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:778b654c978867b2cb0d7a9c4a094ebe63ccaa4843767412b60b2d1d53b98784
3
+ size 4167
runs/Jul26_10-48-21_87e5b6406975/events.out.tfevents.1627296525.87e5b6406975.485.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cde7b06c86b4876bfd8bdc13d98596a6c5fbb3e0ec86ebbc8c57a4e997825d38
3
+ size 3548
runs/Jul26_10-48-21_87e5b6406975/events.out.tfevents.1627297045.87e5b6406975.485.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b52aabf9d844fae40928fec3984949d7fe04cc3ab4b022e11c0a6efedb66424
3
+ size 473
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"do_lower_case": true, "remove_space": true, "keep_accents": false, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "albert-base-v2", "tokenizer_class": "AlbertTokenizer"}
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "train_loss": 0.43474773830837676,
4
+ "train_runtime": 506.7423,
5
+ "train_samples": 5749,
6
+ "train_samples_per_second": 45.38,
7
+ "train_steps_per_second": 0.71
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
+ "global_step": 360,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 4.0,
12
+ "step": 360,
13
+ "total_flos": 137376605764608.0,
14
+ "train_loss": 0.43474773830837676,
15
+ "train_runtime": 506.7423,
16
+ "train_samples_per_second": 45.38,
17
+ "train_steps_per_second": 0.71
18
+ }
19
+ ],
20
+ "max_steps": 360,
21
+ "num_train_epochs": 4,
22
+ "total_flos": 137376605764608.0,
23
+ "trial_name": null,
24
+ "trial_params": null
25
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c592deb26553180813296cece5490d0e30ccd681013f57a4a94259490592f22
3
+ size 2607