bayuela committed
Commit: 9fc5870
Parent: 001b831

New model Version

rng_state.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:6991cc6552c5b76dbe1f0de75531c3a899fb22204ca73ad87b7212dd9296b6da
- size 14244
 
scheduler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:a415030a49c53672ccac7723cf7f1e258eb6d2a6e1da2d25ddca5348e51f0041
- size 1064
 
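Both rng_state.pth and scheduler.pt were tracked with Git LFS, so the deleted diffs above show only the pointer files: the oid field is the SHA-256 of the actual binary and size is its byte count. A minimal sketch (not part of the commit) of checking a downloaded binary against such a pointer, assuming Python and a local copy of the file; the path below is a placeholder:

import hashlib

def lfs_oid(path: str) -> str:
    # Return the SHA-256 hex digest that a Git LFS pointer records as its oid.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# "rng_state.pth" is a placeholder local path; compare the output with the oid above.
print(lfs_oid("rng_state.pth"))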
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
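special_tokens_map.json records which strings the tokenizer treats as special tokens. As a hedged illustration (not part of the commit), this is roughly how those entries surface when the repository is loaded with the transformers library; "./model" stands in for a local checkout:

from transformers import AutoTokenizer

# "./model" is a placeholder path to a local checkout of this repository.
tokenizer = AutoTokenizer.from_pretrained("./model")

# Each key in special_tokens_map.json becomes an attribute on the tokenizer.
print(tokenizer.cls_token, tokenizer.sep_token)    # [CLS] [SEP]
print(tokenizer.pad_token, tokenizer.unk_token)    # [PAD] [UNK]
print(tokenizer.mask_token)                        # [MASK]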
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
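tokenizer_config.json declares a lower-casing BertTokenizer with a 512-token maximum length, and its added_tokens_decoder pins the special tokens to the usual BERT WordPiece ids ([PAD]=0, [UNK]=100, [CLS]=101, [SEP]=102, [MASK]=103). A small sketch, assuming the transformers library and a local checkout at the placeholder path "./model", of checking that the loaded tokenizer reflects these settings:

from transformers import BertTokenizer

# "./model" is a placeholder path to a local checkout of this repository.
tokenizer = BertTokenizer.from_pretrained("./model")

# The ids should match added_tokens_decoder above.
for token in ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"]:
    print(token, tokenizer.convert_tokens_to_ids(token))

print(tokenizer.model_max_length)   # 512, from model_max_length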
trainer_state.json DELETED
@@ -1,143 +0,0 @@
- {
-   "best_metric": 0.2674808204174042,
-   "best_model_checkpoint": "./results/checkpoint-2416",
-   "epoch": 3.0,
-   "eval_steps": 500,
-   "global_step": 7248,
-   "is_hyper_param_search": false,
-   "is_local_process_zero": true,
-   "is_world_process_zero": true,
-   "log_history": [
-     {
-       "epoch": 0.20695364238410596,
-       "grad_norm": 0.06496760994195938,
-       "learning_rate": 1.862030905077263e-05,
-       "loss": 0.1879,
-       "step": 500
-     },
-     {
-       "epoch": 0.4139072847682119,
-       "grad_norm": 0.2046840637922287,
-       "learning_rate": 1.7240618101545256e-05,
-       "loss": 0.1544,
-       "step": 1000
-     },
-     {
-       "epoch": 0.6208609271523179,
-       "grad_norm": 78.30413818359375,
-       "learning_rate": 1.5860927152317882e-05,
-       "loss": 0.1443,
-       "step": 1500
-     },
-     {
-       "epoch": 0.8278145695364238,
-       "grad_norm": 13.444316864013672,
-       "learning_rate": 1.448123620309051e-05,
-       "loss": 0.1214,
-       "step": 2000
-     },
-     {
-       "epoch": 1.0,
-       "eval_loss": 0.2674808204174042,
-       "eval_runtime": 68.0862,
-       "eval_samples_per_second": 31.548,
-       "eval_steps_per_second": 3.951,
-       "step": 2416
-     },
-     {
-       "epoch": 1.0347682119205297,
-       "grad_norm": 11.859747886657715,
-       "learning_rate": 1.3101545253863135e-05,
-       "loss": 0.1299,
-       "step": 2500
-     },
-     {
-       "epoch": 1.2417218543046358,
-       "grad_norm": 7.801605701446533,
-       "learning_rate": 1.1721854304635763e-05,
-       "loss": 0.1358,
-       "step": 3000
-     },
-     {
-       "epoch": 1.4486754966887418,
-       "grad_norm": 9.417176246643066,
-       "learning_rate": 1.034216335540839e-05,
-       "loss": 0.1331,
-       "step": 3500
-     },
-     {
-       "epoch": 1.6556291390728477,
-       "grad_norm": 0.017003627493977547,
-       "learning_rate": 8.962472406181017e-06,
-       "loss": 0.1325,
-       "step": 4000
-     },
-     {
-       "epoch": 1.8625827814569536,
-       "grad_norm": 7.609455585479736,
-       "learning_rate": 7.582781456953643e-06,
-       "loss": 0.1273,
-       "step": 4500
-     },
-     {
-       "epoch": 2.0,
-       "eval_loss": 0.2778768837451935,
-       "eval_runtime": 67.8572,
-       "eval_samples_per_second": 31.655,
-       "eval_steps_per_second": 3.964,
-       "step": 4832
-     },
-     {
-       "epoch": 2.0695364238410594,
-       "grad_norm": 0.0077050491236150265,
-       "learning_rate": 6.203090507726269e-06,
-       "loss": 0.125,
-       "step": 5000
-     },
-     {
-       "epoch": 2.2764900662251657,
-       "grad_norm": 9.524530410766602,
-       "learning_rate": 4.823399558498897e-06,
-       "loss": 0.0672,
-       "step": 5500
-     },
-     {
-       "epoch": 2.4834437086092715,
-       "grad_norm": 3.2392263412475586,
-       "learning_rate": 3.443708609271523e-06,
-       "loss": 0.0731,
-       "step": 6000
-     },
-     {
-       "epoch": 2.6903973509933774,
-       "grad_norm": 0.4321236312389374,
-       "learning_rate": 2.06401766004415e-06,
-       "loss": 0.0818,
-       "step": 6500
-     },
-     {
-       "epoch": 2.8973509933774837,
-       "grad_norm": 0.006676756776869297,
-       "learning_rate": 6.843267108167771e-07,
-       "loss": 0.0691,
-       "step": 7000
-     },
-     {
-       "epoch": 3.0,
-       "eval_loss": 0.30005592107772827,
-       "eval_runtime": 67.9967,
-       "eval_samples_per_second": 31.59,
-       "eval_steps_per_second": 3.956,
-       "step": 7248
-     }
-   ],
-   "logging_steps": 500,
-   "max_steps": 7248,
-   "num_input_tokens_seen": 0,
-   "num_train_epochs": 3,
-   "save_steps": 500,
-   "total_flos": 1.52529694887168e+16,
-   "train_batch_size": 8,
-   "trial_name": null,
-   "trial_params": null
- }
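The removed trainer_state.json recorded a three-epoch run (7248 steps, batch size 8) in which eval_loss rose from 0.2675 at step 2416 to 0.2779 at step 4832 and 0.3001 at step 7248, which is why best_model_checkpoint pointed at checkpoint-2416. A minimal sketch, assuming Python and a saved copy of this file at the placeholder path below, of how that best checkpoint can be recovered from log_history:

import json

# "trainer_state.json" is a placeholder path to a saved copy of the deleted file.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the evaluation entries and pick the one with the lowest eval_loss.
evals = [entry for entry in state["log_history"] if "eval_loss" in entry]
best = min(evals, key=lambda entry: entry["eval_loss"])

print(best["step"], best["eval_loss"])   # 2416 0.2674808204174042
print(state["best_model_checkpoint"])    # ./results/checkpoint-2416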
 
vocab.txt ADDED
The diff for this file is too large to render. See raw diff
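For a BertTokenizer, vocab.txt is a plain-text WordPiece vocabulary with one token per line, where the 0-based line index is the token id. A small sketch, assuming a local copy of the added file at the placeholder path below, of inspecting it:

# "vocab.txt" is a placeholder path to a local copy of the added file.
with open("vocab.txt", encoding="utf-8") as f:
    vocab = [line.rstrip("\n") for line in f]

print(len(vocab))   # vocabulary size
print(vocab[101])   # expected to be [CLS] if the ids match added_tokens_decoder above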