sallywww committed on
Commit
f7060f2
1 Parent(s): c89597d
Files changed (32) hide show
  1. config.json +29 -0
  2. merges.txt +0 -0
  3. pytorch_model.bin +3 -0
  4. runs/._Mar21_20-50-00_nv23u9o4yt +0 -0
  5. runs/._Mar21_20-53-18_nv23u9o4yt +0 -0
  6. runs/._Mar21_22-56-19_nv23u9o4yt +0 -0
  7. runs/._Mar21_22-57-47_nv23u9o4yt +0 -0
  8. runs/Mar21_20-50-00_nv23u9o4yt/._1679431813.4022052 +0 -0
  9. runs/Mar21_20-50-00_nv23u9o4yt/._events.out.tfevents.1679431813.nv23u9o4yt.436.0 +3 -0
  10. runs/Mar21_20-50-00_nv23u9o4yt/1679431813.4022052/._events.out.tfevents.1679431813.nv23u9o4yt.436.1 +3 -0
  11. runs/Mar21_20-50-00_nv23u9o4yt/1679431813.4022052/events.out.tfevents.1679431813.nv23u9o4yt.436.1 +3 -0
  12. runs/Mar21_20-50-00_nv23u9o4yt/events.out.tfevents.1679431813.nv23u9o4yt.436.0 +3 -0
  13. runs/Mar21_20-53-18_nv23u9o4yt/._1679432005.2915475 +0 -0
  14. runs/Mar21_20-53-18_nv23u9o4yt/._events.out.tfevents.1679432005.nv23u9o4yt.497.0 +3 -0
  15. runs/Mar21_20-53-18_nv23u9o4yt/1679432005.2915475/._events.out.tfevents.1679432005.nv23u9o4yt.497.1 +3 -0
  16. runs/Mar21_20-53-18_nv23u9o4yt/1679432005.2915475/events.out.tfevents.1679432005.nv23u9o4yt.497.1 +3 -0
  17. runs/Mar21_20-53-18_nv23u9o4yt/events.out.tfevents.1679432005.nv23u9o4yt.497.0 +3 -0
  18. runs/Mar21_22-56-19_nv23u9o4yt/._1679439386.2769547 +0 -0
  19. runs/Mar21_22-56-19_nv23u9o4yt/._events.out.tfevents.1679439386.nv23u9o4yt.2197.0 +3 -0
  20. runs/Mar21_22-56-19_nv23u9o4yt/1679439386.2769547/._events.out.tfevents.1679439386.nv23u9o4yt.2197.1 +3 -0
  21. runs/Mar21_22-56-19_nv23u9o4yt/1679439386.2769547/events.out.tfevents.1679439386.nv23u9o4yt.2197.1 +3 -0
  22. runs/Mar21_22-56-19_nv23u9o4yt/events.out.tfevents.1679439386.nv23u9o4yt.2197.0 +3 -0
  23. runs/Mar21_22-57-47_nv23u9o4yt/._1679439474.4146004 +0 -0
  24. runs/Mar21_22-57-47_nv23u9o4yt/._events.out.tfevents.1679439474.nv23u9o4yt.2261.0 +3 -0
  25. runs/Mar21_22-57-47_nv23u9o4yt/1679439474.4146004/._events.out.tfevents.1679439474.nv23u9o4yt.2261.1 +3 -0
  26. runs/Mar21_22-57-47_nv23u9o4yt/1679439474.4146004/events.out.tfevents.1679439474.nv23u9o4yt.2261.1 +3 -0
  27. runs/Mar21_22-57-47_nv23u9o4yt/events.out.tfevents.1679439474.nv23u9o4yt.2261.0 +3 -0
  28. special_tokens_map.json +30 -0
  29. tokenizer_config.json +42 -0
  30. trainer_state.json +295 -0
  31. training_args.bin +3 -0
  32. vocab.json +0 -0
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/opt-350m",
3
+ "_remove_final_layer_norm": false,
4
+ "activation_dropout": 0.0,
5
+ "activation_function": "relu",
6
+ "architectures": [
7
+ "OPTForCausalLM"
8
+ ],
9
+ "attention_dropout": 0.0,
10
+ "bos_token_id": 2,
11
+ "do_layer_norm_before": false,
12
+ "dropout": 0.1,
13
+ "eos_token_id": 2,
14
+ "ffn_dim": 4096,
15
+ "hidden_size": 1024,
16
+ "init_std": 0.02,
17
+ "layerdrop": 0.0,
18
+ "max_position_embeddings": 2048,
19
+ "model_type": "opt",
20
+ "num_attention_heads": 16,
21
+ "num_hidden_layers": 24,
22
+ "pad_token_id": 1,
23
+ "prefix": "</s>",
24
+ "torch_dtype": "float32",
25
+ "transformers_version": "4.24.0",
26
+ "use_cache": true,
27
+ "vocab_size": 50272,
28
+ "word_embed_proj_dim": 512
29
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:725deaec29f5aa801947dbf2325a2b2e38059a0e804eada9d4a0ad47d5017ec2
3
+ size 1427866167
runs/._Mar21_20-50-00_nv23u9o4yt ADDED
Binary file (4.1 kB). View file
 
runs/._Mar21_20-53-18_nv23u9o4yt ADDED
Binary file (4.1 kB). View file
 
runs/._Mar21_22-56-19_nv23u9o4yt ADDED
Binary file (4.1 kB). View file
 
runs/._Mar21_22-57-47_nv23u9o4yt ADDED
Binary file (4.1 kB). View file
 
runs/Mar21_20-50-00_nv23u9o4yt/._1679431813.4022052 ADDED
Binary file (4.1 kB). View file
 
runs/Mar21_20-50-00_nv23u9o4yt/._events.out.tfevents.1679431813.nv23u9o4yt.436.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1f154d1138cb627d70f3a913f380171e0ea3fd990979bb70a9aa2210dbab4a9
3
+ size 4096
runs/Mar21_20-50-00_nv23u9o4yt/1679431813.4022052/._events.out.tfevents.1679431813.nv23u9o4yt.436.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1f154d1138cb627d70f3a913f380171e0ea3fd990979bb70a9aa2210dbab4a9
3
+ size 4096
runs/Mar21_20-50-00_nv23u9o4yt/1679431813.4022052/events.out.tfevents.1679431813.nv23u9o4yt.436.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62a3de860c9172650a7eae13b0386092638dc20edccce596f467991070f85173
3
+ size 5613
runs/Mar21_20-50-00_nv23u9o4yt/events.out.tfevents.1679431813.nv23u9o4yt.436.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a14b7921cb9d8e3bce4cb4995aed211abfa6b643da42093baf5a184c50ac352
3
+ size 40
runs/Mar21_20-53-18_nv23u9o4yt/._1679432005.2915475 ADDED
Binary file (4.1 kB). View file
 
runs/Mar21_20-53-18_nv23u9o4yt/._events.out.tfevents.1679432005.nv23u9o4yt.497.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1f154d1138cb627d70f3a913f380171e0ea3fd990979bb70a9aa2210dbab4a9
3
+ size 4096
runs/Mar21_20-53-18_nv23u9o4yt/1679432005.2915475/._events.out.tfevents.1679432005.nv23u9o4yt.497.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1f154d1138cb627d70f3a913f380171e0ea3fd990979bb70a9aa2210dbab4a9
3
+ size 4096
runs/Mar21_20-53-18_nv23u9o4yt/1679432005.2915475/events.out.tfevents.1679432005.nv23u9o4yt.497.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13100a2c667fbb29407191350cb5a434b971cac1aaaf02e4ef65dc5232679883
3
+ size 5604
runs/Mar21_20-53-18_nv23u9o4yt/events.out.tfevents.1679432005.nv23u9o4yt.497.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a50b80f01951f32a9f8e2e336bd4a63d511781b9b1a6b15bd9ae4969524b5b6
3
+ size 4655
runs/Mar21_22-56-19_nv23u9o4yt/._1679439386.2769547 ADDED
Binary file (4.1 kB). View file
 
runs/Mar21_22-56-19_nv23u9o4yt/._events.out.tfevents.1679439386.nv23u9o4yt.2197.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1f154d1138cb627d70f3a913f380171e0ea3fd990979bb70a9aa2210dbab4a9
3
+ size 4096
runs/Mar21_22-56-19_nv23u9o4yt/1679439386.2769547/._events.out.tfevents.1679439386.nv23u9o4yt.2197.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1f154d1138cb627d70f3a913f380171e0ea3fd990979bb70a9aa2210dbab4a9
3
+ size 4096
runs/Mar21_22-56-19_nv23u9o4yt/1679439386.2769547/events.out.tfevents.1679439386.nv23u9o4yt.2197.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26f65e132a4bf54dd36aede43c633e3534348bdb33f04fc9784f6ffc2326fca1
3
+ size 5604
runs/Mar21_22-56-19_nv23u9o4yt/events.out.tfevents.1679439386.nv23u9o4yt.2197.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59940f2dff78489e8d2fe5e941fa13904d4d6b1881da453cebcc29e12bc26e84
3
+ size 8814
runs/Mar21_22-57-47_nv23u9o4yt/._1679439474.4146004 ADDED
Binary file (4.1 kB). View file
 
runs/Mar21_22-57-47_nv23u9o4yt/._events.out.tfevents.1679439474.nv23u9o4yt.2261.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1f154d1138cb627d70f3a913f380171e0ea3fd990979bb70a9aa2210dbab4a9
3
+ size 4096
runs/Mar21_22-57-47_nv23u9o4yt/1679439474.4146004/._events.out.tfevents.1679439474.nv23u9o4yt.2261.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1f154d1138cb627d70f3a913f380171e0ea3fd990979bb70a9aa2210dbab4a9
3
+ size 4096
runs/Mar21_22-57-47_nv23u9o4yt/1679439474.4146004/events.out.tfevents.1679439474.nv23u9o4yt.2261.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59a0682d5faeba6f762f25c28327d63b4f3453c0c853518820543e761b3d24d
3
+ size 5604
runs/Mar21_22-57-47_nv23u9o4yt/events.out.tfevents.1679439474.nv23u9o4yt.2261.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:641a45193e22c020cb2bb427fbcca217be92bdddc7762245558dec429430fc26
3
+ size 11124
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "</s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<pad>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "</s>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_prefix_space": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "</s>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "eos_token": {
13
+ "__type": "AddedToken",
14
+ "content": "</s>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "errors": "replace",
21
+ "model_max_length": 512,
22
+ "name_or_path": "facebook/opt-350m",
23
+ "pad_token": {
24
+ "__type": "AddedToken",
25
+ "content": "<pad>",
26
+ "lstrip": false,
27
+ "normalized": true,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ "padding_side": "right",
32
+ "special_tokens_map_file": null,
33
+ "tokenizer_class": "GPT2Tokenizer",
34
+ "unk_token": {
35
+ "__type": "AddedToken",
36
+ "content": "</s>",
37
+ "lstrip": false,
38
+ "normalized": true,
39
+ "rstrip": false,
40
+ "single_word": false
41
+ }
42
+ }
trainer_state.json ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 44.61538461538461,
5
+ "global_step": 45,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.62,
12
+ "learning_rate": 1e-05,
13
+ "loss": 5.5122,
14
+ "step": 1
15
+ },
16
+ {
17
+ "epoch": 1.62,
18
+ "learning_rate": 2e-05,
19
+ "loss": 14.9312,
20
+ "step": 2
21
+ },
22
+ {
23
+ "epoch": 2.62,
24
+ "learning_rate": 1.9973322836635517e-05,
25
+ "loss": 4.8242,
26
+ "step": 3
27
+ },
28
+ {
29
+ "epoch": 3.62,
30
+ "learning_rate": 1.9893433680751105e-05,
31
+ "loss": 5.7161,
32
+ "step": 4
33
+ },
34
+ {
35
+ "epoch": 4.62,
36
+ "learning_rate": 1.9760758775559275e-05,
37
+ "loss": 2.5249,
38
+ "step": 5
39
+ },
40
+ {
41
+ "epoch": 5.62,
42
+ "learning_rate": 1.957600599908406e-05,
43
+ "loss": 2.6135,
44
+ "step": 6
45
+ },
46
+ {
47
+ "epoch": 6.62,
48
+ "learning_rate": 1.9340161087325483e-05,
49
+ "loss": 1.7229,
50
+ "step": 7
51
+ },
52
+ {
53
+ "epoch": 7.62,
54
+ "learning_rate": 1.905448237493147e-05,
55
+ "loss": 1.7059,
56
+ "step": 8
57
+ },
58
+ {
59
+ "epoch": 8.62,
60
+ "learning_rate": 1.872049408143808e-05,
61
+ "loss": 1.4524,
62
+ "step": 9
63
+ },
64
+ {
65
+ "epoch": 9.62,
66
+ "learning_rate": 1.833997817889878e-05,
67
+ "loss": 2.1753,
68
+ "step": 10
69
+ },
70
+ {
71
+ "epoch": 10.62,
72
+ "learning_rate": 1.7914964884292543e-05,
73
+ "loss": 1.9059,
74
+ "step": 11
75
+ },
76
+ {
77
+ "epoch": 11.62,
78
+ "learning_rate": 1.744772182743782e-05,
79
+ "loss": 1.0491,
80
+ "step": 12
81
+ },
82
+ {
83
+ "epoch": 12.62,
84
+ "learning_rate": 1.6940741952206342e-05,
85
+ "loss": 1.1342,
86
+ "step": 13
87
+ },
88
+ {
89
+ "epoch": 13.62,
90
+ "learning_rate": 1.6396730215588913e-05,
91
+ "loss": 1.0765,
92
+ "step": 14
93
+ },
94
+ {
95
+ "epoch": 14.62,
96
+ "learning_rate": 1.581858915557953e-05,
97
+ "loss": 1.0615,
98
+ "step": 15
99
+ },
100
+ {
101
+ "epoch": 15.62,
102
+ "learning_rate": 1.5209403404879305e-05,
103
+ "loss": 0.6614,
104
+ "step": 16
105
+ },
106
+ {
107
+ "epoch": 16.62,
108
+ "learning_rate": 1.4572423233046386e-05,
109
+ "loss": 1.1994,
110
+ "step": 17
111
+ },
112
+ {
113
+ "epoch": 17.62,
114
+ "learning_rate": 1.391104720490156e-05,
115
+ "loss": 0.7368,
116
+ "step": 18
117
+ },
118
+ {
119
+ "epoch": 18.62,
120
+ "learning_rate": 1.3228804047714462e-05,
121
+ "loss": 0.6948,
122
+ "step": 19
123
+ },
124
+ {
125
+ "epoch": 19.62,
126
+ "learning_rate": 1.2529333823916807e-05,
127
+ "loss": 0.4572,
128
+ "step": 20
129
+ },
130
+ {
131
+ "epoch": 20.62,
132
+ "learning_rate": 1.1816368509794365e-05,
133
+ "loss": 0.4539,
134
+ "step": 21
135
+ },
136
+ {
137
+ "epoch": 21.62,
138
+ "learning_rate": 1.1093712083778748e-05,
139
+ "loss": 0.596,
140
+ "step": 22
141
+ },
142
+ {
143
+ "epoch": 22.62,
144
+ "learning_rate": 1.0365220230576592e-05,
145
+ "loss": 0.6658,
146
+ "step": 23
147
+ },
148
+ {
149
+ "epoch": 23.62,
150
+ "learning_rate": 9.634779769423412e-06,
151
+ "loss": 0.449,
152
+ "step": 24
153
+ },
154
+ {
155
+ "epoch": 24.62,
156
+ "learning_rate": 8.906287916221259e-06,
157
+ "loss": 0.416,
158
+ "step": 25
159
+ },
160
+ {
161
+ "epoch": 25.62,
162
+ "learning_rate": 8.183631490205636e-06,
163
+ "loss": 0.7238,
164
+ "step": 26
165
+ },
166
+ {
167
+ "epoch": 26.62,
168
+ "learning_rate": 7.470666176083193e-06,
169
+ "loss": 0.2661,
170
+ "step": 27
171
+ },
172
+ {
173
+ "epoch": 27.62,
174
+ "learning_rate": 6.771195952285541e-06,
175
+ "loss": 0.4413,
176
+ "step": 28
177
+ },
178
+ {
179
+ "epoch": 28.62,
180
+ "learning_rate": 6.088952795098442e-06,
181
+ "loss": 0.2416,
182
+ "step": 29
183
+ },
184
+ {
185
+ "epoch": 29.62,
186
+ "learning_rate": 5.427576766953615e-06,
187
+ "loss": 0.1973,
188
+ "step": 30
189
+ },
190
+ {
191
+ "epoch": 30.62,
192
+ "learning_rate": 4.790596595120699e-06,
193
+ "loss": 0.253,
194
+ "step": 31
195
+ },
196
+ {
197
+ "epoch": 31.62,
198
+ "learning_rate": 4.181410844420473e-06,
199
+ "loss": 0.2261,
200
+ "step": 32
201
+ },
202
+ {
203
+ "epoch": 32.62,
204
+ "learning_rate": 3.6032697844110896e-06,
205
+ "loss": 0.195,
206
+ "step": 33
207
+ },
208
+ {
209
+ "epoch": 33.62,
210
+ "learning_rate": 3.0592580477936606e-06,
211
+ "loss": 0.2191,
212
+ "step": 34
213
+ },
214
+ {
215
+ "epoch": 34.62,
216
+ "learning_rate": 2.5522781725621814e-06,
217
+ "loss": 0.1858,
218
+ "step": 35
219
+ },
220
+ {
221
+ "epoch": 35.62,
222
+ "learning_rate": 2.08503511570746e-06,
223
+ "loss": 0.1376,
224
+ "step": 36
225
+ },
226
+ {
227
+ "epoch": 36.62,
228
+ "learning_rate": 1.660021821101222e-06,
229
+ "loss": 0.1918,
230
+ "step": 37
231
+ },
232
+ {
233
+ "epoch": 37.62,
234
+ "learning_rate": 1.279505918561923e-06,
235
+ "loss": 0.1518,
236
+ "step": 38
237
+ },
238
+ {
239
+ "epoch": 38.62,
240
+ "learning_rate": 9.455176250685338e-07,
241
+ "loss": 0.2115,
242
+ "step": 39
243
+ },
244
+ {
245
+ "epoch": 39.62,
246
+ "learning_rate": 6.598389126745209e-07,
247
+ "loss": 0.1276,
248
+ "step": 40
249
+ },
250
+ {
251
+ "epoch": 40.62,
252
+ "learning_rate": 4.2399400091594154e-07,
253
+ "loss": 0.1432,
254
+ "step": 41
255
+ },
256
+ {
257
+ "epoch": 41.62,
258
+ "learning_rate": 2.392412244407294e-07,
259
+ "loss": 0.1248,
260
+ "step": 42
261
+ },
262
+ {
263
+ "epoch": 42.62,
264
+ "learning_rate": 1.0656631924889749e-07,
265
+ "loss": 0.1649,
266
+ "step": 43
267
+ },
268
+ {
269
+ "epoch": 43.62,
270
+ "learning_rate": 2.667716336448356e-08,
271
+ "loss": 0.1561,
272
+ "step": 44
273
+ },
274
+ {
275
+ "epoch": 44.62,
276
+ "learning_rate": 0.0,
277
+ "loss": 0.1263,
278
+ "step": 45
279
+ },
280
+ {
281
+ "epoch": 44.62,
282
+ "step": 45,
283
+ "total_flos": 2105118038163456.0,
284
+ "train_loss": 1.3382400512695312,
285
+ "train_runtime": 78.1057,
286
+ "train_samples_per_second": 59.343,
287
+ "train_steps_per_second": 0.576
288
+ }
289
+ ],
290
+ "max_steps": 45,
291
+ "num_train_epochs": 45,
292
+ "total_flos": 2105118038163456.0,
293
+ "trial_name": null,
294
+ "trial_params": null
295
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a3b3bf7b86ddc5e39623ef79f86847f2c6a94685a28b9ee7de1f5245bca5f27
3
+ size 3503
vocab.json ADDED
The diff for this file is too large to render. See raw diff