Mel-Iza0 committed
Commit 11d9ff3
1 Parent(s): 9b45be9

Upload folder using huggingface_hub

checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8307659fd0862cf0805b8206dbc2159ba622b3301d90d9634a43e58ab49be69d
+oid sha256:5247567c89f17309d6bd1c4245c9652bc68a64773a779b6eae919705c139699d
 size 436242776
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7ba7a8726d78998756dd55f553ecc0a377148f2698744401894deb794dff765
+oid sha256:2f828cdf072406ebfb292794d1d8b49d1b865ec53d67d323901101473f311659
 size 872568314
checkpoint-100/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:23673dcfe8f26201d1b64968cfb8e7aeafa0dd44591f91dcef9addc46b95c804
+oid sha256:2ecafeb055d0737cc8a515833c5ec74971ae3eca7cbc82a612954adc47f89875
 size 15024
checkpoint-100/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6469a17730051cc187688ed7abb5fd4c8dde61701d8ada08c6a999a9463ac217
+oid sha256:48e353e6e24755f3598843a9d6ddb81d056d2c2776de182a2e723161538e09e6
 size 15024
checkpoint-100/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da42dc560ef4b9e87e63917c7f9bc28a7815a8897a1f139d152dd2da3d45ba02
+oid sha256:229ba7afe8835f676d906302a3e89be6b08a56a2d4bc503b4d135b1ca8bf0d45
 size 15024
checkpoint-100/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df3fbb2edb8d685d2994898809a96aa0f19695a5e60085f742316550a6914b28
+oid sha256:7096f705e2e21ec95c103ce49a218bae8963605c3414dad81b891fb62b4032af
 size 15024
checkpoint-100/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ad10f9bdd16d4aa7ed3e572ac6474b4271d2e92b701684b91f54abd90b8638ef
+oid sha256:73c5e13806ff8a94431142a878d0fb96436e2e5061be80fcb3fe37c6be5acef0
 size 1000
checkpoint-100/trainer_state.json CHANGED
@@ -1,135 +1,63 @@
 {
-  "best_metric": 0.3796847462654114,
-  "best_model_checkpoint": "./mistral/01-03-24-Weni-ZeroShot-3.3.18-Mistral-7b-Multilanguage-3.2.0_Zeroshot-2_max_steps-100_batch_16_2024-03-01_ppid_7/checkpoint-100",
+  "best_metric": 0.7399550676345825,
+  "best_model_checkpoint": "./mistral/01-03-24-Weni-ZeroShot-3.3.18-Mistral-7b-Multilanguage-3.2.0_Zeroshot-2_max_steps-800_batch_256_2024-03-01_ppid_7/checkpoint-100",
   "epoch": 0.9900990099009901,
-  "eval_steps": 10,
+  "eval_steps": 100,
   "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
-    {
-      "epoch": 0.1,
-      "eval_loss": 0.6981944441795349,
-      "eval_runtime": 94.6462,
-      "eval_samples_per_second": 30.292,
-      "eval_steps_per_second": 0.951,
-      "step": 10
-    },
     {
       "epoch": 0.2,
-      "grad_norm": 0.2654201090335846,
-      "learning_rate": 0.0001961261695938319,
-      "loss": 1.0016,
+      "grad_norm": 2.141737699508667,
+      "learning_rate": 4.5e-05,
+      "loss": 1.4271,
       "step": 20
     },
-    {
-      "epoch": 0.2,
-      "eval_loss": 0.467332661151886,
-      "eval_runtime": 94.5373,
-      "eval_samples_per_second": 30.327,
-      "eval_steps_per_second": 0.952,
-      "step": 20
-    },
-    {
-      "epoch": 0.3,
-      "eval_loss": 0.434685617685318,
-      "eval_runtime": 94.5899,
-      "eval_samples_per_second": 30.31,
-      "eval_steps_per_second": 0.951,
-      "step": 30
-    },
-    {
-      "epoch": 0.4,
-      "grad_norm": 1.3763986825942993,
-      "learning_rate": 0.0001559192903470747,
-      "loss": 0.4357,
-      "step": 40
-    },
     {
       "epoch": 0.4,
-      "eval_loss": 0.4082697033882141,
-      "eval_runtime": 94.551,
-      "eval_samples_per_second": 30.322,
-      "eval_steps_per_second": 0.952,
+      "grad_norm": 0.741797924041748,
+      "learning_rate": 9.5e-05,
+      "loss": 0.5555,
       "step": 40
     },
-    {
-      "epoch": 0.5,
-      "eval_loss": 0.40642818808555603,
-      "eval_runtime": 94.5831,
-      "eval_samples_per_second": 30.312,
-      "eval_steps_per_second": 0.952,
-      "step": 50
-    },
     {
       "epoch": 0.59,
-      "grad_norm": 0.16300354897975922,
-      "learning_rate": 8.954715367323468e-05,
-      "loss": 0.4012,
+      "grad_norm": 0.7615867853164673,
+      "learning_rate": 0.00014250000000000002,
+      "loss": 0.4788,
       "step": 60
     },
-    {
-      "epoch": 0.59,
-      "eval_loss": 0.39164847135543823,
-      "eval_runtime": 94.6074,
-      "eval_samples_per_second": 30.304,
-      "eval_steps_per_second": 0.951,
-      "step": 60
-    },
-    {
-      "epoch": 0.69,
-      "eval_loss": 0.3856147527694702,
-      "eval_runtime": 94.5883,
-      "eval_samples_per_second": 30.31,
-      "eval_steps_per_second": 0.951,
-      "step": 70
-    },
     {
       "epoch": 0.79,
-      "grad_norm": 0.14875428378582,
-      "learning_rate": 2.8066019966134904e-05,
-      "loss": 0.3846,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001775,
+      "loss": 0.442,
       "step": 80
     },
-    {
-      "epoch": 0.79,
-      "eval_loss": 0.38140159845352173,
-      "eval_runtime": 94.5138,
-      "eval_samples_per_second": 30.334,
-      "eval_steps_per_second": 0.952,
-      "step": 80
-    },
-    {
-      "epoch": 0.89,
-      "eval_loss": 0.3798506557941437,
-      "eval_runtime": 94.6456,
-      "eval_samples_per_second": 30.292,
-      "eval_steps_per_second": 0.951,
-      "step": 90
-    },
     {
       "epoch": 0.99,
-      "grad_norm": 3.9488003253936768,
-      "learning_rate": 2.4359497401758024e-07,
-      "loss": 0.3775,
+      "grad_norm": 134.0851593017578,
+      "learning_rate": 0.0001999533590836713,
+      "loss": 1.6564,
       "step": 100
     },
     {
       "epoch": 0.99,
-      "eval_loss": 0.3796847462654114,
-      "eval_runtime": 94.5333,
-      "eval_samples_per_second": 30.328,
-      "eval_steps_per_second": 0.952,
+      "eval_loss": 0.7399550676345825,
+      "eval_runtime": 93.2428,
+      "eval_samples_per_second": 30.748,
+      "eval_steps_per_second": 0.965,
       "step": 100
     }
   ],
   "logging_steps": 20,
-  "max_steps": 100,
+  "max_steps": 800,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 1,
-  "save_steps": 10,
-  "total_flos": 9.048313688175411e+17,
+  "num_train_epochs": 8,
+  "save_steps": 100,
+  "total_flos": 9.102576047295037e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:66ceb8ab0424702f162d3dcd459022d693533c009fa75ecbe9af10b7fcf8a54d
+oid sha256:f6aa370c2182c787eeddfbdff14f25b598288760eb660145f75aa6cc21f88f4e
 size 5176