navjordj committed on
Commit
fbf1b06
1 Parent(s): 085172b

Model save

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "train_loss": 2.0967203749837324,
4
- "train_runtime": 2434.7534,
5
  "train_samples": 62107,
6
- "train_samples_per_second": 76.526,
7
- "train_steps_per_second": 4.783
8
  }
 
1
  {
2
+ "epoch": 0.01,
3
+ "train_loss": 3.6924142456054687,
4
+ "train_runtime": 10.341,
5
  "train_samples": 62107,
6
+ "train_samples_per_second": 77.362,
7
+ "train_steps_per_second": 9.67
8
  }
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 128,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "train_loss": 2.0967203749837324,
4
- "train_runtime": 2434.7534,
5
  "train_samples": 62107,
6
- "train_samples_per_second": 76.526,
7
- "train_steps_per_second": 4.783
8
  }
 
1
  {
2
+ "epoch": 0.01,
3
+ "train_loss": 3.6924142456054687,
4
+ "train_runtime": 10.341,
5
  "train_samples": 62107,
6
+ "train_samples_per_second": 77.362,
7
+ "train_steps_per_second": 9.67
8
  }
trainer_state.json CHANGED
@@ -1,163 +1,25 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
- "global_step": 11646,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.13,
12
- "learning_rate": 4.7853340202644685e-05,
13
- "loss": 3.1971,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 0.26,
18
- "learning_rate": 4.570668040528937e-05,
19
- "loss": 2.7354,
20
- "step": 1000
21
- },
22
- {
23
- "epoch": 0.39,
24
- "learning_rate": 4.356002060793406e-05,
25
- "loss": 2.5217,
26
- "step": 1500
27
- },
28
- {
29
- "epoch": 0.52,
30
- "learning_rate": 4.1413360810578746e-05,
31
- "loss": 2.3945,
32
- "step": 2000
33
- },
34
- {
35
- "epoch": 0.64,
36
- "learning_rate": 3.926670101322343e-05,
37
- "loss": 2.2924,
38
- "step": 2500
39
- },
40
- {
41
- "epoch": 0.77,
42
- "learning_rate": 3.712004121586811e-05,
43
- "loss": 2.2309,
44
- "step": 3000
45
- },
46
- {
47
- "epoch": 0.9,
48
- "learning_rate": 3.4973381418512793e-05,
49
- "loss": 2.1581,
50
- "step": 3500
51
- },
52
- {
53
- "epoch": 1.03,
54
- "learning_rate": 3.282672162115748e-05,
55
- "loss": 2.1062,
56
- "step": 4000
57
- },
58
- {
59
- "epoch": 1.16,
60
- "learning_rate": 3.0680061823802165e-05,
61
- "loss": 2.0668,
62
- "step": 4500
63
- },
64
- {
65
- "epoch": 1.29,
66
- "learning_rate": 2.8533402026446848e-05,
67
- "loss": 2.017,
68
- "step": 5000
69
- },
70
- {
71
- "epoch": 1.42,
72
- "learning_rate": 2.6386742229091534e-05,
73
- "loss": 1.9905,
74
- "step": 5500
75
- },
76
- {
77
- "epoch": 1.55,
78
- "learning_rate": 2.424008243173622e-05,
79
- "loss": 1.9665,
80
- "step": 6000
81
- },
82
- {
83
- "epoch": 1.67,
84
- "learning_rate": 2.2093422634380902e-05,
85
- "loss": 1.9443,
86
- "step": 6500
87
- },
88
- {
89
- "epoch": 1.8,
90
- "learning_rate": 1.994676283702559e-05,
91
- "loss": 1.9293,
92
- "step": 7000
93
- },
94
- {
95
- "epoch": 1.93,
96
- "learning_rate": 1.7800103039670274e-05,
97
- "loss": 1.8996,
98
- "step": 7500
99
- },
100
- {
101
- "epoch": 2.06,
102
- "learning_rate": 1.5653443242314956e-05,
103
- "loss": 1.8974,
104
- "step": 8000
105
- },
106
- {
107
- "epoch": 2.19,
108
- "learning_rate": 1.3506783444959644e-05,
109
- "loss": 1.8627,
110
- "step": 8500
111
- },
112
- {
113
- "epoch": 2.32,
114
- "learning_rate": 1.1360123647604328e-05,
115
- "loss": 1.868,
116
- "step": 9000
117
- },
118
- {
119
- "epoch": 2.45,
120
- "learning_rate": 9.213463850249012e-06,
121
- "loss": 1.8579,
122
- "step": 9500
123
- },
124
- {
125
- "epoch": 2.58,
126
- "learning_rate": 7.066804052893698e-06,
127
- "loss": 1.8435,
128
- "step": 10000
129
- },
130
- {
131
- "epoch": 2.7,
132
- "learning_rate": 4.9201442555383824e-06,
133
- "loss": 1.8395,
134
- "step": 10500
135
- },
136
- {
137
- "epoch": 2.83,
138
- "learning_rate": 2.7734844581830675e-06,
139
- "loss": 1.8345,
140
- "step": 11000
141
- },
142
- {
143
- "epoch": 2.96,
144
- "learning_rate": 6.26824660827752e-07,
145
- "loss": 1.8475,
146
- "step": 11500
147
- },
148
- {
149
- "epoch": 3.0,
150
- "step": 11646,
151
- "total_flos": 5065259927605248.0,
152
- "train_loss": 2.0967203749837324,
153
- "train_runtime": 2434.7534,
154
- "train_samples_per_second": 76.526,
155
- "train_steps_per_second": 4.783
156
  }
157
  ],
158
- "max_steps": 11646,
159
- "num_train_epochs": 3,
160
- "total_flos": 5065259927605248.0,
161
  "trial_name": null,
162
  "trial_params": null
163
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.01287995878413189,
5
+ "global_step": 100,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.01,
12
+ "step": 100,
13
+ "total_flos": 19701488959488.0,
14
+ "train_loss": 3.6924142456054687,
15
+ "train_runtime": 10.341,
16
+ "train_samples_per_second": 77.362,
17
+ "train_steps_per_second": 9.67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  }
19
  ],
20
+ "max_steps": 100,
21
+ "num_train_epochs": 1,
22
+ "total_flos": 19701488959488.0,
23
  "trial_name": null,
24
  "trial_params": null
25
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b073f1c34a6d9d29417c68b7cb004fbc40f0fd3ffab51d8c9fcaae20ffb3423
3
  size 3515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d190fa24deb5ce9547e445f8d59cb6f296f3e35503e5600f11ef2b45a7ff4532
3
  size 3515