cahya commited on
Commit
6b52e64
1 Parent(s): 0a00ac2

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +10 -10
  2. eval_results.json +6 -6
  3. train_results.json +5 -5
  4. trainer_state.json +115 -16
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_loss": 0.14093226194381714,
4
- "eval_runtime": 180.4366,
5
  "eval_samples": 3209,
6
- "eval_samples_per_second": 17.785,
7
- "eval_steps_per_second": 8.895,
8
- "eval_wer": 0.13087634802638468,
9
- "train_loss": 1.532381568636213,
10
- "train_runtime": 408.5144,
11
  "train_samples": 7188,
12
- "train_samples_per_second": 17.595,
13
- "train_steps_per_second": 0.274
14
  }
1
  {
2
+ "epoch": 99.98,
3
+ "eval_loss": 0.28930261731147766,
4
+ "eval_runtime": 108.745,
5
  "eval_samples": 3209,
6
+ "eval_samples_per_second": 29.509,
7
+ "eval_steps_per_second": 3.697,
8
+ "eval_wer": 0.27128049418909017,
9
+ "train_loss": 1.2002243913922992,
10
+ "train_runtime": 17065.0192,
11
  "train_samples": 7188,
12
+ "train_samples_per_second": 42.121,
13
+ "train_steps_per_second": 0.082
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_loss": 0.14093226194381714,
4
- "eval_runtime": 180.4366,
5
  "eval_samples": 3209,
6
- "eval_samples_per_second": 17.785,
7
- "eval_steps_per_second": 8.895,
8
- "eval_wer": 0.13087634802638468
9
  }
1
  {
2
+ "epoch": 99.98,
3
+ "eval_loss": 0.28930261731147766,
4
+ "eval_runtime": 108.745,
5
  "eval_samples": 3209,
6
+ "eval_samples_per_second": 29.509,
7
+ "eval_steps_per_second": 3.697,
8
+ "eval_wer": 0.27128049418909017
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "train_loss": 1.532381568636213,
4
- "train_runtime": 408.5144,
5
  "train_samples": 7188,
6
- "train_samples_per_second": 17.595,
7
- "train_steps_per_second": 0.274
8
  }
1
  {
2
+ "epoch": 99.98,
3
+ "train_loss": 1.2002243913922992,
4
+ "train_runtime": 17065.0192,
5
  "train_samples": 7188,
6
+ "train_samples_per_second": 42.121,
7
+ "train_steps_per_second": 0.082
8
  }
trainer_state.json CHANGED
@@ -1,31 +1,130 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9955555555555555,
5
- "global_step": 112,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.89,
12
- "learning_rate": 1.4999999999999999e-05,
13
- "loss": 1.5179,
14
- "step": 100
15
  },
16
  {
17
- "epoch": 1.0,
18
- "step": 112,
19
- "total_flos": 2.2375521021350707e+17,
20
- "train_loss": 1.532381568636213,
21
- "train_runtime": 408.5144,
22
- "train_samples_per_second": 17.595,
23
- "train_steps_per_second": 0.274
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  }
25
  ],
26
- "max_steps": 112,
27
- "num_train_epochs": 1,
28
- "total_flos": 2.2375521021350707e+17,
29
  "trial_name": null,
30
  "trial_params": null
31
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 99.98245614035088,
5
+ "global_step": 1400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 14.28,
12
+ "learning_rate": 0.0002769230769230769,
13
+ "loss": 1.8647,
14
+ "step": 200
15
  },
16
  {
17
+ "epoch": 14.28,
18
+ "eval_loss": 0.2757788896560669,
19
+ "eval_runtime": 104.0875,
20
+ "eval_samples_per_second": 30.83,
21
+ "eval_steps_per_second": 3.862,
22
+ "eval_wer": 0.2568317453669773,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 28.56,
27
+ "learning_rate": 0.00023076923076923076,
28
+ "loss": 1.3376,
29
+ "step": 400
30
+ },
31
+ {
32
+ "epoch": 28.56,
33
+ "eval_loss": 0.2754214107990265,
34
+ "eval_runtime": 104.1937,
35
+ "eval_samples_per_second": 30.798,
36
+ "eval_steps_per_second": 3.858,
37
+ "eval_wer": 0.2721704533556696,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 42.84,
42
+ "learning_rate": 0.0001846153846153846,
43
+ "loss": 1.1975,
44
+ "step": 600
45
+ },
46
+ {
47
+ "epoch": 42.84,
48
+ "eval_loss": 0.29288050532341003,
49
+ "eval_runtime": 101.9031,
50
+ "eval_samples_per_second": 31.491,
51
+ "eval_steps_per_second": 3.945,
52
+ "eval_wer": 0.290074337765679,
53
+ "step": 600
54
+ },
55
+ {
56
+ "epoch": 57.14,
57
+ "learning_rate": 0.00013846153846153845,
58
+ "loss": 1.1024,
59
+ "step": 800
60
+ },
61
+ {
62
+ "epoch": 57.14,
63
+ "eval_loss": 0.2903825342655182,
64
+ "eval_runtime": 103.1754,
65
+ "eval_samples_per_second": 31.102,
66
+ "eval_steps_per_second": 3.896,
67
+ "eval_wer": 0.29279656580462776,
68
+ "step": 800
69
+ },
70
+ {
71
+ "epoch": 71.42,
72
+ "learning_rate": 9.23076923076923e-05,
73
+ "loss": 1.0257,
74
+ "step": 1000
75
+ },
76
+ {
77
+ "epoch": 71.42,
78
+ "eval_loss": 0.2915154993534088,
79
+ "eval_runtime": 103.3451,
80
+ "eval_samples_per_second": 31.051,
81
+ "eval_steps_per_second": 3.89,
82
+ "eval_wer": 0.28232645796251704,
83
+ "step": 1000
84
+ },
85
+ {
86
+ "epoch": 85.7,
87
+ "learning_rate": 4.615384615384615e-05,
88
+ "loss": 0.9628,
89
+ "step": 1200
90
+ },
91
+ {
92
+ "epoch": 85.7,
93
+ "eval_loss": 0.2936263978481293,
94
+ "eval_runtime": 102.9525,
95
+ "eval_samples_per_second": 31.17,
96
+ "eval_steps_per_second": 3.905,
97
+ "eval_wer": 0.2749450319338289,
98
+ "step": 1200
99
+ },
100
+ {
101
+ "epoch": 99.98,
102
+ "learning_rate": 0.0,
103
+ "loss": 0.9109,
104
+ "step": 1400
105
+ },
106
+ {
107
+ "epoch": 99.98,
108
+ "eval_loss": 0.28930261731147766,
109
+ "eval_runtime": 104.444,
110
+ "eval_samples_per_second": 30.725,
111
+ "eval_steps_per_second": 3.849,
112
+ "eval_wer": 0.27128049418909017,
113
+ "step": 1400
114
+ },
115
+ {
116
+ "epoch": 99.98,
117
+ "step": 1400,
118
+ "total_flos": 1.9811825337670926e+19,
119
+ "train_loss": 1.2002243913922992,
120
+ "train_runtime": 17065.0192,
121
+ "train_samples_per_second": 42.121,
122
+ "train_steps_per_second": 0.082
123
  }
124
  ],
125
+ "max_steps": 1400,
126
+ "num_train_epochs": 100,
127
+ "total_flos": 1.9811825337670926e+19,
128
  "trial_name": null,
129
  "trial_params": null
130
  }