DrishtiSharma committed on
Commit
698cede
1 Parent(s): 34e9619

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 100.0,
3
- "eval_loss": 0.5885377526283264,
4
- "eval_runtime": 12.7017,
5
  "eval_samples": 301,
6
- "eval_samples_per_second": 23.698,
7
- "eval_steps_per_second": 2.992,
8
- "eval_wer": 0.5898617511520737,
9
- "train_loss": 2.0729129652543503,
10
- "train_runtime": 5588.1144,
11
  "train_samples": 704,
12
- "train_samples_per_second": 12.598,
13
- "train_steps_per_second": 0.394
14
  }
1
  {
2
  "epoch": 100.0,
3
+ "eval_loss": 0.5620014667510986,
4
+ "eval_runtime": 12.4265,
5
  "eval_samples": 301,
6
+ "eval_samples_per_second": 24.223,
7
+ "eval_steps_per_second": 3.058,
8
+ "eval_wer": 0.5651445328864684,
9
+ "train_loss": 2.969521954276345,
10
+ "train_runtime": 5494.3742,
11
  "train_samples": 704,
12
+ "train_samples_per_second": 12.813,
13
+ "train_steps_per_second": 0.4
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 100.0,
3
- "eval_loss": 0.5885377526283264,
4
- "eval_runtime": 12.7017,
5
  "eval_samples": 301,
6
- "eval_samples_per_second": 23.698,
7
- "eval_steps_per_second": 2.992,
8
- "eval_wer": 0.5898617511520737
9
  }
1
  {
2
  "epoch": 100.0,
3
+ "eval_loss": 0.5620014667510986,
4
+ "eval_runtime": 12.4265,
5
  "eval_samples": 301,
6
+ "eval_samples_per_second": 24.223,
7
+ "eval_steps_per_second": 3.058,
8
+ "eval_wer": 0.5651445328864684
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e7a4394b288e2177da5753bbb6429436ebf67bd1aa05d5ad56cd386dad6d64a
3
  size 1262108145
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f0eb7adda32a4da8134eb8bc761eb80fae0ded02c211509c87f3ab8a96a2f79
3
  size 1262108145
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 100.0,
3
- "train_loss": 2.0729129652543503,
4
- "train_runtime": 5588.1144,
5
  "train_samples": 704,
6
- "train_samples_per_second": 12.598,
7
- "train_steps_per_second": 0.394
8
  }
1
  {
2
  "epoch": 100.0,
3
+ "train_loss": 2.969521954276345,
4
+ "train_runtime": 5494.3742,
5
  "train_samples": 704,
6
+ "train_samples_per_second": 12.813,
7
+ "train_steps_per_second": 0.4
8
  }
trainer_state.json CHANGED
@@ -9,117 +9,117 @@
9
  "log_history": [
10
  {
11
  "epoch": 13.64,
12
- "learning_rate": 2.99e-05,
13
- "loss": 7.5798,
14
  "step": 300
15
  },
16
  {
17
  "epoch": 13.64,
18
- "eval_loss": 3.4349324703216553,
19
- "eval_runtime": 13.2159,
20
- "eval_samples_per_second": 22.776,
21
- "eval_steps_per_second": 2.875,
22
  "eval_wer": 1.0,
23
  "step": 300
24
  },
25
  {
26
  "epoch": 27.27,
27
- "learning_rate": 5.989999999999999e-05,
28
- "loss": 3.1252,
29
  "step": 600
30
  },
31
  {
32
  "epoch": 27.27,
33
- "eval_loss": 3.0706284046173096,
34
- "eval_runtime": 13.3085,
35
- "eval_samples_per_second": 22.617,
36
- "eval_steps_per_second": 2.855,
37
  "eval_wer": 1.0,
38
  "step": 600
39
  },
40
  {
41
  "epoch": 40.91,
42
- "learning_rate": 6.729310344827586e-05,
43
- "loss": 2.2546,
44
  "step": 900
45
  },
46
  {
47
  "epoch": 40.91,
48
- "eval_loss": 0.8426555395126343,
49
- "eval_runtime": 12.9127,
50
- "eval_samples_per_second": 23.31,
51
- "eval_steps_per_second": 2.943,
52
- "eval_wer": 0.7762882279011312,
53
  "step": 900
54
  },
55
  {
56
  "epoch": 54.55,
57
- "learning_rate": 5.177586206896551e-05,
58
- "loss": 0.7564,
59
  "step": 1200
60
  },
61
  {
62
  "epoch": 54.55,
63
- "eval_loss": 0.6129118800163269,
64
- "eval_runtime": 13.2137,
65
- "eval_samples_per_second": 22.779,
66
- "eval_steps_per_second": 2.876,
67
- "eval_wer": 0.637620444072057,
68
  "step": 1200
69
  },
70
  {
71
  "epoch": 68.18,
72
- "learning_rate": 3.625862068965517e-05,
73
- "loss": 0.5239,
74
  "step": 1500
75
  },
76
  {
77
  "epoch": 68.18,
78
- "eval_loss": 0.5769144892692566,
79
- "eval_runtime": 12.822,
80
- "eval_samples_per_second": 23.475,
81
- "eval_steps_per_second": 2.964,
82
- "eval_wer": 0.6036866359447005,
83
  "step": 1500
84
  },
85
  {
86
  "epoch": 81.82,
87
- "learning_rate": 2.074137931034483e-05,
88
- "loss": 0.438,
89
  "step": 1800
90
  },
91
  {
92
  "epoch": 81.82,
93
- "eval_loss": 0.5937696099281311,
94
- "eval_runtime": 13.4551,
95
- "eval_samples_per_second": 22.371,
96
- "eval_steps_per_second": 2.824,
97
- "eval_wer": 0.5915374947633012,
98
  "step": 1800
99
  },
100
  {
101
  "epoch": 95.45,
102
- "learning_rate": 5.224137931034482e-06,
103
- "loss": 0.3945,
104
  "step": 2100
105
  },
106
  {
107
  "epoch": 95.45,
108
- "eval_loss": 0.5868746042251587,
109
- "eval_runtime": 13.1379,
110
- "eval_samples_per_second": 22.911,
111
- "eval_steps_per_second": 2.892,
112
- "eval_wer": 0.5860913280268119,
113
  "step": 2100
114
  },
115
  {
116
  "epoch": 100.0,
117
  "step": 2200,
118
  "total_flos": 1.3980125790314312e+19,
119
- "train_loss": 2.0729129652543503,
120
- "train_runtime": 5588.1144,
121
- "train_samples_per_second": 12.598,
122
- "train_steps_per_second": 0.394
123
  }
124
  ],
125
  "max_steps": 2200,
9
  "log_history": [
10
  {
11
  "epoch": 13.64,
12
+ "learning_rate": 1.1212499999999998e-05,
13
+ "loss": 9.6445,
14
  "step": 300
15
  },
16
  {
17
  "epoch": 13.64,
18
+ "eval_loss": 4.396285057067871,
19
+ "eval_runtime": 12.5672,
20
+ "eval_samples_per_second": 23.951,
21
+ "eval_steps_per_second": 3.024,
22
  "eval_wer": 1.0,
23
  "step": 300
24
  },
25
  {
26
  "epoch": 27.27,
27
+ "learning_rate": 2.2462499999999997e-05,
28
+ "loss": 3.6459,
29
  "step": 600
30
  },
31
  {
32
  "epoch": 27.27,
33
+ "eval_loss": 3.2267072200775146,
34
+ "eval_runtime": 12.4767,
35
+ "eval_samples_per_second": 24.125,
36
+ "eval_steps_per_second": 3.046,
37
  "eval_wer": 1.0,
38
  "step": 600
39
  },
40
  {
41
  "epoch": 40.91,
42
+ "learning_rate": 3.37125e-05,
43
+ "loss": 3.0978,
44
  "step": 900
45
  },
46
  {
47
  "epoch": 40.91,
48
+ "eval_loss": 3.0927422046661377,
49
+ "eval_runtime": 12.5507,
50
+ "eval_samples_per_second": 23.983,
51
+ "eval_steps_per_second": 3.028,
52
+ "eval_wer": 1.0,
53
  "step": 900
54
  },
55
  {
56
  "epoch": 54.55,
57
+ "learning_rate": 4.4962499999999995e-05,
58
+ "loss": 2.8357,
59
  "step": 1200
60
  },
61
  {
62
  "epoch": 54.55,
63
+ "eval_loss": 2.146217107772827,
64
+ "eval_runtime": 12.4871,
65
+ "eval_samples_per_second": 24.105,
66
+ "eval_steps_per_second": 3.043,
67
+ "eval_wer": 1.002932551319648,
68
  "step": 1200
69
  },
70
  {
71
  "epoch": 68.18,
72
+ "learning_rate": 5.62125e-05,
73
+ "loss": 1.2723,
74
  "step": 1500
75
  },
76
  {
77
  "epoch": 68.18,
78
+ "eval_loss": 0.6747255325317383,
79
+ "eval_runtime": 12.3256,
80
+ "eval_samples_per_second": 24.421,
81
+ "eval_steps_per_second": 3.083,
82
+ "eval_wer": 0.6996229576874738,
83
  "step": 1500
84
  },
85
  {
86
  "epoch": 81.82,
87
+ "learning_rate": 6.746249999999999e-05,
88
+ "loss": 0.6528,
89
  "step": 1800
90
  },
91
  {
92
  "epoch": 81.82,
93
+ "eval_loss": 0.5928319096565247,
94
+ "eval_runtime": 12.4668,
95
+ "eval_samples_per_second": 24.144,
96
+ "eval_steps_per_second": 3.048,
97
+ "eval_wer": 0.6422287390029325,
98
  "step": 1800
99
  },
100
  {
101
  "epoch": 95.45,
102
+ "learning_rate": 3.7875e-05,
103
+ "loss": 0.4905,
104
  "step": 2100
105
  },
106
  {
107
  "epoch": 95.45,
108
+ "eval_loss": 0.5586517453193665,
109
+ "eval_runtime": 12.2287,
110
+ "eval_samples_per_second": 24.614,
111
+ "eval_steps_per_second": 3.107,
112
+ "eval_wer": 0.5680770842061165,
113
  "step": 2100
114
  },
115
  {
116
  "epoch": 100.0,
117
  "step": 2200,
118
  "total_flos": 1.3980125790314312e+19,
119
+ "train_loss": 2.969521954276345,
120
+ "train_runtime": 5494.3742,
121
+ "train_samples_per_second": 12.813,
122
+ "train_steps_per_second": 0.4
123
  }
124
  ],
125
  "max_steps": 2200,