sakares committed on
Commit
004e024
1 Parent(s): df77df3

Update model with a lower learning rate and more training epochs

Browse files
Files changed (6) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. scheduler.pt +1 -1
  4. trainer_state.json +113 -15
  5. training_args.bin +1 -1
  6. vocab.json +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0e36aaf1858a24d803db0a490c64e5dd62fbdad7832df57072c1b306cab70de
3
  size 2490659335
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cb1f03e6996f45067b37846665462c9469495843102ce187588b897e28a7f4e
3
  size 2490659335
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6100534b99cfe1d84ca744d05d3f994e2906bad2ef513efedd3eec3e36661154
3
  size 1262224919
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cf9e30486947e5ed90fbee04baf77edce0daadf0effb971a783e7eee53f6215
3
  size 1262224919
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b50cc95e12bd09b0797fb537b7270584feb80c90ed9167113cb636c72c3194d
3
  size 623
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4eadfc8ad40d25eb205d452eb44ca33a8d304d9199be7da405ad3378ff843c81
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.297029702970297,
5
- "global_step": 800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10,35 +10,133 @@
10
  {
11
  "epoch": 2.65,
12
  "learning_rate": 0.00023999999999999998,
13
- "loss": 6.8625,
14
  "step": 400
15
  },
16
  {
17
  "epoch": 2.65,
18
- "eval_loss": 3.4594738483428955,
19
- "eval_runtime": 211.356,
20
- "eval_samples_per_second": 10.352,
21
  "eval_wer": 1.0,
22
  "step": 400
23
  },
24
  {
25
  "epoch": 5.3,
26
- "learning_rate": 7.832512315270935e-05,
27
- "loss": 1.5787,
28
  "step": 800
29
  },
30
  {
31
  "epoch": 5.3,
32
- "eval_loss": 0.6689605116844177,
33
- "eval_runtime": 220.0088,
34
- "eval_samples_per_second": 9.945,
35
- "eval_wer": 0.6838202674440712,
36
  "step": 800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  }
38
  ],
39
- "max_steps": 906,
40
- "num_train_epochs": 6,
41
- "total_flos": 3.298739982613021e+18,
42
  "trial_name": null,
43
  "trial_params": null
44
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 23.83828382838284,
5
+ "global_step": 3600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
10
  {
11
  "epoch": 2.65,
12
  "learning_rate": 0.00023999999999999998,
13
+ "loss": 6.5741,
14
  "step": 400
15
  },
16
  {
17
  "epoch": 2.65,
18
+ "eval_loss": 3.4423439502716064,
19
+ "eval_runtime": 231.306,
20
+ "eval_samples_per_second": 9.459,
21
  "eval_wer": 1.0,
22
  "step": 400
23
  },
24
  {
25
  "epoch": 5.3,
26
+ "learning_rate": 0.0002711907810499359,
27
+ "loss": 1.6231,
28
  "step": 800
29
  },
30
  {
31
  "epoch": 5.3,
32
+ "eval_loss": 0.6918022036552429,
33
+ "eval_runtime": 234.8428,
34
+ "eval_samples_per_second": 9.317,
35
+ "eval_wer": 0.7177260916407884,
36
  "step": 800
37
+ },
38
+ {
39
+ "epoch": 7.94,
40
+ "learning_rate": 0.00023277848911651725,
41
+ "loss": 0.5103,
42
+ "step": 1200
43
+ },
44
+ {
45
+ "epoch": 7.94,
46
+ "eval_loss": 0.5981740951538086,
47
+ "eval_runtime": 233.6413,
48
+ "eval_samples_per_second": 9.365,
49
+ "eval_wer": 0.6444007858546169,
50
+ "step": 1200
51
+ },
52
+ {
53
+ "epoch": 10.59,
54
+ "learning_rate": 0.0001943661971830986,
55
+ "loss": 0.316,
56
+ "step": 1600
57
+ },
58
+ {
59
+ "epoch": 10.59,
60
+ "eval_loss": 0.6127611398696899,
61
+ "eval_runtime": 239.3303,
62
+ "eval_samples_per_second": 9.142,
63
+ "eval_wer": 0.6053615564991445,
64
+ "step": 1600
65
+ },
66
+ {
67
+ "epoch": 13.24,
68
+ "learning_rate": 0.0001559539052496799,
69
+ "loss": 0.227,
70
+ "step": 2000
71
+ },
72
+ {
73
+ "epoch": 13.24,
74
+ "eval_loss": 0.6392495036125183,
75
+ "eval_runtime": 236.7294,
76
+ "eval_samples_per_second": 9.243,
77
+ "eval_wer": 0.5767158882058432,
78
+ "step": 2000
79
+ },
80
+ {
81
+ "epoch": 15.89,
82
+ "learning_rate": 0.00011754161331626119,
83
+ "loss": 0.1783,
84
+ "step": 2400
85
+ },
86
+ {
87
+ "epoch": 15.89,
88
+ "eval_loss": 0.6448690891265869,
89
+ "eval_runtime": 240.8912,
90
+ "eval_samples_per_second": 9.083,
91
+ "eval_wer": 0.5625831801761836,
92
+ "step": 2400
93
+ },
94
+ {
95
+ "epoch": 18.54,
96
+ "learning_rate": 7.91293213828425e-05,
97
+ "loss": 0.1346,
98
+ "step": 2800
99
+ },
100
+ {
101
+ "epoch": 18.54,
102
+ "eval_loss": 0.6509573459625244,
103
+ "eval_runtime": 238.9168,
104
+ "eval_samples_per_second": 9.158,
105
+ "eval_wer": 0.5524431206033336,
106
+ "step": 2800
107
+ },
108
+ {
109
+ "epoch": 21.19,
110
+ "learning_rate": 4.071702944942381e-05,
111
+ "loss": 0.1149,
112
+ "step": 3200
113
+ },
114
+ {
115
+ "epoch": 21.19,
116
+ "eval_loss": 0.7118895053863525,
117
+ "eval_runtime": 239.1827,
118
+ "eval_samples_per_second": 9.148,
119
+ "eval_wer": 0.5582736548577223,
120
+ "step": 3200
121
+ },
122
+ {
123
+ "epoch": 23.84,
124
+ "learning_rate": 2.3047375160051214e-06,
125
+ "loss": 0.1024,
126
+ "step": 3600
127
+ },
128
+ {
129
+ "epoch": 23.84,
130
+ "eval_loss": 0.6984374523162842,
131
+ "eval_runtime": 239.7197,
132
+ "eval_samples_per_second": 9.127,
133
+ "eval_wer": 0.5488307243805057,
134
+ "step": 3600
135
  }
136
  ],
137
+ "max_steps": 3624,
138
+ "num_train_epochs": 24,
139
+ "total_flos": 1.4828294022260212e+19,
140
  "trial_name": null,
141
  "trial_params": null
142
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:856f3181b05eb1e8d3fc464f45704f4c8f04c07811d54bd5f1d8ef94d984ec90
3
  size 2351
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:172c59484ed932416417e6a198f03373837732beb2793acc906d873aa5e63514
3
  size 2351
vocab.json CHANGED
@@ -1 +1 @@
1
- {"": 0, "": 1, "": 2, "": 3, "": 4, "": 5, "": 6, "": 8, "": 9, "": 10, "": 11, "": 12, "": 13, "": 14, "": 15, "": 16, "": 17, "": 18, "": 19, "": 20, "": 21, "": 22, "": 23, "": 24, "": 25, "": 26, "": 27, "": 28, "ป": 29, "": 30, "": 31, "": 32, "": 33, "": 34, "": 35, "'": 36, "": 37, "": 38, "": 39, "": 40, "": 41, "": 42, "": 43, "": 44, "": 45, "": 46, "": 47, "": 48, "": 49, "": 50, "": 51, "": 52, "": 53, "": 54, "": 55, "": 56, "": 57, "": 58, "": 59, "": 60, "": 61, "": 62, "": 63, "": 64, "": 65, "": 66, "": 67, "": 68, "|": 7, "[UNK]": 69, "[PAD]": 70}
1
+ {"": 0, "": 1, "": 2, "": 3, "": 4, "": 5, "": 6, "": 7, "์": 8, "": 9, "": 10, "": 11, "": 12, "": 13, "": 14, "": 15, "": 16, "": 17, "": 18, "": 19, "": 20, "": 21, "": 22, "": 23, "": 24, "": 25, "": 26, "": 27, "": 29, "": 30, "": 31, "": 32, "": 33, "": 34, "": 35, "": 36, "": 37, "": 38, "": 39, "": 40, "": 41, "": 42, "": 43, "": 44, "": 45, "": 46, "": 47, "": 48, "": 49, "": 50, "": 51, "'": 52, "": 53, "": 54, "": 55, "": 56, "": 57, "": 58, "": 59, "": 60, "": 61, "": 62, "": 63, "": 64, "": 65, "": 66, "": 67, "": 68, "|": 28, "[UNK]": 69, "[PAD]": 70}