guymorlan committed on
Commit
de63766
·
1 Parent(s): 8c47ffa

add updated model files

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "Helsinki-NLP/opus-mt-ar-en",
3
  "_num_labels": 3,
4
  "activation_dropout": 0.0,
5
  "activation_function": "swish",
@@ -55,7 +55,7 @@
55
  "share_encoder_decoder_embeddings": true,
56
  "static_position_embeddings": true,
57
  "torch_dtype": "float32",
58
- "transformers_version": "4.23.1",
59
  "use_cache": true,
60
  "vocab_size": 62834
61
  }
 
1
  {
2
+ "_name_or_path": "/home/etherx/transliteration/Shami2English/__23epochs/checkpoint-17411/",
3
  "_num_labels": 3,
4
  "activation_dropout": 0.0,
5
  "activation_function": "swish",
 
55
  "share_encoder_decoder_embeddings": true,
56
  "static_position_embeddings": true,
57
  "torch_dtype": "float32",
58
+ "transformers_version": "4.26.0",
59
  "use_cache": true,
60
  "vocab_size": 62834
61
  }
generation_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bad_words_ids": [
4
+ [
5
+ 62833
6
+ ]
7
+ ],
8
+ "bos_token_id": 0,
9
+ "decoder_start_token_id": 62833,
10
+ "eos_token_id": 0,
11
+ "forced_eos_token_id": 0,
12
+ "max_length": 512,
13
+ "num_beams": 4,
14
+ "pad_token_id": 62833,
15
+ "transformers_version": "4.26.0"
16
+ }
optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a83b7701a1b58df5cab82f9c7af326c02debf5427abb158ea79472871e661c88
3
- size 610624517
 
 
 
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cad576ee017d39807c31b547cb3046116fe386f601db72f525c5c11c7daf859
3
- size 305575877
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37201e8a36bd5c82ab0fb0a122137bcd74d341ea47abec174e133b8ffe040478
3
+ size 305572293
rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b265a7b340cd17f16ab3488b8c8b7f9ba0a60f2cdf7143558a3c3761756b1c64
3
- size 14575
 
 
 
 
scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d810deb6dff718ecfd7e25148d29ab8f7e0c54d612042e9544105023f96f534b
3
- size 627
 
 
 
 
tokenizer_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "eos_token": "</s>",
3
  "model_max_length": 512,
4
- "name_or_path": "Helsinki-NLP/opus-mt-ar-en",
5
  "pad_token": "<pad>",
6
  "return_tensors": "pt",
7
  "separate_vocabs": false,
 
1
  {
2
  "eos_token": "</s>",
3
  "model_max_length": 512,
4
+ "name_or_path": "/home/etherx/transliteration/Shami2English/__23epochs/checkpoint-17411/",
5
  "pad_token": "<pad>",
6
  "return_tensors": "pt",
7
  "separate_vocabs": false,
trainer_state.json DELETED
@@ -1,260 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 12.999339933993399,
5
- "global_step": 9841,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.66,
12
- "learning_rate": 4.779832672831352e-05,
13
- "loss": 1.7977,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 1.0,
18
- "eval_bleu": 3.9153311559768165,
19
- "eval_cer": 0.20409235310303425,
20
- "eval_loss": 0.6436189413070679,
21
- "eval_runtime": 882.9917,
22
- "eval_samples_per_second": 5.717,
23
- "eval_steps_per_second": 0.287,
24
- "step": 757
25
- },
26
- {
27
- "epoch": 1.32,
28
- "learning_rate": 4.5596653456627034e-05,
29
- "loss": 0.768,
30
- "step": 1000
31
- },
32
- {
33
- "epoch": 1.98,
34
- "learning_rate": 4.3394980184940556e-05,
35
- "loss": 0.5495,
36
- "step": 1500
37
- },
38
- {
39
- "epoch": 2.0,
40
- "eval_bleu": 14.177638708391077,
41
- "eval_cer": 0.14333241613928008,
42
- "eval_loss": 0.4357180893421173,
43
- "eval_runtime": 876.0468,
44
- "eval_samples_per_second": 5.762,
45
- "eval_steps_per_second": 0.289,
46
- "step": 1514
47
- },
48
- {
49
- "epoch": 2.64,
50
- "learning_rate": 4.119330691325408e-05,
51
- "loss": 0.4073,
52
- "step": 2000
53
- },
54
- {
55
- "epoch": 3.0,
56
- "eval_bleu": 18.97804010069212,
57
- "eval_cer": 0.12509693073517272,
58
- "eval_loss": 0.36574670672416687,
59
- "eval_runtime": 906.2682,
60
- "eval_samples_per_second": 5.57,
61
- "eval_steps_per_second": 0.279,
62
- "step": 2271
63
- },
64
- {
65
- "epoch": 3.3,
66
- "learning_rate": 3.8991633641567594e-05,
67
- "loss": 0.3404,
68
- "step": 2500
69
- },
70
- {
71
- "epoch": 3.96,
72
- "learning_rate": 3.678996036988111e-05,
73
- "loss": 0.3022,
74
- "step": 3000
75
- },
76
- {
77
- "epoch": 4.0,
78
- "eval_bleu": 21.33582813630578,
79
- "eval_cer": 0.11919353628336293,
80
- "eval_loss": 0.3364902138710022,
81
- "eval_runtime": 749.8243,
82
- "eval_samples_per_second": 6.732,
83
- "eval_steps_per_second": 0.337,
84
- "step": 3028
85
- },
86
- {
87
- "epoch": 4.62,
88
- "learning_rate": 3.4588287098194625e-05,
89
- "loss": 0.2458,
90
- "step": 3500
91
- },
92
- {
93
- "epoch": 5.0,
94
- "eval_bleu": 21.899151496248283,
95
- "eval_cer": 0.11376541511369037,
96
- "eval_loss": 0.31889599561691284,
97
- "eval_runtime": 778.901,
98
- "eval_samples_per_second": 6.481,
99
- "eval_steps_per_second": 0.325,
100
- "step": 3785
101
- },
102
- {
103
- "epoch": 5.28,
104
- "learning_rate": 3.238661382650815e-05,
105
- "loss": 0.2265,
106
- "step": 4000
107
- },
108
- {
109
- "epoch": 5.94,
110
- "learning_rate": 3.0184940554821667e-05,
111
- "loss": 0.2088,
112
- "step": 4500
113
- },
114
- {
115
- "epoch": 6.0,
116
- "eval_bleu": 23.47915169940915,
117
- "eval_cer": 0.11028841583910749,
118
- "eval_loss": 0.3102637529373169,
119
- "eval_runtime": 958.4916,
120
- "eval_samples_per_second": 5.267,
121
- "eval_steps_per_second": 0.264,
122
- "step": 4542
123
- },
124
- {
125
- "epoch": 6.6,
126
- "learning_rate": 2.7983267283135182e-05,
127
- "loss": 0.1791,
128
- "step": 5000
129
- },
130
- {
131
- "epoch": 7.0,
132
- "eval_bleu": 24.623759800702004,
133
- "eval_cer": 0.11083873227105585,
134
- "eval_loss": 0.3097042739391327,
135
- "eval_runtime": 840.3831,
136
- "eval_samples_per_second": 6.007,
137
- "eval_steps_per_second": 0.301,
138
- "step": 5299
139
- },
140
- {
141
- "epoch": 7.27,
142
- "learning_rate": 2.57815940114487e-05,
143
- "loss": 0.1675,
144
- "step": 5500
145
- },
146
- {
147
- "epoch": 7.93,
148
- "learning_rate": 2.357992073976222e-05,
149
- "loss": 0.1571,
150
- "step": 6000
151
- },
152
- {
153
- "epoch": 8.0,
154
- "eval_bleu": 25.52886648930721,
155
- "eval_cer": 0.10933786927483304,
156
- "eval_loss": 0.3041631877422333,
157
- "eval_runtime": 953.6191,
158
- "eval_samples_per_second": 5.294,
159
- "eval_steps_per_second": 0.265,
160
- "step": 6056
161
- },
162
- {
163
- "epoch": 8.59,
164
- "learning_rate": 2.1378247468075736e-05,
165
- "loss": 0.1412,
166
- "step": 6500
167
- },
168
- {
169
- "epoch": 9.0,
170
- "eval_bleu": 25.84181280580903,
171
- "eval_cer": 0.11188933636841183,
172
- "eval_loss": 0.2990793287754059,
173
- "eval_runtime": 911.9381,
174
- "eval_samples_per_second": 5.535,
175
- "eval_steps_per_second": 0.277,
176
- "step": 6813
177
- },
178
- {
179
- "epoch": 9.25,
180
- "learning_rate": 1.917657419638926e-05,
181
- "loss": 0.1334,
182
- "step": 7000
183
- },
184
- {
185
- "epoch": 9.91,
186
- "learning_rate": 1.6974900924702774e-05,
187
- "loss": 0.1261,
188
- "step": 7500
189
- },
190
- {
191
- "epoch": 10.0,
192
- "eval_bleu": 26.361598197934196,
193
- "eval_cer": 0.11131400555319308,
194
- "eval_loss": 0.29519614577293396,
195
- "eval_runtime": 800.3687,
196
- "eval_samples_per_second": 6.307,
197
- "eval_steps_per_second": 0.316,
198
- "step": 7570
199
- },
200
- {
201
- "epoch": 10.57,
202
- "learning_rate": 1.4773227653016295e-05,
203
- "loss": 0.1172,
204
- "step": 8000
205
- },
206
- {
207
- "epoch": 11.0,
208
- "eval_bleu": 26.89660242215556,
209
- "eval_cer": 0.10981314255697026,
210
- "eval_loss": 0.3019121289253235,
211
- "eval_runtime": 782.4891,
212
- "eval_samples_per_second": 6.451,
213
- "eval_steps_per_second": 0.323,
214
- "step": 8327
215
- },
216
- {
217
- "epoch": 11.23,
218
- "learning_rate": 1.2571554381329812e-05,
219
- "loss": 0.114,
220
- "step": 8500
221
- },
222
- {
223
- "epoch": 11.89,
224
- "learning_rate": 1.036988110964333e-05,
225
- "loss": 0.1079,
226
- "step": 9000
227
- },
228
- {
229
- "epoch": 12.0,
230
- "eval_bleu": 27.22667561487407,
231
- "eval_cer": 0.110913775420867,
232
- "eval_loss": 0.2969663143157959,
233
- "eval_runtime": 847.1515,
234
- "eval_samples_per_second": 5.959,
235
- "eval_steps_per_second": 0.299,
236
- "step": 9084
237
- },
238
- {
239
- "epoch": 12.55,
240
- "learning_rate": 8.168207837956847e-06,
241
- "loss": 0.101,
242
- "step": 9500
243
- },
244
- {
245
- "epoch": 13.0,
246
- "eval_bleu": 27.621253608478224,
247
- "eval_cer": 0.11243965280036021,
248
- "eval_loss": 0.29766184091567993,
249
- "eval_runtime": 818.4316,
250
- "eval_samples_per_second": 6.168,
251
- "eval_steps_per_second": 0.309,
252
- "step": 9841
253
- }
254
- ],
255
- "max_steps": 11355,
256
- "num_train_epochs": 15,
257
- "total_flos": 1473244126248960.0,
258
- "trial_name": null,
259
- "trial_params": null
260
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc1a90a340bb3d171379543a4b07cc5446e686570e103ffe1d82e5d7d81db40c
3
- size 3579