Upload 13 files
Browse files- flax_model.msgpack +3 -0
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +2 -2
- scaler.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +63 -3
- vocab.json +1 -1
flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b81a6443503273ade01e51ccf3ce7e8649ec7d8744626fb5fba5aac1da2cd396
|
3 |
+
size 1269577963
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2490594117
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb7750a10cf414e7cf57db59fee51e82cf10bd6c74934f4e5000135b87050316
|
3 |
size 2490594117
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1262168365
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:073b9ce2badcff0059ea13062bc550a34d8961a512ca0cee6aa15940c3e9bafb
|
3 |
size 1262168365
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:03df94d786d198b7279e6cbae44d7c52f57ee3dc05dadfdcb8bd58655e851b41
|
3 |
+
size 14575
|
scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c17afbca2619611e7e8b4bf54fd16906bc9961ed2d474309c8d35540b117170
|
3 |
size 557
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e82518d5762a21ba4f0c30189155912d4bd11e2e762621ec47d40c6a19b601ba
|
3 |
size 627
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -381,11 +381,71 @@
|
|
381 |
"eval_steps_per_second": 2.188,
|
382 |
"eval_wer": 0.2347081333900261,
|
383 |
"step": 2500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
384 |
}
|
385 |
],
|
386 |
"max_steps": 81000,
|
387 |
"num_train_epochs": 1000,
|
388 |
-
"total_flos":
|
389 |
"trial_name": null,
|
390 |
"trial_params": null
|
391 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 35.80246913580247,
|
5 |
+
"global_step": 2900,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
381 |
"eval_steps_per_second": 2.188,
|
382 |
"eval_wer": 0.2347081333900261,
|
383 |
"step": 2500
|
384 |
+
},
|
385 |
+
{
|
386 |
+
"epoch": 32.1,
|
387 |
+
"learning_rate": 9.690976514215081e-05,
|
388 |
+
"loss": 0.0335,
|
389 |
+
"step": 2600
|
390 |
+
},
|
391 |
+
{
|
392 |
+
"epoch": 32.1,
|
393 |
+
"eval_loss": 0.3765297532081604,
|
394 |
+
"eval_runtime": 152.737,
|
395 |
+
"eval_samples_per_second": 21.292,
|
396 |
+
"eval_steps_per_second": 2.665,
|
397 |
+
"eval_wer": 0.2403571645508109,
|
398 |
+
"step": 2600
|
399 |
+
},
|
400 |
+
{
|
401 |
+
"epoch": 33.33,
|
402 |
+
"learning_rate": 9.678615574783683e-05,
|
403 |
+
"loss": 0.0331,
|
404 |
+
"step": 2700
|
405 |
+
},
|
406 |
+
{
|
407 |
+
"epoch": 33.33,
|
408 |
+
"eval_loss": 0.37883350253105164,
|
409 |
+
"eval_runtime": 171.3633,
|
410 |
+
"eval_samples_per_second": 18.977,
|
411 |
+
"eval_steps_per_second": 2.375,
|
412 |
+
"eval_wer": 0.23616594788313186,
|
413 |
+
"step": 2700
|
414 |
+
},
|
415 |
+
{
|
416 |
+
"epoch": 34.57,
|
417 |
+
"learning_rate": 9.666254635352288e-05,
|
418 |
+
"loss": 0.0312,
|
419 |
+
"step": 2800
|
420 |
+
},
|
421 |
+
{
|
422 |
+
"epoch": 34.57,
|
423 |
+
"eval_loss": 0.3879595696926117,
|
424 |
+
"eval_runtime": 186.5981,
|
425 |
+
"eval_samples_per_second": 17.428,
|
426 |
+
"eval_steps_per_second": 2.181,
|
427 |
+
"eval_wer": 0.2289376176881492,
|
428 |
+
"step": 2800
|
429 |
+
},
|
430 |
+
{
|
431 |
+
"epoch": 35.8,
|
432 |
+
"learning_rate": 9.65389369592089e-05,
|
433 |
+
"loss": 0.0306,
|
434 |
+
"step": 2900
|
435 |
+
},
|
436 |
+
{
|
437 |
+
"epoch": 35.8,
|
438 |
+
"eval_loss": 0.38653597235679626,
|
439 |
+
"eval_runtime": 182.3902,
|
440 |
+
"eval_samples_per_second": 17.83,
|
441 |
+
"eval_steps_per_second": 2.231,
|
442 |
+
"eval_wer": 0.23258215392091355,
|
443 |
+
"step": 2900
|
444 |
}
|
445 |
],
|
446 |
"max_steps": 81000,
|
447 |
"num_train_epochs": 1000,
|
448 |
+
"total_flos": 9.224758770642593e+19,
|
449 |
"trial_name": null,
|
450 |
"trial_params": null
|
451 |
}
|
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"ا": 0, "_": 1, "ك": 3, "ن": 4, "@": 5, "ذ": 6, "ٖ": 7, "ش": 8, "غ": 9, "آ": 10, "ص": 11, "ي": 12, "ث": 13, "n": 14, "ج": 15, "d": 16, "ئ": 17, "ة": 18, "ه": 19, "ز": 20, "ع": 21, "ف": 22, "i": 23, "r": 24, "v": 25, "ڸ": 26, "k": 27, "ِ": 28, "ء": 29, "ر": 30, "s": 31, "t": 32, "ى": 33, "ـ": 34, " ": 35, "ؤ": 36, "ق": 37, "ض": 38, "م": 39, "ل": 40, "…": 41, "ط": 42, "ت": 43, "ّ": 44, "c": 45, "ظ": 46, "e": 47, "؟": 48, "h": 49, "ب": 50, "o": 51, "س": 52, "a": 53, "د": 54, "p": 55, "أ": 56, "ْ": 57, "\n": 58, "خ": 59, "ح": 60, "و": 61, "إ": 62, "|": 2, "[UNK]": 63, "[PAD]": 64}
|