Upload folder using huggingface_hub
Browse files- config.json +1 -2
- model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +89 -5
config.json
CHANGED
@@ -15,8 +15,7 @@
|
|
15 |
},
|
16 |
"auto_map": {
|
17 |
"AutoConfig": "ultravox_config.UltravoxConfig",
|
18 |
-
"AutoModel": "ultravox_model.UltravoxModel"
|
19 |
-
"AutoProcessor": "ultravox_processing.UltravoxProcessor"
|
20 |
},
|
21 |
"hidden_size": 4096,
|
22 |
"ignore_index": -100,
|
|
|
15 |
},
|
16 |
"auto_map": {
|
17 |
"AutoConfig": "ultravox_config.UltravoxConfig",
|
18 |
+
"AutoModel": "ultravox_model.UltravoxModel"
|
|
|
19 |
},
|
20 |
"hidden_size": 4096,
|
21 |
"ignore_index": -100,
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 93348824
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:844a3876dbf4fb559dd52a63c6732fe1bbff100eff60b3cc3896f126d1ba187e
|
3 |
size 93348824
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 186701138
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bdf1123ba05cf5d672c5852f3320f551e8d1a087f05d2182f99b0921467a19d7
|
3 |
size 186701138
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ead2bf9cd84322800eb2f2fc4d4c95ffc6c4f969b8f6967eab4f40dffadb6428
|
3 |
size 14244
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7cccfc6f01ca6a15cf846ffdba9f6cd8d0e3d353032a1524b92b53154b87a0b1
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 1000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -434,6 +434,90 @@
|
|
434 |
"learning_rate": 0.00017923655879272394,
|
435 |
"loss": 0.0743,
|
436 |
"step": 6000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
437 |
}
|
438 |
],
|
439 |
"logging_steps": 100,
|
@@ -445,15 +529,15 @@
|
|
445 |
"TrainerControl": {
|
446 |
"args": {
|
447 |
"should_epoch_stop": false,
|
448 |
-
"should_evaluate":
|
449 |
"should_log": false,
|
450 |
"should_save": true,
|
451 |
-
"should_training_stop":
|
452 |
},
|
453 |
"attributes": {}
|
454 |
}
|
455 |
},
|
456 |
-
"total_flos":
|
457 |
"train_batch_size": 12,
|
458 |
"trial_name": null,
|
459 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.6021505376344085,
|
5 |
"eval_steps": 1000,
|
6 |
+
"global_step": 7200,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
434 |
"learning_rate": 0.00017923655879272394,
|
435 |
"loss": 0.0743,
|
436 |
"step": 6000
|
437 |
+
},
|
438 |
+
{
|
439 |
+
"epoch": 7.287933094384707,
|
440 |
+
"grad_norm": 0.06787109375,
|
441 |
+
"learning_rate": 0.00015135574250524898,
|
442 |
+
"loss": 0.076,
|
443 |
+
"step": 6100
|
444 |
+
},
|
445 |
+
{
|
446 |
+
"epoch": 7.407407407407407,
|
447 |
+
"grad_norm": 0.06201171875,
|
448 |
+
"learning_rate": 0.00012565338385541792,
|
449 |
+
"loss": 0.0736,
|
450 |
+
"step": 6200
|
451 |
+
},
|
452 |
+
{
|
453 |
+
"epoch": 7.526881720430108,
|
454 |
+
"grad_norm": 0.047607421875,
|
455 |
+
"learning_rate": 0.00010219546042925842,
|
456 |
+
"loss": 0.0728,
|
457 |
+
"step": 6300
|
458 |
+
},
|
459 |
+
{
|
460 |
+
"epoch": 7.646356033452808,
|
461 |
+
"grad_norm": 0.045654296875,
|
462 |
+
"learning_rate": 8.10421883797694e-05,
|
463 |
+
"loss": 0.0725,
|
464 |
+
"step": 6400
|
465 |
+
},
|
466 |
+
{
|
467 |
+
"epoch": 7.765830346475508,
|
468 |
+
"grad_norm": 0.05126953125,
|
469 |
+
"learning_rate": 6.22478678529197e-05,
|
470 |
+
"loss": 0.0717,
|
471 |
+
"step": 6500
|
472 |
+
},
|
473 |
+
{
|
474 |
+
"epoch": 7.885304659498208,
|
475 |
+
"grad_norm": 0.046875,
|
476 |
+
"learning_rate": 4.5860743599951184e-05,
|
477 |
+
"loss": 0.0726,
|
478 |
+
"step": 6600
|
479 |
+
},
|
480 |
+
{
|
481 |
+
"epoch": 1.004778972520908,
|
482 |
+
"grad_norm": 0.050048828125,
|
483 |
+
"learning_rate": 3.192288113379582e-05,
|
484 |
+
"loss": 0.0732,
|
485 |
+
"step": 6700
|
486 |
+
},
|
487 |
+
{
|
488 |
+
"epoch": 1.124253285543608,
|
489 |
+
"grad_norm": 0.0498046875,
|
490 |
+
"learning_rate": 2.0470058747505516e-05,
|
491 |
+
"loss": 0.0724,
|
492 |
+
"step": 6800
|
493 |
+
},
|
494 |
+
{
|
495 |
+
"epoch": 1.2437275985663083,
|
496 |
+
"grad_norm": 0.050537109375,
|
497 |
+
"learning_rate": 1.153167567188862e-05,
|
498 |
+
"loss": 0.0748,
|
499 |
+
"step": 6900
|
500 |
+
},
|
501 |
+
{
|
502 |
+
"epoch": 1.3632019115890084,
|
503 |
+
"grad_norm": 0.04443359375,
|
504 |
+
"learning_rate": 5.1306766081048454e-06,
|
505 |
+
"loss": 0.0745,
|
506 |
+
"step": 7000
|
507 |
+
},
|
508 |
+
{
|
509 |
+
"epoch": 1.4826762246117084,
|
510 |
+
"grad_norm": 0.050048828125,
|
511 |
+
"learning_rate": 1.2834928289472415e-06,
|
512 |
+
"loss": 0.0717,
|
513 |
+
"step": 7100
|
514 |
+
},
|
515 |
+
{
|
516 |
+
"epoch": 1.6021505376344085,
|
517 |
+
"grad_norm": 0.0458984375,
|
518 |
+
"learning_rate": 0.0,
|
519 |
+
"loss": 0.073,
|
520 |
+
"step": 7200
|
521 |
}
|
522 |
],
|
523 |
"logging_steps": 100,
|
|
|
529 |
"TrainerControl": {
|
530 |
"args": {
|
531 |
"should_epoch_stop": false,
|
532 |
+
"should_evaluate": false,
|
533 |
"should_log": false,
|
534 |
"should_save": true,
|
535 |
+
"should_training_stop": true
|
536 |
},
|
537 |
"attributes": {}
|
538 |
}
|
539 |
},
|
540 |
+
"total_flos": 1.018481533644288e+17,
|
541 |
"train_batch_size": 12,
|
542 |
"trial_name": null,
|
543 |
"trial_params": null
|