googlefan committed on
Commit
48cf47f
1 Parent(s): aaf7d72

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. config.json +1 -2
  2. model.safetensors +1 -1
  3. optimizer.pt +1 -1
  4. rng_state.pth +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +89 -5
config.json CHANGED
@@ -15,8 +15,7 @@
15
  },
16
  "auto_map": {
17
  "AutoConfig": "ultravox_config.UltravoxConfig",
18
- "AutoModel": "ultravox_model.UltravoxModel",
19
- "AutoProcessor": "ultravox_processing.UltravoxProcessor"
20
  },
21
  "hidden_size": 4096,
22
  "ignore_index": -100,
 
15
  },
16
  "auto_map": {
17
  "AutoConfig": "ultravox_config.UltravoxConfig",
18
+ "AutoModel": "ultravox_model.UltravoxModel"
 
19
  },
20
  "hidden_size": 4096,
21
  "ignore_index": -100,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5778a5c6705a186dd32fd97354ac6fbfb6497b5713c6de8c0d33ac1ad36a0e6c
3
  size 93348824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:844a3876dbf4fb559dd52a63c6732fe1bbff100eff60b3cc3896f126d1ba187e
3
  size 93348824
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff10ea684cccfd7292029ac84396a3366e25e9d587fff254649d097bc911c452
3
  size 186701138
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdf1123ba05cf5d672c5852f3320f551e8d1a087f05d2182f99b0921467a19d7
3
  size 186701138
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50406e2683fae845a67d2522407cfd71c13ce88867a1aac0dc9d26a8b3a5f840
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ead2bf9cd84322800eb2f2fc4d4c95ffc6c4f969b8f6967eab4f40dffadb6428
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2253610a716ca9b86b03ae4320fb93d4188a5a6ee3993619c4ddd1d0d004f2ae
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cccfc6f01ca6a15cf846ffdba9f6cd8d0e3d353032a1524b92b53154b87a0b1
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.168458781362007,
5
  "eval_steps": 1000,
6
- "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -434,6 +434,90 @@
434
  "learning_rate": 0.00017923655879272394,
435
  "loss": 0.0743,
436
  "step": 6000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
  }
438
  ],
439
  "logging_steps": 100,
@@ -445,15 +529,15 @@
445
  "TrainerControl": {
446
  "args": {
447
  "should_epoch_stop": false,
448
- "should_evaluate": true,
449
  "should_log": false,
450
  "should_save": true,
451
- "should_training_stop": false
452
  },
453
  "attributes": {}
454
  }
455
  },
456
- "total_flos": 8.490208809019392e+16,
457
  "train_batch_size": 12,
458
  "trial_name": null,
459
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.6021505376344085,
5
  "eval_steps": 1000,
6
+ "global_step": 7200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
434
  "learning_rate": 0.00017923655879272394,
435
  "loss": 0.0743,
436
  "step": 6000
437
+ },
438
+ {
439
+ "epoch": 7.287933094384707,
440
+ "grad_norm": 0.06787109375,
441
+ "learning_rate": 0.00015135574250524898,
442
+ "loss": 0.076,
443
+ "step": 6100
444
+ },
445
+ {
446
+ "epoch": 7.407407407407407,
447
+ "grad_norm": 0.06201171875,
448
+ "learning_rate": 0.00012565338385541792,
449
+ "loss": 0.0736,
450
+ "step": 6200
451
+ },
452
+ {
453
+ "epoch": 7.526881720430108,
454
+ "grad_norm": 0.047607421875,
455
+ "learning_rate": 0.00010219546042925842,
456
+ "loss": 0.0728,
457
+ "step": 6300
458
+ },
459
+ {
460
+ "epoch": 7.646356033452808,
461
+ "grad_norm": 0.045654296875,
462
+ "learning_rate": 8.10421883797694e-05,
463
+ "loss": 0.0725,
464
+ "step": 6400
465
+ },
466
+ {
467
+ "epoch": 7.765830346475508,
468
+ "grad_norm": 0.05126953125,
469
+ "learning_rate": 6.22478678529197e-05,
470
+ "loss": 0.0717,
471
+ "step": 6500
472
+ },
473
+ {
474
+ "epoch": 7.885304659498208,
475
+ "grad_norm": 0.046875,
476
+ "learning_rate": 4.5860743599951184e-05,
477
+ "loss": 0.0726,
478
+ "step": 6600
479
+ },
480
+ {
481
+ "epoch": 1.004778972520908,
482
+ "grad_norm": 0.050048828125,
483
+ "learning_rate": 3.192288113379582e-05,
484
+ "loss": 0.0732,
485
+ "step": 6700
486
+ },
487
+ {
488
+ "epoch": 1.124253285543608,
489
+ "grad_norm": 0.0498046875,
490
+ "learning_rate": 2.0470058747505516e-05,
491
+ "loss": 0.0724,
492
+ "step": 6800
493
+ },
494
+ {
495
+ "epoch": 1.2437275985663083,
496
+ "grad_norm": 0.050537109375,
497
+ "learning_rate": 1.153167567188862e-05,
498
+ "loss": 0.0748,
499
+ "step": 6900
500
+ },
501
+ {
502
+ "epoch": 1.3632019115890084,
503
+ "grad_norm": 0.04443359375,
504
+ "learning_rate": 5.1306766081048454e-06,
505
+ "loss": 0.0745,
506
+ "step": 7000
507
+ },
508
+ {
509
+ "epoch": 1.4826762246117084,
510
+ "grad_norm": 0.050048828125,
511
+ "learning_rate": 1.2834928289472415e-06,
512
+ "loss": 0.0717,
513
+ "step": 7100
514
+ },
515
+ {
516
+ "epoch": 1.6021505376344085,
517
+ "grad_norm": 0.0458984375,
518
+ "learning_rate": 0.0,
519
+ "loss": 0.073,
520
+ "step": 7200
521
  }
522
  ],
523
  "logging_steps": 100,
 
529
  "TrainerControl": {
530
  "args": {
531
  "should_epoch_stop": false,
532
+ "should_evaluate": false,
533
  "should_log": false,
534
  "should_save": true,
535
+ "should_training_stop": true
536
  },
537
  "attributes": {}
538
  }
539
  },
540
+ "total_flos": 1.018481533644288e+17,
541
  "train_batch_size": 12,
542
  "trial_name": null,
543
  "trial_params": null