|
{ |
|
"best_metric": 0.258684903383255, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-bemgen-combined-model/checkpoint-2200", |
|
"epoch": 1.2893243940175347, |
|
"eval_steps": 100, |
|
"global_step": 2500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05157297576070139, |
|
"grad_norm": 3.681674003601074, |
|
"learning_rate": 0.00028799999999999995, |
|
"loss": 6.7553, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05157297576070139, |
|
"eval_loss": 0.8774181604385376, |
|
"eval_runtime": 57.8286, |
|
"eval_samples_per_second": 16.877, |
|
"eval_steps_per_second": 2.11, |
|
"eval_wer": 0.847627340008707, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10314595152140278, |
|
"grad_norm": 2.3965742588043213, |
|
"learning_rate": 0.00029950404684002066, |
|
"loss": 0.5648, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.10314595152140278, |
|
"eval_loss": 0.34078654646873474, |
|
"eval_runtime": 57.7359, |
|
"eval_samples_per_second": 16.905, |
|
"eval_steps_per_second": 2.113, |
|
"eval_wer": 0.5031562908141054, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15471892728210418, |
|
"grad_norm": 12.905903816223145, |
|
"learning_rate": 0.00029898742896504216, |
|
"loss": 0.4827, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15471892728210418, |
|
"eval_loss": 0.3261478543281555, |
|
"eval_runtime": 57.6285, |
|
"eval_samples_per_second": 16.936, |
|
"eval_steps_per_second": 2.117, |
|
"eval_wer": 0.4930343926861123, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.20629190304280556, |
|
"grad_norm": 1.9552323818206787, |
|
"learning_rate": 0.00029847081109006366, |
|
"loss": 0.4321, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.20629190304280556, |
|
"eval_loss": 0.303559273481369, |
|
"eval_runtime": 57.7126, |
|
"eval_samples_per_second": 16.911, |
|
"eval_steps_per_second": 2.114, |
|
"eval_wer": 0.48541575968654765, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.25786487880350695, |
|
"grad_norm": 2.4878430366516113, |
|
"learning_rate": 0.0002979541932150852, |
|
"loss": 0.4168, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.25786487880350695, |
|
"eval_loss": 0.2988507151603699, |
|
"eval_runtime": 57.9159, |
|
"eval_samples_per_second": 16.852, |
|
"eval_steps_per_second": 2.107, |
|
"eval_wer": 0.4783413147583805, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.30943785456420836, |
|
"grad_norm": 3.2817885875701904, |
|
"learning_rate": 0.0002974375753401067, |
|
"loss": 0.3965, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.30943785456420836, |
|
"eval_loss": 0.2906985282897949, |
|
"eval_runtime": 57.5951, |
|
"eval_samples_per_second": 16.946, |
|
"eval_steps_per_second": 2.118, |
|
"eval_wer": 0.45134958641706574, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.36101083032490977, |
|
"grad_norm": 1.416237235069275, |
|
"learning_rate": 0.0002969209574651283, |
|
"loss": 0.4199, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.36101083032490977, |
|
"eval_loss": 0.29259440302848816, |
|
"eval_runtime": 57.5412, |
|
"eval_samples_per_second": 16.962, |
|
"eval_steps_per_second": 2.12, |
|
"eval_wer": 0.4718110579016108, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4125838060856111, |
|
"grad_norm": 2.3475992679595947, |
|
"learning_rate": 0.0002964043395901498, |
|
"loss": 0.3975, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4125838060856111, |
|
"eval_loss": 0.288595974445343, |
|
"eval_runtime": 57.3013, |
|
"eval_samples_per_second": 17.033, |
|
"eval_steps_per_second": 2.129, |
|
"eval_wer": 0.44590770570309096, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.46415678184631254, |
|
"grad_norm": 1.5925949811935425, |
|
"learning_rate": 0.00029588772171517134, |
|
"loss": 0.3839, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.46415678184631254, |
|
"eval_loss": 0.29075172543525696, |
|
"eval_runtime": 57.8589, |
|
"eval_samples_per_second": 16.869, |
|
"eval_steps_per_second": 2.109, |
|
"eval_wer": 0.4722464083587288, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.5157297576070139, |
|
"grad_norm": 2.456458568572998, |
|
"learning_rate": 0.00029537110384019285, |
|
"loss": 0.3673, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5157297576070139, |
|
"eval_loss": 0.2836114764213562, |
|
"eval_runtime": 57.7048, |
|
"eval_samples_per_second": 16.914, |
|
"eval_steps_per_second": 2.114, |
|
"eval_wer": 0.44449281671745755, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5673027333677153, |
|
"grad_norm": 2.249444007873535, |
|
"learning_rate": 0.00029485448596521435, |
|
"loss": 0.3777, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5673027333677153, |
|
"eval_loss": 0.27841705083847046, |
|
"eval_runtime": 57.4013, |
|
"eval_samples_per_second": 17.003, |
|
"eval_steps_per_second": 2.125, |
|
"eval_wer": 0.4365476708750544, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6188757091284167, |
|
"grad_norm": 1.6211791038513184, |
|
"learning_rate": 0.0002943378680902359, |
|
"loss": 0.3764, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6188757091284167, |
|
"eval_loss": 0.2790738344192505, |
|
"eval_runtime": 57.3594, |
|
"eval_samples_per_second": 17.016, |
|
"eval_steps_per_second": 2.127, |
|
"eval_wer": 0.4278406617326948, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6704486848891181, |
|
"grad_norm": 2.047067165374756, |
|
"learning_rate": 0.0002938212502152574, |
|
"loss": 0.3918, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6704486848891181, |
|
"eval_loss": 0.27573078870773315, |
|
"eval_runtime": 57.7956, |
|
"eval_samples_per_second": 16.887, |
|
"eval_steps_per_second": 2.111, |
|
"eval_wer": 0.42511972137570747, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.7220216606498195, |
|
"grad_norm": 1.4619171619415283, |
|
"learning_rate": 0.00029330979851902874, |
|
"loss": 0.3669, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7220216606498195, |
|
"eval_loss": 0.2721499502658844, |
|
"eval_runtime": 57.7153, |
|
"eval_samples_per_second": 16.911, |
|
"eval_steps_per_second": 2.114, |
|
"eval_wer": 0.41815411406181974, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7735946364105208, |
|
"grad_norm": 1.6276806592941284, |
|
"learning_rate": 0.0002927931806440503, |
|
"loss": 0.377, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7735946364105208, |
|
"eval_loss": 0.27284711599349976, |
|
"eval_runtime": 57.3961, |
|
"eval_samples_per_second": 17.005, |
|
"eval_steps_per_second": 2.126, |
|
"eval_wer": 0.47572921201567264, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8251676121712223, |
|
"grad_norm": 3.681490659713745, |
|
"learning_rate": 0.0002922765627690718, |
|
"loss": 0.4174, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8251676121712223, |
|
"eval_loss": 0.2684435546398163, |
|
"eval_runtime": 57.3852, |
|
"eval_samples_per_second": 17.008, |
|
"eval_steps_per_second": 2.126, |
|
"eval_wer": 0.4242490204614715, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8767405879319237, |
|
"grad_norm": 1.2958589792251587, |
|
"learning_rate": 0.0002917599448940933, |
|
"loss": 0.3641, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8767405879319237, |
|
"eval_loss": 0.2649287283420563, |
|
"eval_runtime": 57.8551, |
|
"eval_samples_per_second": 16.87, |
|
"eval_steps_per_second": 2.109, |
|
"eval_wer": 0.4194601654331737, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.9283135636926251, |
|
"grad_norm": 1.8431603908538818, |
|
"learning_rate": 0.00029124332701911486, |
|
"loss": 0.3882, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9283135636926251, |
|
"eval_loss": 0.2646693289279938, |
|
"eval_runtime": 57.7373, |
|
"eval_samples_per_second": 16.904, |
|
"eval_steps_per_second": 2.113, |
|
"eval_wer": 0.41249455811928604, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9798865394533265, |
|
"grad_norm": 2.4119603633880615, |
|
"learning_rate": 0.00029072670914413636, |
|
"loss": 0.3861, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9798865394533265, |
|
"eval_loss": 0.2668125033378601, |
|
"eval_runtime": 57.5111, |
|
"eval_samples_per_second": 16.971, |
|
"eval_steps_per_second": 2.121, |
|
"eval_wer": 0.44253373966042664, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0314595152140278, |
|
"grad_norm": 1.174663782119751, |
|
"learning_rate": 0.00029021009126915787, |
|
"loss": 0.3647, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0314595152140278, |
|
"eval_loss": 0.26750749349594116, |
|
"eval_runtime": 57.5411, |
|
"eval_samples_per_second": 16.962, |
|
"eval_steps_per_second": 2.12, |
|
"eval_wer": 0.42457553330430997, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0830324909747293, |
|
"grad_norm": 0.9301519393920898, |
|
"learning_rate": 0.0002896934733941794, |
|
"loss": 0.3467, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.0830324909747293, |
|
"eval_loss": 0.2629115581512451, |
|
"eval_runtime": 57.7541, |
|
"eval_samples_per_second": 16.899, |
|
"eval_steps_per_second": 2.112, |
|
"eval_wer": 0.40977361776229865, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.1346054667354306, |
|
"grad_norm": 0.7093687057495117, |
|
"learning_rate": 0.00028917685551920093, |
|
"loss": 0.3579, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.1346054667354306, |
|
"eval_loss": 0.258684903383255, |
|
"eval_runtime": 57.7529, |
|
"eval_samples_per_second": 16.9, |
|
"eval_steps_per_second": 2.112, |
|
"eval_wer": 0.41858946451893775, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.1861784424961321, |
|
"grad_norm": 1.027114748954773, |
|
"learning_rate": 0.00028866023764422243, |
|
"loss": 0.3544, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.1861784424961321, |
|
"eval_loss": 0.2609032392501831, |
|
"eval_runtime": 57.5179, |
|
"eval_samples_per_second": 16.969, |
|
"eval_steps_per_second": 2.121, |
|
"eval_wer": 0.412712233347845, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.2377514182568334, |
|
"grad_norm": 0.9472237825393677, |
|
"learning_rate": 0.000288143619769244, |
|
"loss": 0.35, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.2377514182568334, |
|
"eval_loss": 0.2592073678970337, |
|
"eval_runtime": 57.1938, |
|
"eval_samples_per_second": 17.065, |
|
"eval_steps_per_second": 2.133, |
|
"eval_wer": 0.4061819764910753, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.2893243940175347, |
|
"grad_norm": 1.6566214561462402, |
|
"learning_rate": 0.00028762700189426555, |
|
"loss": 0.3519, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.2893243940175347, |
|
"eval_loss": 0.2590978443622589, |
|
"eval_runtime": 57.823, |
|
"eval_samples_per_second": 16.879, |
|
"eval_steps_per_second": 2.11, |
|
"eval_wer": 0.4134740966478015, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.2893243940175347, |
|
"step": 2500, |
|
"total_flos": 5.958212987780215e+18, |
|
"train_loss": 0.6475233551025391, |
|
"train_runtime": 3633.5111, |
|
"train_samples_per_second": 64.037, |
|
"train_steps_per_second": 16.009 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 58170, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 2 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.958212987780215e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|