End of training
Browse files- all_results.json +10 -10
- eval_results.json +6 -6
- runs/Dec13_12-14-07_d7f040c448a8/events.out.tfevents.1670934730.d7f040c448a8.15037.2 +3 -0
- train.log +6 -0
- train_results.json +5 -5
- trainer_state.json +163 -13
    	
        all_results.json
    CHANGED
    
    | @@ -1,13 +1,13 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
            -
                "epoch": 0. | 
| 3 | 
            -
                "eval_loss": 0. | 
| 4 | 
            -
                "eval_runtime": 16. | 
| 5 | 
             
                "eval_samples": 64,
         | 
| 6 | 
            -
                "eval_samples_per_second": 3. | 
| 7 | 
            -
                "eval_steps_per_second": 0. | 
| 8 | 
            -
                "eval_wer":  | 
| 9 | 
            -
                "train_loss": 0. | 
| 10 | 
            -
                "train_runtime":  | 
| 11 | 
            -
                "train_samples_per_second":  | 
| 12 | 
            -
                "train_steps_per_second": 0. | 
| 13 | 
             
            }
         | 
|  | |
| 1 | 
             
            {
         | 
| 2 | 
            +
                "epoch": 0.33,
         | 
| 3 | 
            +
                "eval_loss": 0.5074095726013184,
         | 
| 4 | 
            +
                "eval_runtime": 16.8249,
         | 
| 5 | 
             
                "eval_samples": 64,
         | 
| 6 | 
            +
                "eval_samples_per_second": 3.804,
         | 
| 7 | 
            +
                "eval_steps_per_second": 0.119,
         | 
| 8 | 
            +
                "eval_wer": 52.197802197802204,
         | 
| 9 | 
            +
                "train_loss": 0.10702953418095906,
         | 
| 10 | 
            +
                "train_runtime": 833.6007,
         | 
| 11 | 
            +
                "train_samples_per_second": 11.516,
         | 
| 12 | 
            +
                "train_steps_per_second": 0.36
         | 
| 13 | 
             
            }
         | 
    	
        eval_results.json
    CHANGED
    
    | @@ -1,9 +1,9 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
            -
                "epoch": 0. | 
| 3 | 
            -
                "eval_loss": 0. | 
| 4 | 
            -
                "eval_runtime": 16. | 
| 5 | 
             
                "eval_samples": 64,
         | 
| 6 | 
            -
                "eval_samples_per_second": 3. | 
| 7 | 
            -
                "eval_steps_per_second": 0. | 
| 8 | 
            -
                "eval_wer":  | 
| 9 | 
             
            }
         | 
|  | |
| 1 | 
             
            {
         | 
| 2 | 
            +
                "epoch": 0.33,
         | 
| 3 | 
            +
                "eval_loss": 0.5074095726013184,
         | 
| 4 | 
            +
                "eval_runtime": 16.8249,
         | 
| 5 | 
             
                "eval_samples": 64,
         | 
| 6 | 
            +
                "eval_samples_per_second": 3.804,
         | 
| 7 | 
            +
                "eval_steps_per_second": 0.119,
         | 
| 8 | 
            +
                "eval_wer": 52.197802197802204
         | 
| 9 | 
             
            }
         | 
    	
        runs/Dec13_12-14-07_d7f040c448a8/events.out.tfevents.1670934730.d7f040c448a8.15037.2
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:621b7a2306709878e0351fd168351ff2ec7c15bf93824a12f4627acf91ccc932
         | 
| 3 | 
            +
            size 358
         | 
    	
        train.log
    CHANGED
    
    | @@ -118,3 +118,9 @@ | |
| 118 | 
             
            {'loss': 0.4407, 'learning_rate': 5.033333333333333e-06, 'epoch': 0.33}
         | 
| 119 | 
             
            {'eval_loss': 0.5046072602272034, 'eval_wer': 53.11355311355312, 'eval_runtime': 17.9261, 'eval_samples_per_second': 3.57, 'eval_steps_per_second': 0.112, 'epoch': 0.33}
         | 
| 120 | 
             
            {'train_runtime': 833.6007, 'train_samples_per_second': 11.516, 'train_steps_per_second': 0.36, 'train_loss': 0.10702953418095906, 'epoch': 0.33}
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 118 | 
             
            {'loss': 0.4407, 'learning_rate': 5.033333333333333e-06, 'epoch': 0.33}
         | 
| 119 | 
             
            {'eval_loss': 0.5046072602272034, 'eval_wer': 53.11355311355312, 'eval_runtime': 17.9261, 'eval_samples_per_second': 3.57, 'eval_steps_per_second': 0.112, 'epoch': 0.33}
         | 
| 120 | 
             
            {'train_runtime': 833.6007, 'train_samples_per_second': 11.516, 'train_steps_per_second': 0.36, 'train_loss': 0.10702953418095906, 'epoch': 0.33}
         | 
| 121 | 
            +
            ***** train metrics *****
         | 
| 122 | 
            +
              epoch                    =       0.33
         | 
| 123 | 
            +
              train_loss               =      0.107
         | 
| 124 | 
            +
              train_runtime            = 0:13:53.60
         | 
| 125 | 
            +
              train_samples_per_second =     11.516
         | 
| 126 | 
            +
              train_steps_per_second   =       0.36
         | 
    	
        train_results.json
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
            -
                "epoch": 0. | 
| 3 | 
            -
                "train_loss": 0. | 
| 4 | 
            -
                "train_runtime":  | 
| 5 | 
            -
                "train_samples_per_second":  | 
| 6 | 
            -
                "train_steps_per_second": 0. | 
| 7 | 
             
            }
         | 
|  | |
| 1 | 
             
            {
         | 
| 2 | 
            +
                "epoch": 0.33,
         | 
| 3 | 
            +
                "train_loss": 0.10702953418095906,
         | 
| 4 | 
            +
                "train_runtime": 833.6007,
         | 
| 5 | 
            +
                "train_samples_per_second": 11.516,
         | 
| 6 | 
            +
                "train_steps_per_second": 0.36
         | 
| 7 | 
             
            }
         | 
    	
        trainer_state.json
    CHANGED
    
    | @@ -1,8 +1,8 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
            -
              "best_metric":  | 
| 3 | 
            -
              "best_model_checkpoint": "./checkpoint- | 
| 4 | 
            -
              "epoch": 0. | 
| 5 | 
            -
              "global_step":  | 
| 6 | 
             
              "is_hyper_param_search": false,
         | 
| 7 | 
             
              "is_local_process_zero": true,
         | 
| 8 | 
             
              "is_world_process_zero": true,
         | 
| @@ -317,18 +317,168 @@ | |
| 317 | 
             
                  "step": 200
         | 
| 318 | 
             
                },
         | 
| 319 | 
             
                {
         | 
| 320 | 
            -
                  "epoch": 0. | 
| 321 | 
            -
                  " | 
| 322 | 
            -
                  " | 
| 323 | 
            -
                  " | 
| 324 | 
            -
             | 
| 325 | 
            -
             | 
| 326 | 
            -
                  " | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 327 | 
             
                }
         | 
| 328 | 
             
              ],
         | 
| 329 | 
            -
              "max_steps":  | 
| 330 | 
             
              "num_train_epochs": 9223372036854775807,
         | 
| 331 | 
            -
              "total_flos":  | 
| 332 | 
             
              "trial_name": null,
         | 
| 333 | 
             
              "trial_params": null
         | 
| 334 | 
             
            }
         | 
|  | |
| 1 | 
             
            {
         | 
| 2 | 
            +
              "best_metric": 52.197802197802204,
         | 
| 3 | 
            +
              "best_model_checkpoint": "./checkpoint-280",
         | 
| 4 | 
            +
              "epoch": 0.3333333333333333,
         | 
| 5 | 
            +
              "global_step": 300,
         | 
| 6 | 
             
              "is_hyper_param_search": false,
         | 
| 7 | 
             
              "is_local_process_zero": true,
         | 
| 8 | 
             
              "is_world_process_zero": true,
         | 
|  | |
| 317 | 
             
                  "step": 200
         | 
| 318 | 
             
                },
         | 
| 319 | 
             
                {
         | 
| 320 | 
            +
                  "epoch": 0.03,
         | 
| 321 | 
            +
                  "learning_rate": 8.033333333333335e-06,
         | 
| 322 | 
            +
                  "loss": 0.1922,
         | 
| 323 | 
            +
                  "step": 210
         | 
| 324 | 
            +
                },
         | 
| 325 | 
            +
                {
         | 
| 326 | 
            +
                  "epoch": 0.03,
         | 
| 327 | 
            +
                  "eval_loss": 0.5239479541778564,
         | 
| 328 | 
            +
                  "eval_runtime": 16.5572,
         | 
| 329 | 
            +
                  "eval_samples_per_second": 3.865,
         | 
| 330 | 
            +
                  "eval_steps_per_second": 0.121,
         | 
| 331 | 
            +
                  "eval_wer": 55.494505494505496,
         | 
| 332 | 
            +
                  "step": 210
         | 
| 333 | 
            +
                },
         | 
| 334 | 
            +
                {
         | 
| 335 | 
            +
                  "epoch": 0.07,
         | 
| 336 | 
            +
                  "learning_rate": 7.7e-06,
         | 
| 337 | 
            +
                  "loss": 0.2229,
         | 
| 338 | 
            +
                  "step": 220
         | 
| 339 | 
            +
                },
         | 
| 340 | 
            +
                {
         | 
| 341 | 
            +
                  "epoch": 0.07,
         | 
| 342 | 
            +
                  "eval_loss": 0.5312361717224121,
         | 
| 343 | 
            +
                  "eval_runtime": 17.8694,
         | 
| 344 | 
            +
                  "eval_samples_per_second": 3.582,
         | 
| 345 | 
            +
                  "eval_steps_per_second": 0.112,
         | 
| 346 | 
            +
                  "eval_wer": 54.395604395604394,
         | 
| 347 | 
            +
                  "step": 220
         | 
| 348 | 
            +
                },
         | 
| 349 | 
            +
                {
         | 
| 350 | 
            +
                  "epoch": 0.1,
         | 
| 351 | 
            +
                  "learning_rate": 7.3666666666666676e-06,
         | 
| 352 | 
            +
                  "loss": 0.1976,
         | 
| 353 | 
            +
                  "step": 230
         | 
| 354 | 
            +
                },
         | 
| 355 | 
            +
                {
         | 
| 356 | 
            +
                  "epoch": 0.1,
         | 
| 357 | 
            +
                  "eval_loss": 0.5302589535713196,
         | 
| 358 | 
            +
                  "eval_runtime": 17.0912,
         | 
| 359 | 
            +
                  "eval_samples_per_second": 3.745,
         | 
| 360 | 
            +
                  "eval_steps_per_second": 0.117,
         | 
| 361 | 
            +
                  "eval_wer": 54.02930402930403,
         | 
| 362 | 
            +
                  "step": 230
         | 
| 363 | 
            +
                },
         | 
| 364 | 
            +
                {
         | 
| 365 | 
            +
                  "epoch": 0.13,
         | 
| 366 | 
            +
                  "learning_rate": 7.033333333333334e-06,
         | 
| 367 | 
            +
                  "loss": 0.2823,
         | 
| 368 | 
            +
                  "step": 240
         | 
| 369 | 
            +
                },
         | 
| 370 | 
            +
                {
         | 
| 371 | 
            +
                  "epoch": 0.13,
         | 
| 372 | 
            +
                  "eval_loss": 0.5269189476966858,
         | 
| 373 | 
            +
                  "eval_runtime": 17.9989,
         | 
| 374 | 
            +
                  "eval_samples_per_second": 3.556,
         | 
| 375 | 
            +
                  "eval_steps_per_second": 0.111,
         | 
| 376 | 
            +
                  "eval_wer": 54.02930402930403,
         | 
| 377 | 
            +
                  "step": 240
         | 
| 378 | 
            +
                },
         | 
| 379 | 
            +
                {
         | 
| 380 | 
            +
                  "epoch": 0.17,
         | 
| 381 | 
            +
                  "learning_rate": 6.700000000000001e-06,
         | 
| 382 | 
            +
                  "loss": 0.2265,
         | 
| 383 | 
            +
                  "step": 250
         | 
| 384 | 
            +
                },
         | 
| 385 | 
            +
                {
         | 
| 386 | 
            +
                  "epoch": 0.17,
         | 
| 387 | 
            +
                  "eval_loss": 0.5312862992286682,
         | 
| 388 | 
            +
                  "eval_runtime": 18.4593,
         | 
| 389 | 
            +
                  "eval_samples_per_second": 3.467,
         | 
| 390 | 
            +
                  "eval_steps_per_second": 0.108,
         | 
| 391 | 
            +
                  "eval_wer": 55.67765567765568,
         | 
| 392 | 
            +
                  "step": 250
         | 
| 393 | 
            +
                },
         | 
| 394 | 
            +
                {
         | 
| 395 | 
            +
                  "epoch": 0.2,
         | 
| 396 | 
            +
                  "learning_rate": 6.366666666666668e-06,
         | 
| 397 | 
            +
                  "loss": 0.3728,
         | 
| 398 | 
            +
                  "step": 260
         | 
| 399 | 
            +
                },
         | 
| 400 | 
            +
                {
         | 
| 401 | 
            +
                  "epoch": 0.2,
         | 
| 402 | 
            +
                  "eval_loss": 0.5128015279769897,
         | 
| 403 | 
            +
                  "eval_runtime": 18.2249,
         | 
| 404 | 
            +
                  "eval_samples_per_second": 3.512,
         | 
| 405 | 
            +
                  "eval_steps_per_second": 0.11,
         | 
| 406 | 
            +
                  "eval_wer": 53.47985347985348,
         | 
| 407 | 
            +
                  "step": 260
         | 
| 408 | 
            +
                },
         | 
| 409 | 
            +
                {
         | 
| 410 | 
            +
                  "epoch": 0.23,
         | 
| 411 | 
            +
                  "learning_rate": 6.033333333333335e-06,
         | 
| 412 | 
            +
                  "loss": 0.3738,
         | 
| 413 | 
            +
                  "step": 270
         | 
| 414 | 
            +
                },
         | 
| 415 | 
            +
                {
         | 
| 416 | 
            +
                  "epoch": 0.23,
         | 
| 417 | 
            +
                  "eval_loss": 0.5025143623352051,
         | 
| 418 | 
            +
                  "eval_runtime": 17.2543,
         | 
| 419 | 
            +
                  "eval_samples_per_second": 3.709,
         | 
| 420 | 
            +
                  "eval_steps_per_second": 0.116,
         | 
| 421 | 
            +
                  "eval_wer": 52.74725274725275,
         | 
| 422 | 
            +
                  "step": 270
         | 
| 423 | 
            +
                },
         | 
| 424 | 
            +
                {
         | 
| 425 | 
            +
                  "epoch": 0.27,
         | 
| 426 | 
            +
                  "learning_rate": 5.7e-06,
         | 
| 427 | 
            +
                  "loss": 0.488,
         | 
| 428 | 
            +
                  "step": 280
         | 
| 429 | 
            +
                },
         | 
| 430 | 
            +
                {
         | 
| 431 | 
            +
                  "epoch": 0.27,
         | 
| 432 | 
            +
                  "eval_loss": 0.5074095726013184,
         | 
| 433 | 
            +
                  "eval_runtime": 18.4062,
         | 
| 434 | 
            +
                  "eval_samples_per_second": 3.477,
         | 
| 435 | 
            +
                  "eval_steps_per_second": 0.109,
         | 
| 436 | 
            +
                  "eval_wer": 52.197802197802204,
         | 
| 437 | 
            +
                  "step": 280
         | 
| 438 | 
            +
                },
         | 
| 439 | 
            +
                {
         | 
| 440 | 
            +
                  "epoch": 0.3,
         | 
| 441 | 
            +
                  "learning_rate": 5.366666666666666e-06,
         | 
| 442 | 
            +
                  "loss": 0.4142,
         | 
| 443 | 
            +
                  "step": 290
         | 
| 444 | 
            +
                },
         | 
| 445 | 
            +
                {
         | 
| 446 | 
            +
                  "epoch": 0.3,
         | 
| 447 | 
            +
                  "eval_loss": 0.5057792663574219,
         | 
| 448 | 
            +
                  "eval_runtime": 16.5894,
         | 
| 449 | 
            +
                  "eval_samples_per_second": 3.858,
         | 
| 450 | 
            +
                  "eval_steps_per_second": 0.121,
         | 
| 451 | 
            +
                  "eval_wer": 52.56410256410257,
         | 
| 452 | 
            +
                  "step": 290
         | 
| 453 | 
            +
                },
         | 
| 454 | 
            +
                {
         | 
| 455 | 
            +
                  "epoch": 0.33,
         | 
| 456 | 
            +
                  "learning_rate": 5.033333333333333e-06,
         | 
| 457 | 
            +
                  "loss": 0.4407,
         | 
| 458 | 
            +
                  "step": 300
         | 
| 459 | 
            +
                },
         | 
| 460 | 
            +
                {
         | 
| 461 | 
            +
                  "epoch": 0.33,
         | 
| 462 | 
            +
                  "eval_loss": 0.5046072602272034,
         | 
| 463 | 
            +
                  "eval_runtime": 17.9261,
         | 
| 464 | 
            +
                  "eval_samples_per_second": 3.57,
         | 
| 465 | 
            +
                  "eval_steps_per_second": 0.112,
         | 
| 466 | 
            +
                  "eval_wer": 53.11355311355312,
         | 
| 467 | 
            +
                  "step": 300
         | 
| 468 | 
            +
                },
         | 
| 469 | 
            +
                {
         | 
| 470 | 
            +
                  "epoch": 0.33,
         | 
| 471 | 
            +
                  "step": 300,
         | 
| 472 | 
            +
                  "total_flos": 2.36341297152e+17,
         | 
| 473 | 
            +
                  "train_loss": 0.10702953418095906,
         | 
| 474 | 
            +
                  "train_runtime": 833.6007,
         | 
| 475 | 
            +
                  "train_samples_per_second": 11.516,
         | 
| 476 | 
            +
                  "train_steps_per_second": 0.36
         | 
| 477 | 
             
                }
         | 
| 478 | 
             
              ],
         | 
| 479 | 
            +
              "max_steps": 300,
         | 
| 480 | 
             
              "num_train_epochs": 9223372036854775807,
         | 
| 481 | 
            +
              "total_flos": 2.36341297152e+17,
         | 
| 482 | 
             
              "trial_name": null,
         | 
| 483 | 
             
              "trial_params": null
         | 
| 484 | 
             
            }
         |