File size: 2,530 Bytes
14ace45
 
 
6838f76
14ace45
6838f76
14ace45
 
 
 
a12f0d6
 
4747369
 
 
 
 
 
 
 
 
6838f76
a12f0d6
 
 
4747369
 
 
 
 
 
 
 
 
6838f76
a12f0d6
7a398d2
 
4747369
 
 
 
 
 
 
 
 
6838f76
a12f0d6
 
 
4747369
 
 
 
 
 
 
 
 
6838f76
a12f0d6
 
 
4747369
 
 
 
 
 
 
 
 
6838f76
a12f0d6
 
6838f76
 
4747369
 
 
 
 
14ace45
 
 
6838f76
14ace45
6838f76
14ace45
4747369
6838f76
14ace45
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 345,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_gen_len": 95.44927536231884,
      "eval_loss": 0.09570997208356857,
      "eval_rouge1": 72.6601,
      "eval_rouge2": 71.6824,
      "eval_rougeL": 72.6858,
      "eval_rougeLsum": 72.4668,
      "eval_runtime": 12.0064,
      "eval_samples_per_second": 5.747,
      "eval_steps_per_second": 1.499,
      "step": 69
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 92.01449275362319,
      "eval_loss": 0.13447459042072296,
      "eval_rouge1": 75.0063,
      "eval_rouge2": 74.0782,
      "eval_rougeL": 75.0597,
      "eval_rougeLsum": 74.8943,
      "eval_runtime": 11.945,
      "eval_samples_per_second": 5.776,
      "eval_steps_per_second": 1.507,
      "step": 138
    },
    {
      "epoch": 3.0,
      "eval_gen_len": 85.46376811594203,
      "eval_loss": 0.14119356870651245,
      "eval_rouge1": 75.3012,
      "eval_rouge2": 74.5492,
      "eval_rougeL": 75.4246,
      "eval_rougeLsum": 75.324,
      "eval_runtime": 10.9494,
      "eval_samples_per_second": 6.302,
      "eval_steps_per_second": 1.644,
      "step": 207
    },
    {
      "epoch": 4.0,
      "eval_gen_len": 85.04347826086956,
      "eval_loss": 0.10889122635126114,
      "eval_rouge1": 74.8426,
      "eval_rouge2": 74.0317,
      "eval_rougeL": 74.8939,
      "eval_rougeLsum": 74.8128,
      "eval_runtime": 11.2109,
      "eval_samples_per_second": 6.155,
      "eval_steps_per_second": 1.606,
      "step": 276
    },
    {
      "epoch": 5.0,
      "eval_gen_len": 85.31884057971014,
      "eval_loss": 0.12416736036539078,
      "eval_rouge1": 75.3806,
      "eval_rouge2": 74.6735,
      "eval_rougeL": 75.5866,
      "eval_rougeLsum": 75.5446,
      "eval_runtime": 11.151,
      "eval_samples_per_second": 6.188,
      "eval_steps_per_second": 1.614,
      "step": 345
    },
    {
      "epoch": 5.0,
      "step": 345,
      "total_flos": 2990604350914560.0,
      "train_loss": 0.036720043679942256,
      "train_runtime": 132.663,
      "train_samples_per_second": 10.402,
      "train_steps_per_second": 2.601
    }
  ],
  "logging_steps": 500,
  "max_steps": 345,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "total_flos": 2990604350914560.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}