File size: 2,189 Bytes
6890cdd
 
 
 
 
 
 
 
 
 
 
31c4477
6890cdd
31c4477
6890cdd
 
 
31c4477
6890cdd
31c4477
6890cdd
 
 
31c4477
6890cdd
31c4477
6890cdd
 
 
31c4477
6890cdd
31c4477
6890cdd
 
 
31c4477
6890cdd
31c4477
6890cdd
 
 
 
31c4477
 
 
 
 
6890cdd
 
 
 
31c4477
 
6890cdd
 
 
31c4477
6890cdd
31c4477
6890cdd
 
 
31c4477
6890cdd
31c4477
6890cdd
 
 
31c4477
6890cdd
31c4477
6890cdd
 
 
31c4477
6890cdd
31c4477
6890cdd
 
 
31c4477
6890cdd
31c4477
6890cdd
 
 
 
31c4477
 
 
 
 
6890cdd
 
 
 
31c4477
 
6890cdd
 
 
 
 
 
 
31c4477
6890cdd
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 1058,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.19,
      "learning_rate": 3e-05,
      "loss": 1.7194,
      "step": 100
    },
    {
      "epoch": 0.38,
      "learning_rate": 3e-05,
      "loss": 1.4369,
      "step": 200
    },
    {
      "epoch": 0.57,
      "learning_rate": 3e-05,
      "loss": 1.4257,
      "step": 300
    },
    {
      "epoch": 0.76,
      "learning_rate": 3e-05,
      "loss": 1.3947,
      "step": 400
    },
    {
      "epoch": 0.95,
      "learning_rate": 3e-05,
      "loss": 1.3948,
      "step": 500
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6132307692307692,
      "eval_loss": 1.3087366819381714,
      "eval_runtime": 4.3553,
      "eval_samples_per_second": 114.803,
      "eval_steps_per_second": 14.465,
      "step": 529
    },
    {
      "epoch": 1.0,
      "eval_exact_match": 24.4,
      "eval_f1": 37.614420394420414,
      "step": 529
    },
    {
      "epoch": 1.13,
      "learning_rate": 3e-05,
      "loss": 1.3927,
      "step": 600
    },
    {
      "epoch": 1.32,
      "learning_rate": 3e-05,
      "loss": 1.3644,
      "step": 700
    },
    {
      "epoch": 1.51,
      "learning_rate": 3e-05,
      "loss": 1.3791,
      "step": 800
    },
    {
      "epoch": 1.7,
      "learning_rate": 3e-05,
      "loss": 1.3627,
      "step": 900
    },
    {
      "epoch": 1.89,
      "learning_rate": 3e-05,
      "loss": 1.3789,
      "step": 1000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6146153846153846,
      "eval_loss": 1.289727807044983,
      "eval_runtime": 4.4281,
      "eval_samples_per_second": 112.915,
      "eval_steps_per_second": 14.227,
      "step": 1058
    },
    {
      "epoch": 2.0,
      "eval_exact_match": 23.2,
      "eval_f1": 35.56775335775336,
      "step": 1058
    }
  ],
  "logging_steps": 100,
  "max_steps": 26450,
  "num_train_epochs": 50,
  "save_steps": 500,
  "total_flos": 1.7853789925434982e+17,
  "trial_name": null,
  "trial_params": null
}