File size: 2,067 Bytes
942e89b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.991735537190083,
  "global_step": 420,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.66,
      "learning_rate": 7.619047619047618e-05,
      "loss": 1.2894,
      "step": 100
    },
    {
      "epoch": 1.66,
      "eval_accuracy": 0.4958677589893341,
      "eval_loss": 1.2452681064605713,
      "eval_runtime": 7.5077,
      "eval_samples_per_second": 16.117,
      "eval_steps_per_second": 4.129,
      "step": 100
    },
    {
      "epoch": 3.33,
      "learning_rate": 5.285714285714286e-05,
      "loss": 0.7978,
      "step": 200
    },
    {
      "epoch": 3.33,
      "eval_accuracy": 0.7768595218658447,
      "eval_loss": 0.746031641960144,
      "eval_runtime": 7.1743,
      "eval_samples_per_second": 16.866,
      "eval_steps_per_second": 4.321,
      "step": 200
    },
    {
      "epoch": 4.99,
      "learning_rate": 2.9047619047619052e-05,
      "loss": 0.4372,
      "step": 300
    },
    {
      "epoch": 4.99,
      "eval_accuracy": 0.9008264541625977,
      "eval_loss": 0.3735601603984833,
      "eval_runtime": 7.2008,
      "eval_samples_per_second": 16.804,
      "eval_steps_per_second": 4.305,
      "step": 300
    },
    {
      "epoch": 6.66,
      "learning_rate": 5.4761904761904765e-06,
      "loss": 0.246,
      "step": 400
    },
    {
      "epoch": 6.66,
      "eval_accuracy": 0.9504132270812988,
      "eval_loss": 0.24308893084526062,
      "eval_runtime": 10.0989,
      "eval_samples_per_second": 11.982,
      "eval_steps_per_second": 3.07,
      "step": 400
    },
    {
      "epoch": 6.99,
      "step": 420,
      "total_flos": 1.8354201823649805e+17,
      "train_loss": 0.6664659727187384,
      "train_runtime": 440.1582,
      "train_samples_per_second": 7.681,
      "train_steps_per_second": 0.954
    }
  ],
  "max_steps": 420,
  "num_train_epochs": 7,
  "total_flos": 1.8354201823649805e+17,
  "trial_name": null,
  "trial_params": null
}