File size: 1,749 Bytes
4ba0cea
 
 
8e05c1b
 
4ba0cea
 
 
 
 
 
8e05c1b
 
 
4ba0cea
 
 
8e05c1b
 
 
4ba0cea
 
8e05c1b
 
 
 
4ba0cea
 
8e05c1b
 
 
 
4ba0cea
 
8e05c1b
 
 
 
4ba0cea
 
8e05c1b
 
 
 
4ba0cea
 
8e05c1b
 
 
 
4ba0cea
 
8e05c1b
 
 
 
4ba0cea
 
8e05c1b
 
 
 
4ba0cea
 
8e05c1b
 
 
 
b88a575
 
8e05c1b
 
 
 
 
 
 
4ba0cea
 
8e05c1b
4ba0cea
8e05c1b
4ba0cea
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1,
      "learning_rate": 5e-08,
      "loss": 1.3906,
      "step": 5
    },
    {
      "epoch": 0.2,
      "learning_rate": 9.347826086956522e-08,
      "loss": 1.3831,
      "step": 10
    },
    {
      "epoch": 0.3,
      "learning_rate": 8.26086956521739e-08,
      "loss": 1.406,
      "step": 15
    },
    {
      "epoch": 0.4,
      "learning_rate": 7.173913043478261e-08,
      "loss": 1.4041,
      "step": 20
    },
    {
      "epoch": 0.5,
      "learning_rate": 6.086956521739131e-08,
      "loss": 1.3447,
      "step": 25
    },
    {
      "epoch": 0.6,
      "learning_rate": 5e-08,
      "loss": 1.3263,
      "step": 30
    },
    {
      "epoch": 0.7,
      "learning_rate": 3.913043478260869e-08,
      "loss": 1.3201,
      "step": 35
    },
    {
      "epoch": 0.8,
      "learning_rate": 2.8260869565217388e-08,
      "loss": 1.3223,
      "step": 40
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.7391304347826087e-08,
      "loss": 1.2421,
      "step": 45
    },
    {
      "epoch": 1.0,
      "learning_rate": 6.521739130434782e-09,
      "loss": 1.3366,
      "step": 50
    },
    {
      "epoch": 1.0,
      "step": 50,
      "total_flos": 7.8780432384e+16,
      "train_loss": 1.3475898265838624,
      "train_runtime": 419.0595,
      "train_samples_per_second": 7.636,
      "train_steps_per_second": 0.119
    }
  ],
  "max_steps": 50,
  "num_train_epochs": 9223372036854775807,
  "total_flos": 7.8780432384e+16,
  "trial_name": null,
  "trial_params": null
}