File size: 1,946 Bytes
8d50f4c
008f4bc
 
 
8d50f4c
008f4bc
8d50f4c
 
 
 
 
0879b49
008f4bc
 
 
 
8d50f4c
 
0879b49
008f4bc
 
 
 
8d50f4c
 
5b82d2a
008f4bc
 
 
 
5b82d2a
 
0879b49
008f4bc
 
 
 
5b82d2a
 
0879b49
008f4bc
 
 
 
5b82d2a
 
0879b49
3838257
008f4bc
 
 
 
 
b349a97
 
008f4bc
 
 
 
 
b349a97
 
008f4bc
 
 
 
 
 
 
8d50f4c
 
008f4bc
 
8d50f4c
008f4bc
8d50f4c
008f4bc
 
8d50f4c
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
{
  "best_metric": 1.1399264335632324,
  "best_model_checkpoint": "Action_model/checkpoint-100",
  "epoch": 1.0,
  "eval_steps": 100,
  "global_step": 134,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.15,
      "grad_norm": 1.509538173675537,
      "learning_rate": 8.507462686567164e-05,
      "loss": 2.1872,
      "step": 20
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6843363046646118,
      "learning_rate": 7.014925373134329e-05,
      "loss": 1.872,
      "step": 40
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.891447901725769,
      "learning_rate": 5.5223880597014934e-05,
      "loss": 1.5872,
      "step": 60
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9932177066802979,
      "learning_rate": 4.029850746268657e-05,
      "loss": 1.3864,
      "step": 80
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7128252983093262,
      "learning_rate": 2.537313432835821e-05,
      "loss": 1.2948,
      "step": 100
    },
    {
      "epoch": 0.75,
      "eval_accuracy": 0.789103690685413,
      "eval_loss": 1.1399264335632324,
      "eval_runtime": 11.9209,
      "eval_samples_per_second": 47.731,
      "eval_steps_per_second": 6.04,
      "step": 100
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.182009696960449,
      "learning_rate": 1.0447761194029851e-05,
      "loss": 1.2271,
      "step": 120
    },
    {
      "epoch": 1.0,
      "step": 134,
      "total_flos": 3.3230947683690086e+17,
      "train_loss": 1.549544946471257,
      "train_runtime": 145.4848,
      "train_samples_per_second": 29.474,
      "train_steps_per_second": 0.921
    }
  ],
  "logging_steps": 20,
  "max_steps": 134,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 3.3230947683690086e+17,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}