File size: 2,659 Bytes
1b707db
7b9f354
1b707db
7b9f354
1b707db
 
 
 
 
 
 
7b9f354
 
 
 
1b707db
 
 
7b9f354
 
 
 
1b707db
 
 
7b9f354
 
 
 
1b707db
 
 
7b9f354
 
 
 
1b707db
 
 
7b9f354
 
 
 
1b707db
 
 
7b9f354
 
 
 
1b707db
 
 
7b9f354
 
 
 
1b707db
 
 
7b9f354
 
 
 
1b707db
 
 
7b9f354
 
 
 
1b707db
 
 
7b9f354
 
 
 
1b707db
 
 
7b9f354
 
 
 
 
1b707db
 
 
 
 
 
7b9f354
1b707db
 
 
 
 
 
 
 
 
 
 
 
 
7b9f354
1b707db
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
{
  "best_metric": 0.48496538400650024,
  "best_model_checkpoint": "mikhail-panzo/fil_b32_le5_s4000/checkpoint-500",
  "epoch": 44.44444444444444,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 4.444444444444445,
      "grad_norm": 4.288165092468262,
      "learning_rate": 2.5000000000000004e-07,
      "loss": 0.7989,
      "step": 50
    },
    {
      "epoch": 8.88888888888889,
      "grad_norm": 2.4493463039398193,
      "learning_rate": 5.000000000000001e-07,
      "loss": 0.7701,
      "step": 100
    },
    {
      "epoch": 13.333333333333334,
      "grad_norm": 1.3798907995224,
      "learning_rate": 7.5e-07,
      "loss": 0.7374,
      "step": 150
    },
    {
      "epoch": 17.77777777777778,
      "grad_norm": 2.6173877716064453,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.7101,
      "step": 200
    },
    {
      "epoch": 22.22222222222222,
      "grad_norm": 1.3756701946258545,
      "learning_rate": 1.25e-06,
      "loss": 0.6887,
      "step": 250
    },
    {
      "epoch": 26.666666666666668,
      "grad_norm": 1.0456843376159668,
      "learning_rate": 1.5e-06,
      "loss": 0.672,
      "step": 300
    },
    {
      "epoch": 31.11111111111111,
      "grad_norm": 0.9518398642539978,
      "learning_rate": 1.75e-06,
      "loss": 0.6583,
      "step": 350
    },
    {
      "epoch": 35.55555555555556,
      "grad_norm": 1.305978775024414,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.6258,
      "step": 400
    },
    {
      "epoch": 40.0,
      "grad_norm": 0.8857623338699341,
      "learning_rate": 2.25e-06,
      "loss": 0.5835,
      "step": 450
    },
    {
      "epoch": 44.44444444444444,
      "grad_norm": 1.052182912826538,
      "learning_rate": 2.5e-06,
      "loss": 0.5539,
      "step": 500
    },
    {
      "epoch": 44.44444444444444,
      "eval_loss": 0.48496538400650024,
      "eval_runtime": 9.4574,
      "eval_samples_per_second": 16.812,
      "eval_steps_per_second": 2.115,
      "step": 500
    }
  ],
  "logging_steps": 50,
  "max_steps": 4000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 364,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.3753846024576416e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}