{
  "best_metric": 0.42528408765792847,
  "best_model_checkpoint": "mikhail-panzo/zlm-fil-ceb_b64_le5_s8000/checkpoint-500",
  "epoch": 19.801980198019802,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.9801980198019802,
      "grad_norm": 1.2683665752410889,
      "learning_rate": 2.5000000000000004e-07,
      "loss": 0.4932,
      "step": 50
    },
    {
      "epoch": 3.9603960396039604,
      "grad_norm": 1.1266463994979858,
      "learning_rate": 5.000000000000001e-07,
      "loss": 0.4898,
      "step": 100
    },
    {
      "epoch": 5.9405940594059405,
      "grad_norm": 1.1800968647003174,
      "learning_rate": 7.5e-07,
      "loss": 0.4839,
      "step": 150
    },
    {
      "epoch": 7.920792079207921,
      "grad_norm": 0.8235568404197693,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.4785,
      "step": 200
    },
    {
      "epoch": 9.900990099009901,
      "grad_norm": 1.313211441040039,
      "learning_rate": 1.25e-06,
      "loss": 0.4767,
      "step": 250
    },
    {
      "epoch": 11.881188118811881,
      "grad_norm": 0.7831560373306274,
      "learning_rate": 1.5e-06,
      "loss": 0.4681,
      "step": 300
    },
    {
      "epoch": 13.861386138613861,
      "grad_norm": 0.7987237572669983,
      "learning_rate": 1.75e-06,
      "loss": 0.4658,
      "step": 350
    },
    {
      "epoch": 15.841584158415841,
      "grad_norm": 0.7143813371658325,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.4627,
      "step": 400
    },
    {
      "epoch": 17.821782178217823,
      "grad_norm": 0.8037531971931458,
      "learning_rate": 2.25e-06,
      "loss": 0.461,
      "step": 450
    },
    {
      "epoch": 19.801980198019802,
      "grad_norm": 0.79031902551651,
      "learning_rate": 2.5e-06,
      "loss": 0.4592,
      "step": 500
    },
    {
      "epoch": 19.801980198019802,
      "eval_loss": 0.42528408765792847,
      "eval_runtime": 7.8982,
      "eval_samples_per_second": 22.79,
      "eval_steps_per_second": 2.912,
      "step": 500
    }
  ],
  "logging_steps": 50,
  "max_steps": 8000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 320,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5396569205689728.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}