File size: 2,678 Bytes
e3b7b94
c665e99
 
 
e3b7b94
 
 
 
 
 
 
c665e99
 
 
 
e3b7b94
 
 
c665e99
 
 
 
e3b7b94
 
 
c665e99
 
e3b7b94
c665e99
e3b7b94
 
 
c665e99
 
 
 
e3b7b94
 
 
c665e99
 
 
 
e3b7b94
 
 
c665e99
 
 
 
e3b7b94
 
 
c665e99
 
 
 
e3b7b94
 
 
c665e99
 
 
 
e3b7b94
 
 
c665e99
 
 
 
e3b7b94
 
 
c665e99
 
 
 
e3b7b94
 
 
c665e99
 
 
 
 
e3b7b94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c665e99
 
e3b7b94
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
{
  "best_metric": 0.6066474914550781,
  "best_model_checkpoint": "mikhail-panzo/zlm_b64_le5_s12000/checkpoint-500",
  "epoch": 0.4187604690117253,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04187604690117253,
      "grad_norm": 4.788218975067139,
      "learning_rate": 2.4500000000000004e-07,
      "loss": 1.1819,
      "step": 50
    },
    {
      "epoch": 0.08375209380234507,
      "grad_norm": 15.618935585021973,
      "learning_rate": 4.95e-07,
      "loss": 1.0042,
      "step": 100
    },
    {
      "epoch": 0.12562814070351758,
      "grad_norm": 2.7821555137634277,
      "learning_rate": 7.4e-07,
      "loss": 0.9676,
      "step": 150
    },
    {
      "epoch": 0.16750418760469013,
      "grad_norm": 3.759263038635254,
      "learning_rate": 9.85e-07,
      "loss": 0.9362,
      "step": 200
    },
    {
      "epoch": 0.20938023450586266,
      "grad_norm": 3.906439781188965,
      "learning_rate": 1.235e-06,
      "loss": 0.8667,
      "step": 250
    },
    {
      "epoch": 0.25125628140703515,
      "grad_norm": 4.397643566131592,
      "learning_rate": 1.485e-06,
      "loss": 0.8455,
      "step": 300
    },
    {
      "epoch": 0.2931323283082077,
      "grad_norm": 2.94077467918396,
      "learning_rate": 1.7350000000000001e-06,
      "loss": 0.8376,
      "step": 350
    },
    {
      "epoch": 0.33500837520938026,
      "grad_norm": 4.161074161529541,
      "learning_rate": 1.985e-06,
      "loss": 0.7521,
      "step": 400
    },
    {
      "epoch": 0.3768844221105528,
      "grad_norm": 2.8088552951812744,
      "learning_rate": 2.235e-06,
      "loss": 0.7444,
      "step": 450
    },
    {
      "epoch": 0.4187604690117253,
      "grad_norm": 2.62283992767334,
      "learning_rate": 2.4850000000000003e-06,
      "loss": 0.7129,
      "step": 500
    },
    {
      "epoch": 0.4187604690117253,
      "eval_loss": 0.6066474914550781,
      "eval_runtime": 213.4911,
      "eval_samples_per_second": 39.763,
      "eval_steps_per_second": 4.974,
      "step": 500
    }
  ],
  "logging_steps": 50,
  "max_steps": 12000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 11,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4878617293034496.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}