File size: 1,903 Bytes
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.7272727272727275,
  "eval_steps": 500,
  "global_step": 15,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.18,
      "grad_norm": 8.112652061034474,
      "learning_rate": 1e-05,
      "loss": 1.4798,
      "step": 1
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.848398983756101,
      "learning_rate": 1.7485107481711014e-05,
      "loss": 1.2659,
      "step": 5
    },
    {
      "epoch": 0.91,
      "eval_loss": 1.2053821086883545,
      "eval_runtime": 1.8264,
      "eval_samples_per_second": 3.833,
      "eval_steps_per_second": 0.548,
      "step": 5
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6874796106077996,
      "learning_rate": 6.453951129574644e-06,
      "loss": 1.0094,
      "step": 10
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.1690208911895752,
      "eval_runtime": 1.8589,
      "eval_samples_per_second": 3.766,
      "eval_steps_per_second": 0.538,
      "step": 11
    },
    {
      "epoch": 2.73,
      "grad_norm": 1.7499037267304558,
      "learning_rate": 0.0,
      "loss": 0.8249,
      "step": 15
    },
    {
      "epoch": 2.73,
      "eval_loss": 1.1581600904464722,
      "eval_runtime": 1.8666,
      "eval_samples_per_second": 3.75,
      "eval_steps_per_second": 0.536,
      "step": 15
    },
    {
      "epoch": 2.73,
      "step": 15,
      "total_flos": 3088349921280.0,
      "train_loss": 1.0476287603378296,
      "train_runtime": 318.5273,
      "train_samples_per_second": 3.306,
      "train_steps_per_second": 0.047
    }
  ],
  "logging_steps": 5,
  "max_steps": 15,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 3088349921280.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}
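
The JSON above is the standard trainer_state.json that the Hugging Face transformers Trainer writes alongside a checkpoint; log_history holds the interleaved training and evaluation log entries. Below is a minimal sketch for inspecting it, assuming the file has been downloaded locally as trainer_state.json (the path is an assumption, not something given by this listing).

import json

# Load the trainer state file (path is an assumed local copy of this file).
with open("trainer_state.json") as f:
    state = json.load(f)

# Split log_history into training-loss entries and evaluation entries.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

# Print a compact view of the loss curve recorded in this run.
for e in train_logs:
    print(f"step {e['step']:>3}  epoch {e['epoch']:.2f}  loss {e['loss']:.4f}")
for e in eval_logs:
    print(f"step {e['step']:>3}  epoch {e['epoch']:.2f}  eval_loss {e['eval_loss']:.4f}")

For this file, the sketch would list the three logged training losses (1.4798, 1.2659, 1.0094, plus the 0.8249 entry at step 15) and the eval_loss values at steps 5, 11, and 15.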