{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9047619047619047,
  "eval_steps": 500,
  "global_step": 20,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.19047619047619047,
      "grad_norm": 2.075279951095581,
      "learning_rate": 0.00018,
      "loss": 9.5584,
      "step": 2
    },
    {
      "epoch": 0.38095238095238093,
      "grad_norm": 1.2216403484344482,
      "learning_rate": 0.00016,
      "loss": 9.2621,
      "step": 4
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 1.9454573392868042,
      "learning_rate": 0.00014,
      "loss": 8.8941,
      "step": 6
    },
    {
      "epoch": 0.7619047619047619,
      "grad_norm": 2.4250237941741943,
      "learning_rate": 0.00012,
      "loss": 8.4539,
      "step": 8
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 1.720076560974121,
      "learning_rate": 0.0001,
      "loss": 8.1194,
      "step": 10
    },
    {
      "epoch": 1.1428571428571428,
      "grad_norm": 1.4687391519546509,
      "learning_rate": 8e-05,
      "loss": 7.9072,
      "step": 12
    },
    {
      "epoch": 1.3333333333333333,
      "grad_norm": 1.1075403690338135,
      "learning_rate": 6e-05,
      "loss": 7.7305,
      "step": 14
    },
    {
      "epoch": 1.5238095238095237,
      "grad_norm": 1.532745361328125,
      "learning_rate": 4e-05,
      "loss": 7.6572,
      "step": 16
    },
    {
      "epoch": 1.7142857142857144,
      "grad_norm": 1.2055246829986572,
      "learning_rate": 2e-05,
      "loss": 7.6057,
      "step": 18
    },
    {
      "epoch": 1.9047619047619047,
      "grad_norm": 0.9867807030677795,
      "learning_rate": 0.0,
      "loss": 7.5695,
      "step": 20
    },
    {
      "epoch": 1.9047619047619047,
      "step": 20,
      "total_flos": 99814369025136.0,
      "train_loss": 8.27579402923584,
      "train_runtime": 84.8689,
      "train_samples_per_second": 3.959,
      "train_steps_per_second": 0.236
    }
  ],
  "logging_steps": 2,
  "max_steps": 20,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 99814369025136.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}