File size: 2,276 Bytes
a3bf387
 
 
 
 
 
 
 
 
 
 
 
55cf540
a3bf387
 
 
 
 
 
55cf540
a3bf387
 
 
 
 
 
55cf540
a3bf387
55cf540
a3bf387
 
 
 
55cf540
a3bf387
 
 
 
 
 
55cf540
a3bf387
55cf540
a3bf387
 
 
 
55cf540
a3bf387
55cf540
a3bf387
 
 
 
55cf540
a3bf387
55cf540
a3bf387
 
 
 
55cf540
a3bf387
55cf540
a3bf387
 
 
 
 
 
55cf540
 
 
a3bf387
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.764705882352941,
  "eval_steps": 25,
  "global_step": 8,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.47058823529411764,
      "grad_norm": 1.1788614988327026,
      "learning_rate": 0.000175,
      "loss": 9.5575,
      "step": 1
    },
    {
      "epoch": 0.9411764705882353,
      "grad_norm": 1.1705650091171265,
      "learning_rate": 0.00015000000000000001,
      "loss": 9.4221,
      "step": 2
    },
    {
      "epoch": 1.4117647058823528,
      "grad_norm": 1.1544705629348755,
      "learning_rate": 0.000125,
      "loss": 9.2271,
      "step": 3
    },
    {
      "epoch": 1.8823529411764706,
      "grad_norm": 1.3187427520751953,
      "learning_rate": 0.0001,
      "loss": 8.9961,
      "step": 4
    },
    {
      "epoch": 2.3529411764705883,
      "grad_norm": 1.6399372816085815,
      "learning_rate": 7.500000000000001e-05,
      "loss": 8.8138,
      "step": 5
    },
    {
      "epoch": 2.8235294117647056,
      "grad_norm": 1.7734088897705078,
      "learning_rate": 5e-05,
      "loss": 8.6993,
      "step": 6
    },
    {
      "epoch": 3.2941176470588234,
      "grad_norm": 2.155649423599243,
      "learning_rate": 2.5e-05,
      "loss": 8.4984,
      "step": 7
    },
    {
      "epoch": 3.764705882352941,
      "grad_norm": 2.2283499240875244,
      "learning_rate": 0.0,
      "loss": 8.4289,
      "step": 8
    },
    {
      "epoch": 3.764705882352941,
      "step": 8,
      "total_flos": 3464493556500.0,
      "train_loss": 8.95539665222168,
      "train_runtime": 43.773,
      "train_samples_per_second": 3.016,
      "train_steps_per_second": 0.183
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 8,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3464493556500.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}