File size: 2,440 Bytes
68c0706
 
 
d7eb129
 
 
68c0706
 
 
 
 
d7eb129
68c0706
 
d7eb129
68c0706
 
 
d7eb129
 
 
 
 
68c0706
 
 
d7eb129
 
68c0706
d7eb129
68c0706
 
 
d7eb129
 
 
 
 
 
c537bc2
 
d7eb129
 
 
 
25946e5
16c8fbd
 
d7eb129
 
16c8fbd
d7eb129
16c8fbd
 
 
d7eb129
 
 
 
 
 
 
 
 
 
16c8fbd
d7eb129
16c8fbd
 
 
d7eb129
 
16c8fbd
d7eb129
16c8fbd
 
 
d7eb129
 
 
 
 
16c8fbd
68c0706
 
 
d7eb129
68c0706
 
d7eb129
68c0706
 
 
 
 
 
 
d7eb129
68c0706
 
 
 
d7eb129
68c0706
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 2,
  "global_step": 6,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16666666666666666,
      "grad_norm": null,
      "learning_rate": 0.0,
      "loss": 1.3997,
      "step": 1
    },
    {
      "epoch": 0.16666666666666666,
      "eval_loss": 1.4973297119140625,
      "eval_runtime": 0.2886,
      "eval_samples_per_second": 10.395,
      "eval_steps_per_second": 6.93,
      "step": 1
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 5.478867530822754,
      "learning_rate": 2e-05,
      "loss": 1.4535,
      "step": 2
    },
    {
      "epoch": 0.3333333333333333,
      "eval_loss": 1.4973297119140625,
      "eval_runtime": 0.284,
      "eval_samples_per_second": 10.565,
      "eval_steps_per_second": 7.043,
      "step": 2
    },
    {
      "epoch": 0.5,
      "grad_norm": 5.20695686340332,
      "learning_rate": 4e-05,
      "loss": 1.5303,
      "step": 3
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 4.481665134429932,
      "learning_rate": 6e-05,
      "loss": 1.394,
      "step": 4
    },
    {
      "epoch": 0.6666666666666666,
      "eval_loss": 1.2689329385757446,
      "eval_runtime": 0.3021,
      "eval_samples_per_second": 9.93,
      "eval_steps_per_second": 6.62,
      "step": 4
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 3.2430155277252197,
      "learning_rate": 8e-05,
      "loss": 1.2373,
      "step": 5
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.6539690494537354,
      "learning_rate": 0.0001,
      "loss": 1.0206,
      "step": 6
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.7840422987937927,
      "eval_runtime": 0.3266,
      "eval_samples_per_second": 9.185,
      "eval_steps_per_second": 6.123,
      "step": 6
    }
  ],
  "logging_steps": 1,
  "max_steps": 6,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 2,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2219477097775104.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}