File size: 3,025 Bytes
0c3eda2
 
 
5fdc735
0c3eda2
5fdc735
0c3eda2
 
 
 
 
bb017bb
 
0c3eda2
bb017bb
0c3eda2
 
 
bb017bb
 
 
 
 
0c3eda2
 
 
bb017bb
 
0c3eda2
bb017bb
0c3eda2
 
 
bb017bb
 
0c3eda2
bb017bb
0c3eda2
 
 
bb017bb
 
 
 
 
0c3eda2
dd06f81
 
bb017bb
 
dd06f81
bb017bb
dd06f81
 
 
bb017bb
 
dd06f81
bb017bb
dd06f81
 
 
bb017bb
 
dd06f81
bb017bb
dd06f81
 
 
bb017bb
 
 
 
 
dd06f81
5fdc735
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c3eda2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5fdc735
0c3eda2
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.024324324324324326,
  "eval_steps": 3,
  "global_step": 9,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002702702702702703,
      "grad_norm": 0.22516648471355438,
      "learning_rate": 2e-05,
      "loss": 2.4235,
      "step": 1
    },
    {
      "epoch": 0.002702702702702703,
      "eval_loss": 2.200819492340088,
      "eval_runtime": 8.0954,
      "eval_samples_per_second": 9.635,
      "eval_steps_per_second": 9.635,
      "step": 1
    },
    {
      "epoch": 0.005405405405405406,
      "grad_norm": 0.2626628577709198,
      "learning_rate": 4e-05,
      "loss": 2.2166,
      "step": 2
    },
    {
      "epoch": 0.008108108108108109,
      "grad_norm": 0.21446576714515686,
      "learning_rate": 6e-05,
      "loss": 2.6546,
      "step": 3
    },
    {
      "epoch": 0.008108108108108109,
      "eval_loss": 2.199615955352783,
      "eval_runtime": 8.2871,
      "eval_samples_per_second": 9.412,
      "eval_steps_per_second": 9.412,
      "step": 3
    },
    {
      "epoch": 0.010810810810810811,
      "grad_norm": 0.2510293424129486,
      "learning_rate": 8e-05,
      "loss": 2.6251,
      "step": 4
    },
    {
      "epoch": 0.013513513513513514,
      "grad_norm": 0.2600695788860321,
      "learning_rate": 0.0001,
      "loss": 2.0934,
      "step": 5
    },
    {
      "epoch": 0.016216216216216217,
      "grad_norm": 0.23503589630126953,
      "learning_rate": 0.00012,
      "loss": 2.3495,
      "step": 6
    },
    {
      "epoch": 0.016216216216216217,
      "eval_loss": 2.193514585494995,
      "eval_runtime": 8.4696,
      "eval_samples_per_second": 9.209,
      "eval_steps_per_second": 9.209,
      "step": 6
    },
    {
      "epoch": 0.01891891891891892,
      "grad_norm": 0.23834799230098724,
      "learning_rate": 0.00014,
      "loss": 2.0365,
      "step": 7
    },
    {
      "epoch": 0.021621621621621623,
      "grad_norm": 0.34777796268463135,
      "learning_rate": 0.00016,
      "loss": 2.0777,
      "step": 8
    },
    {
      "epoch": 0.024324324324324326,
      "grad_norm": 0.2863074839115143,
      "learning_rate": 0.00018,
      "loss": 2.3295,
      "step": 9
    },
    {
      "epoch": 0.024324324324324326,
      "eval_loss": 2.1840600967407227,
      "eval_runtime": 8.339,
      "eval_samples_per_second": 9.354,
      "eval_steps_per_second": 9.354,
      "step": 9
    }
  ],
  "logging_steps": 1,
  "max_steps": 10,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 3,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3098020115644416.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}