File size: 3,025 Bytes
0c3eda2
 
 
ae00ba9
0c3eda2
5fdc735
0c3eda2
 
 
 
 
ae00ba9
 
0c3eda2
ae00ba9
0c3eda2
 
 
ae00ba9
 
 
 
 
0c3eda2
 
 
ae00ba9
 
0c3eda2
ae00ba9
0c3eda2
 
 
ae00ba9
 
0c3eda2
ae00ba9
0c3eda2
 
 
ae00ba9
 
 
 
 
0c3eda2
dd06f81
 
ae00ba9
 
dd06f81
ae00ba9
dd06f81
 
 
ae00ba9
 
dd06f81
ae00ba9
dd06f81
 
 
ae00ba9
 
dd06f81
ae00ba9
dd06f81
 
 
ae00ba9
 
 
 
 
dd06f81
5fdc735
 
ae00ba9
 
5fdc735
ae00ba9
5fdc735
 
 
ae00ba9
 
5fdc735
ae00ba9
5fdc735
 
 
ae00ba9
 
5fdc735
ae00ba9
5fdc735
 
 
ae00ba9
 
 
 
 
5fdc735
0c3eda2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae00ba9
0c3eda2
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.04864864864864865,
  "eval_steps": 3,
  "global_step": 9,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.005405405405405406,
      "grad_norm": 0.2000586837530136,
      "learning_rate": 2e-05,
      "loss": 2.3204,
      "step": 1
    },
    {
      "epoch": 0.005405405405405406,
      "eval_loss": 2.2008719444274902,
      "eval_runtime": 9.7979,
      "eval_samples_per_second": 7.961,
      "eval_steps_per_second": 3.98,
      "step": 1
    },
    {
      "epoch": 0.010810810810810811,
      "grad_norm": 0.17591507732868195,
      "learning_rate": 4e-05,
      "loss": 2.6394,
      "step": 2
    },
    {
      "epoch": 0.016216216216216217,
      "grad_norm": 0.19049879908561707,
      "learning_rate": 6e-05,
      "loss": 2.2241,
      "step": 3
    },
    {
      "epoch": 0.016216216216216217,
      "eval_loss": 2.1992404460906982,
      "eval_runtime": 9.8906,
      "eval_samples_per_second": 7.886,
      "eval_steps_per_second": 3.943,
      "step": 3
    },
    {
      "epoch": 0.021621621621621623,
      "grad_norm": 0.21317099034786224,
      "learning_rate": 8e-05,
      "loss": 2.0634,
      "step": 4
    },
    {
      "epoch": 0.02702702702702703,
      "grad_norm": 0.16591224074363708,
      "learning_rate": 0.0001,
      "loss": 2.2234,
      "step": 5
    },
    {
      "epoch": 0.032432432432432434,
      "grad_norm": 0.22719058394432068,
      "learning_rate": 0.00012,
      "loss": 2.4093,
      "step": 6
    },
    {
      "epoch": 0.032432432432432434,
      "eval_loss": 2.1915857791900635,
      "eval_runtime": 9.9284,
      "eval_samples_per_second": 7.856,
      "eval_steps_per_second": 3.928,
      "step": 6
    },
    {
      "epoch": 0.03783783783783784,
      "grad_norm": 0.24013416469097137,
      "learning_rate": 0.00014,
      "loss": 2.034,
      "step": 7
    },
    {
      "epoch": 0.043243243243243246,
      "grad_norm": 0.19061008095741272,
      "learning_rate": 0.00016,
      "loss": 2.0807,
      "step": 8
    },
    {
      "epoch": 0.04864864864864865,
      "grad_norm": 0.22366337478160858,
      "learning_rate": 0.00018,
      "loss": 2.2467,
      "step": 9
    },
    {
      "epoch": 0.04864864864864865,
      "eval_loss": 2.179177761077881,
      "eval_runtime": 9.9529,
      "eval_samples_per_second": 7.837,
      "eval_steps_per_second": 3.918,
      "step": 9
    }
  ],
  "logging_steps": 1,
  "max_steps": 10,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 3,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6057323004952576.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}