File size: 2,376 Bytes
bfb9871
 
 
1cb3104
bfb9871
1cb3104
bfb9871
 
 
 
 
 
bb346a2
bfb9871
 
 
 
 
 
 
bb346a2
 
 
bfb9871
 
 
 
bb346a2
bfb9871
 
 
 
 
 
bb346a2
bfb9871
bb346a2
bfb9871
 
 
 
bb346a2
 
 
bfb9871
 
1cb3104
 
 
bb346a2
1cb3104
bb346a2
1cb3104
 
 
 
bb346a2
1cb3104
bb346a2
1cb3104
 
 
 
bb346a2
1cb3104
bb346a2
1cb3104
 
 
 
bb346a2
 
 
1cb3104
 
bfb9871
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1cb3104
bfb9871
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.0006045492329781607,
  "eval_steps": 3,
  "global_step": 6,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00010075820549636011,
      "grad_norm": 8.359752932236722e+18,
      "learning_rate": 2e-05,
      "loss": 1.8654,
      "step": 1
    },
    {
      "epoch": 0.00010075820549636011,
      "eval_loss": 1.7779617309570312,
      "eval_runtime": 1454.9416,
      "eval_samples_per_second": 2.872,
      "eval_steps_per_second": 1.436,
      "step": 1
    },
    {
      "epoch": 0.00020151641099272022,
      "grad_norm": 2.626370165041791e+18,
      "learning_rate": 4e-05,
      "loss": 1.3359,
      "step": 2
    },
    {
      "epoch": 0.0003022746164890803,
      "grad_norm": 3.249743505089626e+18,
      "learning_rate": 6e-05,
      "loss": 1.3042,
      "step": 3
    },
    {
      "epoch": 0.0003022746164890803,
      "eval_loss": 1.7708849906921387,
      "eval_runtime": 1454.6249,
      "eval_samples_per_second": 2.873,
      "eval_steps_per_second": 1.437,
      "step": 3
    },
    {
      "epoch": 0.00040303282198544043,
      "grad_norm": 8.100253344674611e+18,
      "learning_rate": 8e-05,
      "loss": 1.5396,
      "step": 4
    },
    {
      "epoch": 0.0005037910274818005,
      "grad_norm": 4.612551334078448e+18,
      "learning_rate": 0.0001,
      "loss": 1.2117,
      "step": 5
    },
    {
      "epoch": 0.0006045492329781607,
      "grad_norm": 6.333141896013021e+18,
      "learning_rate": 0.00012,
      "loss": 1.6426,
      "step": 6
    },
    {
      "epoch": 0.0006045492329781607,
      "eval_loss": 1.7828969955444336,
      "eval_runtime": 1454.0873,
      "eval_samples_per_second": 2.874,
      "eval_steps_per_second": 1.437,
      "step": 6
    }
  ],
  "logging_steps": 1,
  "max_steps": 10,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 3,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1235242526441472.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}