File size: 1,642 Bytes
345eb64
 
 
5c94a7f
 
345eb64
 
 
 
 
957cd32
5c94a7f
 
957cd32
 
 
d9cf529
 
5c94a7f
d9cf529
 
 
 
 
5c94a7f
d9cf529
 
 
 
 
5c94a7f
d9cf529
 
 
 
 
5c94a7f
d9cf529
 
 
 
 
5c94a7f
d9cf529
 
 
 
 
5c94a7f
d9cf529
 
 
 
 
5c94a7f
d9cf529
 
 
 
 
5c94a7f
d9cf529
 
 
 
 
5c94a7f
d9cf529
 
 
5c94a7f
 
 
 
 
 
345eb64
 
 
5c94a7f
 
 
345eb64
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 66.66666666666667,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 6.67,
      "learning_rate": 2e-05,
      "loss": 1.9669,
      "step": 10
    },
    {
      "epoch": 13.33,
      "learning_rate": 2e-05,
      "loss": 0.9447,
      "step": 20
    },
    {
      "epoch": 20.0,
      "learning_rate": 2e-05,
      "loss": 0.1908,
      "step": 30
    },
    {
      "epoch": 26.67,
      "learning_rate": 2e-05,
      "loss": 0.0666,
      "step": 40
    },
    {
      "epoch": 33.33,
      "learning_rate": 2e-05,
      "loss": 0.0441,
      "step": 50
    },
    {
      "epoch": 40.0,
      "learning_rate": 2e-05,
      "loss": 0.0329,
      "step": 60
    },
    {
      "epoch": 46.67,
      "learning_rate": 2e-05,
      "loss": 0.0251,
      "step": 70
    },
    {
      "epoch": 53.33,
      "learning_rate": 2e-05,
      "loss": 0.019,
      "step": 80
    },
    {
      "epoch": 60.0,
      "learning_rate": 2e-05,
      "loss": 0.0166,
      "step": 90
    },
    {
      "epoch": 66.67,
      "learning_rate": 2e-05,
      "loss": 0.0133,
      "step": 100
    },
    {
      "epoch": 66.67,
      "step": 100,
      "total_flos": 42050959441920.0,
      "train_loss": 0.3319866207242012,
      "train_runtime": 4269.4137,
      "train_samples_per_second": 2.998,
      "train_steps_per_second": 0.023
    }
  ],
  "max_steps": 100,
  "num_train_epochs": 100,
  "total_flos": 42050959441920.0,
  "trial_name": null,
  "trial_params": null
}