mschwab committed on
Commit
6e7c9b3
1 Parent(s): 8609bea

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +16 -16
trainer_state.json CHANGED
@@ -34,49 +34,49 @@
34
  {
35
  "epoch": 0.52,
36
  "learning_rate": 3.705468102734051e-05,
37
- "loss": 0.0159,
38
  "step": 2500
39
  },
40
  {
41
  "epoch": 0.62,
42
  "learning_rate": 3.4465617232808616e-05,
43
- "loss": 0.0145,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.72,
48
  "learning_rate": 3.187655343827672e-05,
49
- "loss": 0.0107,
50
  "step": 3500
51
  },
52
  {
53
  "epoch": 0.83,
54
  "learning_rate": 2.9287489643744827e-05,
55
- "loss": 0.0085,
56
  "step": 4000
57
  },
58
  {
59
  "epoch": 0.93,
60
  "learning_rate": 2.6698425849212926e-05,
61
- "loss": 0.0067,
62
  "step": 4500
63
  },
64
  {
65
  "epoch": 1.04,
66
  "learning_rate": 2.4109362054681028e-05,
67
- "loss": 0.0068,
68
  "step": 5000
69
  },
70
  {
71
  "epoch": 1.14,
72
  "learning_rate": 2.152029826014913e-05,
73
- "loss": 0.0041,
74
  "step": 5500
75
  },
76
  {
77
  "epoch": 1.24,
78
  "learning_rate": 1.8931234465617236e-05,
79
- "loss": 0.0051,
80
  "step": 6000
81
  },
82
  {
@@ -88,45 +88,45 @@
88
  {
89
  "epoch": 1.45,
90
  "learning_rate": 1.375310687655344e-05,
91
- "loss": 0.0038,
92
  "step": 7000
93
  },
94
  {
95
  "epoch": 1.55,
96
  "learning_rate": 1.1164043082021542e-05,
97
- "loss": 0.0068,
98
  "step": 7500
99
  },
100
  {
101
  "epoch": 1.66,
102
  "learning_rate": 8.574979287489644e-06,
103
- "loss": 0.0028,
104
  "step": 8000
105
  },
106
  {
107
  "epoch": 1.76,
108
  "learning_rate": 5.9859154929577465e-06,
109
- "loss": 0.0036,
110
  "step": 8500
111
  },
112
  {
113
  "epoch": 1.86,
114
  "learning_rate": 3.396851698425849e-06,
115
- "loss": 0.001,
116
  "step": 9000
117
  },
118
  {
119
  "epoch": 1.97,
120
  "learning_rate": 8.07787903893952e-07,
121
- "loss": 0.005,
122
  "step": 9500
123
  },
124
  {
125
  "epoch": 2.0,
126
  "step": 9656,
127
  "total_flos": 2.570302678695936e+16,
128
- "train_runtime": 2777.1249,
129
- "train_samples_per_second": 3.477
130
  }
131
  ],
132
  "max_steps": 9656,
34
  {
35
  "epoch": 0.52,
36
  "learning_rate": 3.705468102734051e-05,
37
+ "loss": 0.0156,
38
  "step": 2500
39
  },
40
  {
41
  "epoch": 0.62,
42
  "learning_rate": 3.4465617232808616e-05,
43
+ "loss": 0.0137,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.72,
48
  "learning_rate": 3.187655343827672e-05,
49
+ "loss": 0.0089,
50
  "step": 3500
51
  },
52
  {
53
  "epoch": 0.83,
54
  "learning_rate": 2.9287489643744827e-05,
55
+ "loss": 0.0093,
56
  "step": 4000
57
  },
58
  {
59
  "epoch": 0.93,
60
  "learning_rate": 2.6698425849212926e-05,
61
+ "loss": 0.0068,
62
  "step": 4500
63
  },
64
  {
65
  "epoch": 1.04,
66
  "learning_rate": 2.4109362054681028e-05,
67
+ "loss": 0.0062,
68
  "step": 5000
69
  },
70
  {
71
  "epoch": 1.14,
72
  "learning_rate": 2.152029826014913e-05,
73
+ "loss": 0.0042,
74
  "step": 5500
75
  },
76
  {
77
  "epoch": 1.24,
78
  "learning_rate": 1.8931234465617236e-05,
79
+ "loss": 0.0043,
80
  "step": 6000
81
  },
82
  {
88
  {
89
  "epoch": 1.45,
90
  "learning_rate": 1.375310687655344e-05,
91
+ "loss": 0.0015,
92
  "step": 7000
93
  },
94
  {
95
  "epoch": 1.55,
96
  "learning_rate": 1.1164043082021542e-05,
97
+ "loss": 0.0046,
98
  "step": 7500
99
  },
100
  {
101
  "epoch": 1.66,
102
  "learning_rate": 8.574979287489644e-06,
103
+ "loss": 0.0025,
104
  "step": 8000
105
  },
106
  {
107
  "epoch": 1.76,
108
  "learning_rate": 5.9859154929577465e-06,
109
+ "loss": 0.0035,
110
  "step": 8500
111
  },
112
  {
113
  "epoch": 1.86,
114
  "learning_rate": 3.396851698425849e-06,
115
+ "loss": 0.0016,
116
  "step": 9000
117
  },
118
  {
119
  "epoch": 1.97,
120
  "learning_rate": 8.07787903893952e-07,
121
+ "loss": 0.0031,
122
  "step": 9500
123
  },
124
  {
125
  "epoch": 2.0,
126
  "step": 9656,
127
  "total_flos": 2.570302678695936e+16,
128
+ "train_runtime": 1563.26,
129
+ "train_samples_per_second": 6.177
130
  }
131
  ],
132
  "max_steps": 9656,