NiharGupte committed on
Commit
947b7bb
1 Parent(s): 10a0643

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -6,8 +6,8 @@
6
  "eval_samples_per_second": 76.103,
7
  "eval_steps_per_second": 2.393,
8
  "total_flos": 1.477984078577664e+17,
9
- "train_loss": 0.0,
10
- "train_runtime": 167.7096,
11
- "train_samples_per_second": 44.243,
12
- "train_steps_per_second": 0.328
13
  }
 
6
  "eval_samples_per_second": 76.103,
7
  "eval_steps_per_second": 2.393,
8
  "total_flos": 1.477984078577664e+17,
9
+ "train_loss": 3.320157440986396e+22,
10
+ "train_runtime": 166.0473,
11
+ "train_samples_per_second": 44.686,
12
+ "train_steps_per_second": 0.331
13
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83218fc677ef895342b4874306a47dd0b316a563b91ba4c24f45fc17c74d9e8e
3
  size 94302952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a4224349105ff6362d0f6292f347ad3360f20520fe3d5180e17dcc776df27ef
3
  size 94302952
runs/May04_08-04-48_4f22111e1b44/events.out.tfevents.1714809902.4f22111e1b44.9006.7 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f596106547d26c1d43480a7d5aac25f624c2ab7b1f1cb30b4d77065256c725cb
3
+ size 5991
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.680851063829787,
3
  "total_flos": 1.477984078577664e+17,
4
- "train_loss": 0.0,
5
- "train_runtime": 167.7096,
6
- "train_samples_per_second": 44.243,
7
- "train_steps_per_second": 0.328
8
  }
 
1
  {
2
  "epoch": 4.680851063829787,
3
  "total_flos": 1.477984078577664e+17,
4
+ "train_loss": 3.320157440986396e+22,
5
+ "train_runtime": 166.0473,
6
+ "train_samples_per_second": 44.686,
7
+ "train_steps_per_second": 0.331
8
  }
trainer_state.json CHANGED
@@ -10,92 +10,92 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.851063829787234,
13
- "grad_norm": NaN,
14
  "learning_rate": 4.591836734693878e-05,
15
- "loss": 0.0,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.9361702127659575,
20
  "eval_accuracy": 0.4889937106918239,
21
- "eval_loss": NaN,
22
- "eval_runtime": 8.3443,
23
- "eval_samples_per_second": 76.22,
24
- "eval_steps_per_second": 2.397,
25
  "step": 11
26
  },
27
  {
28
  "epoch": 1.702127659574468,
29
- "grad_norm": NaN,
30
  "learning_rate": 3.571428571428572e-05,
31
- "loss": 0.0,
32
  "step": 20
33
  },
34
  {
35
  "epoch": 1.9574468085106385,
36
  "eval_accuracy": 0.4889937106918239,
37
- "eval_loss": NaN,
38
- "eval_runtime": 7.5463,
39
- "eval_samples_per_second": 84.28,
40
- "eval_steps_per_second": 2.65,
41
  "step": 23
42
  },
43
  {
44
  "epoch": 2.5531914893617023,
45
- "grad_norm": NaN,
46
  "learning_rate": 2.5510204081632654e-05,
47
- "loss": 0.0,
48
  "step": 30
49
  },
50
  {
51
  "epoch": 2.978723404255319,
52
  "eval_accuracy": 0.4889937106918239,
53
- "eval_loss": NaN,
54
- "eval_runtime": 8.3005,
55
- "eval_samples_per_second": 76.621,
56
- "eval_steps_per_second": 2.409,
57
  "step": 35
58
  },
59
  {
60
  "epoch": 3.404255319148936,
61
- "grad_norm": NaN,
62
  "learning_rate": 1.5306122448979594e-05,
63
- "loss": 0.0,
64
  "step": 40
65
  },
66
  {
67
  "epoch": 4.0,
68
  "eval_accuracy": 0.4889937106918239,
69
- "eval_loss": NaN,
70
- "eval_runtime": 14.6795,
71
- "eval_samples_per_second": 43.326,
72
- "eval_steps_per_second": 1.362,
73
  "step": 47
74
  },
75
  {
76
  "epoch": 4.25531914893617,
77
- "grad_norm": NaN,
78
  "learning_rate": 5.102040816326531e-06,
79
- "loss": 0.0,
80
  "step": 50
81
  },
82
  {
83
  "epoch": 4.680851063829787,
84
  "eval_accuracy": 0.4889937106918239,
85
- "eval_loss": NaN,
86
- "eval_runtime": 7.5742,
87
- "eval_samples_per_second": 83.969,
88
- "eval_steps_per_second": 2.641,
89
  "step": 55
90
  },
91
  {
92
  "epoch": 4.680851063829787,
93
  "step": 55,
94
  "total_flos": 1.477984078577664e+17,
95
- "train_loss": 0.0,
96
- "train_runtime": 167.7096,
97
- "train_samples_per_second": 44.243,
98
- "train_steps_per_second": 0.328
99
  }
100
  ],
101
  "logging_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.851063829787234,
13
+ "grad_norm": 492.8765869140625,
14
  "learning_rate": 4.591836734693878e-05,
15
+ "loss": 3.371893679888266e+22,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.9361702127659575,
20
  "eval_accuracy": 0.4889937106918239,
21
+ "eval_loss": 3.489738920977707e+22,
22
+ "eval_runtime": 7.7509,
23
+ "eval_samples_per_second": 82.055,
24
+ "eval_steps_per_second": 2.58,
25
  "step": 11
26
  },
27
  {
28
  "epoch": 1.702127659574468,
29
+ "grad_norm": 954.1488647460938,
30
  "learning_rate": 3.571428571428572e-05,
31
+ "loss": 3.243846974994898e+22,
32
  "step": 20
33
  },
34
  {
35
  "epoch": 1.9574468085106385,
36
  "eval_accuracy": 0.4889937106918239,
37
+ "eval_loss": 3.489738920977707e+22,
38
+ "eval_runtime": 8.4777,
39
+ "eval_samples_per_second": 75.021,
40
+ "eval_steps_per_second": 2.359,
41
  "step": 23
42
  },
43
  {
44
  "epoch": 2.5531914893617023,
45
+ "grad_norm": 704.236083984375,
46
  "learning_rate": 2.5510204081632654e-05,
47
+ "loss": 3.336324610319264e+22,
48
  "step": 30
49
  },
50
  {
51
  "epoch": 2.978723404255319,
52
  "eval_accuracy": 0.4889937106918239,
53
+ "eval_loss": 3.489738920977707e+22,
54
+ "eval_runtime": 8.3616,
55
+ "eval_samples_per_second": 76.062,
56
+ "eval_steps_per_second": 2.392,
57
  "step": 35
58
  },
59
  {
60
  "epoch": 3.404255319148936,
61
+ "grad_norm": 400.68768310546875,
62
  "learning_rate": 1.5306122448979594e-05,
63
+ "loss": 3.295420756775664e+22,
64
  "step": 40
65
  },
66
  {
67
  "epoch": 4.0,
68
  "eval_accuracy": 0.4889937106918239,
69
+ "eval_loss": 3.489738920977707e+22,
70
+ "eval_runtime": 8.378,
71
+ "eval_samples_per_second": 75.913,
72
+ "eval_steps_per_second": 2.387,
73
  "step": 47
74
  },
75
  {
76
  "epoch": 4.25531914893617,
77
+ "grad_norm": 1062.6368408203125,
78
  "learning_rate": 5.102040816326531e-06,
79
+ "loss": 3.2794156842759295e+22,
80
  "step": 50
81
  },
82
  {
83
  "epoch": 4.680851063829787,
84
  "eval_accuracy": 0.4889937106918239,
85
+ "eval_loss": 3.489738920977707e+22,
86
+ "eval_runtime": 8.4114,
87
+ "eval_samples_per_second": 75.612,
88
+ "eval_steps_per_second": 2.378,
89
  "step": 55
90
  },
91
  {
92
  "epoch": 4.680851063829787,
93
  "step": 55,
94
  "total_flos": 1.477984078577664e+17,
95
+ "train_loss": 3.320157440986396e+22,
96
+ "train_runtime": 166.0473,
97
+ "train_samples_per_second": 44.686,
98
+ "train_steps_per_second": 0.331
99
  }
100
  ],
101
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2f07845e5a4f177d4d37279be2bdadd143d53be652dea45ac33053323beb4cc
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24cfd8939d4759655d81188e8fcac714580650779c22533dfb13372c7b5c8ad0
3
  size 5048