NiharGupte
committed on
Commit
•
947b7bb
1
Parent(s):
10a0643
Training in progress, epoch 1
Browse files
- all_results.json +4 -4
- model.safetensors +1 -1
- runs/May04_08-04-48_4f22111e1b44/events.out.tfevents.1714809902.4f22111e1b44.9006.7 +3 -0
- train_results.json +4 -4
- trainer_state.json +34 -34
- training_args.bin +1 -1
all_results.json
CHANGED
@@ -6,8 +6,8 @@
|
|
6 |
"eval_samples_per_second": 76.103,
|
7 |
"eval_steps_per_second": 2.393,
|
8 |
"total_flos": 1.477984078577664e+17,
|
9 |
-
"train_loss":
|
10 |
-
"train_runtime":
|
11 |
-
"train_samples_per_second": 44.
|
12 |
-
"train_steps_per_second": 0.
|
13 |
}
|
|
|
6 |
"eval_samples_per_second": 76.103,
|
7 |
"eval_steps_per_second": 2.393,
|
8 |
"total_flos": 1.477984078577664e+17,
|
9 |
+
"train_loss": 3.320157440986396e+22,
|
10 |
+
"train_runtime": 166.0473,
|
11 |
+
"train_samples_per_second": 44.686,
|
12 |
+
"train_steps_per_second": 0.331
|
13 |
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 94302952
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a4224349105ff6362d0f6292f347ad3360f20520fe3d5180e17dcc776df27ef
|
3 |
size 94302952
|
runs/May04_08-04-48_4f22111e1b44/events.out.tfevents.1714809902.4f22111e1b44.9006.7
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f596106547d26c1d43480a7d5aac25f624c2ab7b1f1cb30b4d77065256c725cb
|
3 |
+
size 5991
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 4.680851063829787,
|
3 |
"total_flos": 1.477984078577664e+17,
|
4 |
-
"train_loss":
|
5 |
-
"train_runtime":
|
6 |
-
"train_samples_per_second": 44.
|
7 |
-
"train_steps_per_second": 0.
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 4.680851063829787,
|
3 |
"total_flos": 1.477984078577664e+17,
|
4 |
+
"train_loss": 3.320157440986396e+22,
|
5 |
+
"train_runtime": 166.0473,
|
6 |
+
"train_samples_per_second": 44.686,
|
7 |
+
"train_steps_per_second": 0.331
|
8 |
}
|
trainer_state.json
CHANGED
@@ -10,92 +10,92 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.851063829787234,
|
13 |
-
"grad_norm":
|
14 |
"learning_rate": 4.591836734693878e-05,
|
15 |
-
"loss":
|
16 |
"step": 10
|
17 |
},
|
18 |
{
|
19 |
"epoch": 0.9361702127659575,
|
20 |
"eval_accuracy": 0.4889937106918239,
|
21 |
-
"eval_loss":
|
22 |
-
"eval_runtime":
|
23 |
-
"eval_samples_per_second":
|
24 |
-
"eval_steps_per_second": 2.
|
25 |
"step": 11
|
26 |
},
|
27 |
{
|
28 |
"epoch": 1.702127659574468,
|
29 |
-
"grad_norm":
|
30 |
"learning_rate": 3.571428571428572e-05,
|
31 |
-
"loss":
|
32 |
"step": 20
|
33 |
},
|
34 |
{
|
35 |
"epoch": 1.9574468085106385,
|
36 |
"eval_accuracy": 0.4889937106918239,
|
37 |
-
"eval_loss":
|
38 |
-
"eval_runtime":
|
39 |
-
"eval_samples_per_second":
|
40 |
-
"eval_steps_per_second": 2.
|
41 |
"step": 23
|
42 |
},
|
43 |
{
|
44 |
"epoch": 2.5531914893617023,
|
45 |
-
"grad_norm":
|
46 |
"learning_rate": 2.5510204081632654e-05,
|
47 |
-
"loss":
|
48 |
"step": 30
|
49 |
},
|
50 |
{
|
51 |
"epoch": 2.978723404255319,
|
52 |
"eval_accuracy": 0.4889937106918239,
|
53 |
-
"eval_loss":
|
54 |
-
"eval_runtime": 8.
|
55 |
-
"eval_samples_per_second": 76.
|
56 |
-
"eval_steps_per_second": 2.
|
57 |
"step": 35
|
58 |
},
|
59 |
{
|
60 |
"epoch": 3.404255319148936,
|
61 |
-
"grad_norm":
|
62 |
"learning_rate": 1.5306122448979594e-05,
|
63 |
-
"loss":
|
64 |
"step": 40
|
65 |
},
|
66 |
{
|
67 |
"epoch": 4.0,
|
68 |
"eval_accuracy": 0.4889937106918239,
|
69 |
-
"eval_loss":
|
70 |
-
"eval_runtime":
|
71 |
-
"eval_samples_per_second":
|
72 |
-
"eval_steps_per_second":
|
73 |
"step": 47
|
74 |
},
|
75 |
{
|
76 |
"epoch": 4.25531914893617,
|
77 |
-
"grad_norm":
|
78 |
"learning_rate": 5.102040816326531e-06,
|
79 |
-
"loss":
|
80 |
"step": 50
|
81 |
},
|
82 |
{
|
83 |
"epoch": 4.680851063829787,
|
84 |
"eval_accuracy": 0.4889937106918239,
|
85 |
-
"eval_loss":
|
86 |
-
"eval_runtime":
|
87 |
-
"eval_samples_per_second":
|
88 |
-
"eval_steps_per_second": 2.
|
89 |
"step": 55
|
90 |
},
|
91 |
{
|
92 |
"epoch": 4.680851063829787,
|
93 |
"step": 55,
|
94 |
"total_flos": 1.477984078577664e+17,
|
95 |
-
"train_loss":
|
96 |
-
"train_runtime":
|
97 |
-
"train_samples_per_second": 44.
|
98 |
-
"train_steps_per_second": 0.
|
99 |
}
|
100 |
],
|
101 |
"logging_steps": 10,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.851063829787234,
|
13 |
+
"grad_norm": 492.8765869140625,
|
14 |
"learning_rate": 4.591836734693878e-05,
|
15 |
+
"loss": 3.371893679888266e+22,
|
16 |
"step": 10
|
17 |
},
|
18 |
{
|
19 |
"epoch": 0.9361702127659575,
|
20 |
"eval_accuracy": 0.4889937106918239,
|
21 |
+
"eval_loss": 3.489738920977707e+22,
|
22 |
+
"eval_runtime": 7.7509,
|
23 |
+
"eval_samples_per_second": 82.055,
|
24 |
+
"eval_steps_per_second": 2.58,
|
25 |
"step": 11
|
26 |
},
|
27 |
{
|
28 |
"epoch": 1.702127659574468,
|
29 |
+
"grad_norm": 954.1488647460938,
|
30 |
"learning_rate": 3.571428571428572e-05,
|
31 |
+
"loss": 3.243846974994898e+22,
|
32 |
"step": 20
|
33 |
},
|
34 |
{
|
35 |
"epoch": 1.9574468085106385,
|
36 |
"eval_accuracy": 0.4889937106918239,
|
37 |
+
"eval_loss": 3.489738920977707e+22,
|
38 |
+
"eval_runtime": 8.4777,
|
39 |
+
"eval_samples_per_second": 75.021,
|
40 |
+
"eval_steps_per_second": 2.359,
|
41 |
"step": 23
|
42 |
},
|
43 |
{
|
44 |
"epoch": 2.5531914893617023,
|
45 |
+
"grad_norm": 704.236083984375,
|
46 |
"learning_rate": 2.5510204081632654e-05,
|
47 |
+
"loss": 3.336324610319264e+22,
|
48 |
"step": 30
|
49 |
},
|
50 |
{
|
51 |
"epoch": 2.978723404255319,
|
52 |
"eval_accuracy": 0.4889937106918239,
|
53 |
+
"eval_loss": 3.489738920977707e+22,
|
54 |
+
"eval_runtime": 8.3616,
|
55 |
+
"eval_samples_per_second": 76.062,
|
56 |
+
"eval_steps_per_second": 2.392,
|
57 |
"step": 35
|
58 |
},
|
59 |
{
|
60 |
"epoch": 3.404255319148936,
|
61 |
+
"grad_norm": 400.68768310546875,
|
62 |
"learning_rate": 1.5306122448979594e-05,
|
63 |
+
"loss": 3.295420756775664e+22,
|
64 |
"step": 40
|
65 |
},
|
66 |
{
|
67 |
"epoch": 4.0,
|
68 |
"eval_accuracy": 0.4889937106918239,
|
69 |
+
"eval_loss": 3.489738920977707e+22,
|
70 |
+
"eval_runtime": 8.378,
|
71 |
+
"eval_samples_per_second": 75.913,
|
72 |
+
"eval_steps_per_second": 2.387,
|
73 |
"step": 47
|
74 |
},
|
75 |
{
|
76 |
"epoch": 4.25531914893617,
|
77 |
+
"grad_norm": 1062.6368408203125,
|
78 |
"learning_rate": 5.102040816326531e-06,
|
79 |
+
"loss": 3.2794156842759295e+22,
|
80 |
"step": 50
|
81 |
},
|
82 |
{
|
83 |
"epoch": 4.680851063829787,
|
84 |
"eval_accuracy": 0.4889937106918239,
|
85 |
+
"eval_loss": 3.489738920977707e+22,
|
86 |
+
"eval_runtime": 8.4114,
|
87 |
+
"eval_samples_per_second": 75.612,
|
88 |
+
"eval_steps_per_second": 2.378,
|
89 |
"step": 55
|
90 |
},
|
91 |
{
|
92 |
"epoch": 4.680851063829787,
|
93 |
"step": 55,
|
94 |
"total_flos": 1.477984078577664e+17,
|
95 |
+
"train_loss": 3.320157440986396e+22,
|
96 |
+
"train_runtime": 166.0473,
|
97 |
+
"train_samples_per_second": 44.686,
|
98 |
+
"train_steps_per_second": 0.331
|
99 |
}
|
100 |
],
|
101 |
"logging_steps": 10,
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5048
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24cfd8939d4759655d81188e8fcac714580650779c22533dfb13372c7b5c8ad0
|
3 |
size 5048
|