Kerem P committed on
Commit
9e5bc07
1 Parent(s): 8b6ab0a

End of training

Browse files
Files changed (5) hide show
  1. README.md +5 -0
  2. all_results.json +14 -0
  3. eval_results.json +9 -0
  4. train_results.json +8 -0
  5. trainer_state.json +186 -0
README.md CHANGED
@@ -3,6 +3,8 @@ license: mit
3
  base_model: microsoft/deberta-v3-large
4
  tags:
5
  - generated_from_trainer
 
 
6
  model-index:
7
  - name: opus-em-deberta-3-large-v2
8
  results: []
@@ -14,6 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
14
  # opus-em-deberta-3-large-v2
15
 
16
  This model is a fine-tuned version of [microsoft/deberta-v3-large](https://huggingface.co/microsoft/deberta-v3-large) on an unknown dataset.
 
 
 
17
 
18
  ## Model description
19
 
 
3
  base_model: microsoft/deberta-v3-large
4
  tags:
5
  - generated_from_trainer
6
+ metrics:
7
+ - f1
8
  model-index:
9
  - name: opus-em-deberta-3-large-v2
10
  results: []
 
16
  # opus-em-deberta-3-large-v2
17
 
18
  This model is a fine-tuned version of [microsoft/deberta-v3-large](https://huggingface.co/microsoft/deberta-v3-large) on an unknown dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 19.5558
21
+ - F1: 0.1942
22
 
23
  ## Model description
24
 
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.99,
3
+ "eval_f1": 0.1941564561734213,
4
+ "eval_loss": 19.555837631225586,
5
+ "eval_runtime": 23.5491,
6
+ "eval_samples": 1916,
7
+ "eval_samples_per_second": 81.362,
8
+ "eval_steps_per_second": 1.274,
9
+ "train_loss": 6.7171177713120676,
10
+ "train_runtime": 1535.649,
11
+ "train_samples": 5743,
12
+ "train_samples_per_second": 11.219,
13
+ "train_steps_per_second": 0.35
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.99,
3
+ "eval_f1": 0.1941564561734213,
4
+ "eval_loss": 19.555837631225586,
5
+ "eval_runtime": 23.5491,
6
+ "eval_samples": 1916,
7
+ "eval_samples_per_second": 81.362,
8
+ "eval_steps_per_second": 1.274
9
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.99,
3
+ "train_loss": 6.7171177713120676,
4
+ "train_runtime": 1535.649,
5
+ "train_samples": 5743,
6
+ "train_samples_per_second": 11.219,
7
+ "train_steps_per_second": 0.35
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.9916434540389973,
5
+ "eval_steps": 500,
6
+ "global_step": 537,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.11,
13
+ "learning_rate": 2e-05,
14
+ "loss": 95.476,
15
+ "step": 20
16
+ },
17
+ {
18
+ "epoch": 0.22,
19
+ "learning_rate": 2e-05,
20
+ "loss": 48.4218,
21
+ "step": 40
22
+ },
23
+ {
24
+ "epoch": 0.33,
25
+ "learning_rate": 2e-05,
26
+ "loss": 13.7763,
27
+ "step": 60
28
+ },
29
+ {
30
+ "epoch": 0.45,
31
+ "learning_rate": 2e-05,
32
+ "loss": 1.648,
33
+ "step": 80
34
+ },
35
+ {
36
+ "epoch": 0.56,
37
+ "learning_rate": 2e-05,
38
+ "loss": 1.4463,
39
+ "step": 100
40
+ },
41
+ {
42
+ "epoch": 0.67,
43
+ "learning_rate": 2e-05,
44
+ "loss": 0.9103,
45
+ "step": 120
46
+ },
47
+ {
48
+ "epoch": 0.78,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.6693,
51
+ "step": 140
52
+ },
53
+ {
54
+ "epoch": 0.89,
55
+ "learning_rate": 2e-05,
56
+ "loss": 0.8188,
57
+ "step": 160
58
+ },
59
+ {
60
+ "epoch": 1.0,
61
+ "learning_rate": 2e-05,
62
+ "loss": 0.7446,
63
+ "step": 180
64
+ },
65
+ {
66
+ "epoch": 1.11,
67
+ "learning_rate": 2e-05,
68
+ "loss": 0.6158,
69
+ "step": 200
70
+ },
71
+ {
72
+ "epoch": 1.23,
73
+ "learning_rate": 2e-05,
74
+ "loss": 0.9035,
75
+ "step": 220
76
+ },
77
+ {
78
+ "epoch": 1.34,
79
+ "learning_rate": 2e-05,
80
+ "loss": 0.9486,
81
+ "step": 240
82
+ },
83
+ {
84
+ "epoch": 1.45,
85
+ "learning_rate": 2e-05,
86
+ "loss": 0.7198,
87
+ "step": 260
88
+ },
89
+ {
90
+ "epoch": 1.56,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.5337,
93
+ "step": 280
94
+ },
95
+ {
96
+ "epoch": 1.67,
97
+ "learning_rate": 2e-05,
98
+ "loss": 1.3312,
99
+ "step": 300
100
+ },
101
+ {
102
+ "epoch": 1.78,
103
+ "learning_rate": 2e-05,
104
+ "loss": 0.8632,
105
+ "step": 320
106
+ },
107
+ {
108
+ "epoch": 1.89,
109
+ "learning_rate": 2e-05,
110
+ "loss": 0.8279,
111
+ "step": 340
112
+ },
113
+ {
114
+ "epoch": 2.01,
115
+ "learning_rate": 2e-05,
116
+ "loss": 0.9823,
117
+ "step": 360
118
+ },
119
+ {
120
+ "epoch": 2.12,
121
+ "learning_rate": 2e-05,
122
+ "loss": 0.7963,
123
+ "step": 380
124
+ },
125
+ {
126
+ "epoch": 2.23,
127
+ "learning_rate": 2e-05,
128
+ "loss": 0.7861,
129
+ "step": 400
130
+ },
131
+ {
132
+ "epoch": 2.34,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.9859,
135
+ "step": 420
136
+ },
137
+ {
138
+ "epoch": 2.45,
139
+ "learning_rate": 2e-05,
140
+ "loss": 1.0639,
141
+ "step": 440
142
+ },
143
+ {
144
+ "epoch": 2.56,
145
+ "learning_rate": 2e-05,
146
+ "loss": 0.7174,
147
+ "step": 460
148
+ },
149
+ {
150
+ "epoch": 2.67,
151
+ "learning_rate": 2e-05,
152
+ "loss": 0.7996,
153
+ "step": 480
154
+ },
155
+ {
156
+ "epoch": 2.79,
157
+ "learning_rate": 2e-05,
158
+ "loss": 0.7602,
159
+ "step": 500
160
+ },
161
+ {
162
+ "epoch": 2.9,
163
+ "learning_rate": 2e-05,
164
+ "loss": 0.8271,
165
+ "step": 520
166
+ },
167
+ {
168
+ "epoch": 2.99,
169
+ "step": 537,
170
+ "total_flos": 1345715322224640.0,
171
+ "train_loss": 6.7171177713120676,
172
+ "train_runtime": 1535.649,
173
+ "train_samples_per_second": 11.219,
174
+ "train_steps_per_second": 0.35
175
+ }
176
+ ],
177
+ "logging_steps": 20,
178
+ "max_steps": 537,
179
+ "num_input_tokens_seen": 0,
180
+ "num_train_epochs": 3,
181
+ "save_steps": 500,
182
+ "total_flos": 1345715322224640.0,
183
+ "train_batch_size": 2,
184
+ "trial_name": null,
185
+ "trial_params": null
186
+ }