Shresthadev403 committed on
Commit
b7d81e6
1 Parent(s): 249e70a

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  tags:
3
  - generated_from_trainer
4
  model-index:
@@ -11,14 +13,14 @@ should probably proofread and complete it, then remove this comment. -->
11
 
12
  # food-recipe-generation
13
 
14
- This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - eval_loss: 1.1335
17
- - eval_runtime: 18.0469
18
- - eval_samples_per_second: 110.823
19
- - eval_steps_per_second: 1.773
20
- - epoch: 0.02
21
- - step: 65
22
 
23
  ## Model description
24
 
@@ -43,7 +45,7 @@ The following hyperparameters were used during training:
43
  - seed: 42
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
- - num_epochs: 0.1
47
  - mixed_precision_training: Native AMP
48
 
49
  ### Framework versions
 
1
  ---
2
+ license: mit
3
+ base_model: gpt2
4
  tags:
5
  - generated_from_trainer
6
  model-index:
 
13
 
14
  # food-recipe-generation
15
 
16
+ This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - eval_loss: 1.0114
19
+ - eval_runtime: 2006.5534
20
+ - eval_samples_per_second: 111.193
21
+ - eval_steps_per_second: 1.738
22
+ - epoch: 0.8
23
+ - step: 50000
24
 
25
  ## Model description
26
 
 
45
  - seed: 42
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
  - lr_scheduler_type: linear
48
+ - num_epochs: 500
49
  - mixed_precision_training: Native AMP
50
 
51
  ### Framework versions
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/kaggle/working/food-recipe-generation",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
 
1
  {
2
+ "_name_or_path": "gpt2",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
logs/events.out.tfevents.1702895757.113cdbd75979.26.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d8c3aceee3c5b9421ddfa93c338a825b3f2aa716f95c06b0b3874ec3aed88b4
3
+ size 5037
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e61e3bd1c996a2773a820b93fa6954cc5f60e51780cc5f1901127d1f3122031
3
  size 497918592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dec1b9b7354ba4a77850663e7816cf56efa720d5b9a816282d4900fdf2143a6c
3
  size 497918592
trainer_state.json CHANGED
@@ -1,202 +1,34 @@
1
  {
2
- "best_metric": 1.124158501625061,
3
- "best_model_checkpoint": "food-recipe-generation/checkpoint-45",
4
- "epoch": 0.021221025138752857,
5
- "eval_steps": 5,
6
- "global_step": 65,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.0,
13
- "learning_rate": 4.9185667752443e-05,
14
- "loss": 1.1526,
15
- "step": 5
16
  },
17
  {
18
- "epoch": 0.0,
19
- "eval_loss": 1.142299771308899,
20
- "eval_runtime": 18.2229,
21
- "eval_samples_per_second": 109.752,
22
- "eval_steps_per_second": 1.756,
23
- "step": 5
24
- },
25
- {
26
- "epoch": 0.0,
27
- "learning_rate": 4.8371335504885994e-05,
28
- "loss": 1.2427,
29
- "step": 10
30
- },
31
- {
32
- "epoch": 0.0,
33
- "eval_loss": 1.1404472589492798,
34
- "eval_runtime": 17.9113,
35
- "eval_samples_per_second": 111.662,
36
- "eval_steps_per_second": 1.787,
37
- "step": 10
38
- },
39
- {
40
- "epoch": 0.0,
41
- "learning_rate": 4.9185667752443e-05,
42
- "loss": 1.1104,
43
- "step": 15
44
- },
45
- {
46
- "epoch": 0.0,
47
- "eval_loss": 1.1525651216506958,
48
- "eval_runtime": 18.2243,
49
- "eval_samples_per_second": 109.744,
50
- "eval_steps_per_second": 1.756,
51
- "step": 15
52
- },
53
- {
54
- "epoch": 0.01,
55
- "learning_rate": 4.8371335504885994e-05,
56
- "loss": 1.0834,
57
- "step": 20
58
- },
59
- {
60
- "epoch": 0.01,
61
- "eval_loss": 1.152503490447998,
62
- "eval_runtime": 17.9261,
63
- "eval_samples_per_second": 111.569,
64
- "eval_steps_per_second": 1.785,
65
- "step": 20
66
- },
67
- {
68
- "epoch": 0.01,
69
- "learning_rate": 4.9185667752443e-05,
70
- "loss": 1.1478,
71
- "step": 25
72
- },
73
- {
74
- "epoch": 0.01,
75
- "eval_loss": 1.1247614622116089,
76
- "eval_runtime": 18.0737,
77
- "eval_samples_per_second": 110.658,
78
- "eval_steps_per_second": 1.771,
79
- "step": 25
80
- },
81
- {
82
- "epoch": 0.01,
83
- "learning_rate": 4.8371335504885994e-05,
84
- "loss": 1.1589,
85
- "step": 30
86
- },
87
- {
88
- "epoch": 0.01,
89
- "eval_loss": 1.1252552270889282,
90
- "eval_runtime": 17.9015,
91
- "eval_samples_per_second": 111.722,
92
- "eval_steps_per_second": 1.788,
93
- "step": 30
94
- },
95
- {
96
- "epoch": 0.01,
97
- "learning_rate": 4.755700325732899e-05,
98
- "loss": 1.1235,
99
- "step": 35
100
- },
101
- {
102
- "epoch": 0.01,
103
- "eval_loss": 1.1276049613952637,
104
- "eval_runtime": 17.7961,
105
- "eval_samples_per_second": 112.384,
106
- "eval_steps_per_second": 1.798,
107
- "step": 35
108
- },
109
- {
110
- "epoch": 0.01,
111
- "learning_rate": 4.6742671009771985e-05,
112
- "loss": 1.2209,
113
- "step": 40
114
- },
115
- {
116
- "epoch": 0.01,
117
- "eval_loss": 1.1258747577667236,
118
- "eval_runtime": 17.8768,
119
- "eval_samples_per_second": 111.877,
120
- "eval_steps_per_second": 1.79,
121
- "step": 40
122
- },
123
- {
124
- "epoch": 0.01,
125
- "learning_rate": 4.592833876221499e-05,
126
- "loss": 1.16,
127
- "step": 45
128
- },
129
- {
130
- "epoch": 0.01,
131
- "eval_loss": 1.124158501625061,
132
- "eval_runtime": 17.8962,
133
- "eval_samples_per_second": 111.756,
134
- "eval_steps_per_second": 1.788,
135
- "step": 45
136
- },
137
- {
138
- "epoch": 0.02,
139
- "learning_rate": 4.511400651465798e-05,
140
- "loss": 1.2131,
141
- "step": 50
142
- },
143
- {
144
- "epoch": 0.02,
145
- "eval_loss": 1.1221903562545776,
146
- "eval_runtime": 17.8085,
147
- "eval_samples_per_second": 112.306,
148
- "eval_steps_per_second": 1.797,
149
- "step": 50
150
- },
151
- {
152
- "epoch": 0.02,
153
- "learning_rate": 4.9185667752443e-05,
154
- "loss": 1.1619,
155
- "step": 55
156
- },
157
- {
158
- "epoch": 0.02,
159
- "eval_loss": 1.1328459978103638,
160
- "eval_runtime": 18.2169,
161
- "eval_samples_per_second": 109.788,
162
- "eval_steps_per_second": 1.757,
163
- "step": 55
164
- },
165
- {
166
- "epoch": 0.02,
167
- "learning_rate": 4.8371335504885994e-05,
168
- "loss": 1.1732,
169
- "step": 60
170
- },
171
- {
172
- "epoch": 0.02,
173
- "eval_loss": 1.1319738626480103,
174
- "eval_runtime": 17.9412,
175
- "eval_samples_per_second": 111.475,
176
- "eval_steps_per_second": 1.784,
177
- "step": 60
178
- },
179
- {
180
- "epoch": 0.02,
181
- "learning_rate": 4.755700325732899e-05,
182
- "loss": 1.1628,
183
- "step": 65
184
- },
185
- {
186
- "epoch": 0.02,
187
- "eval_loss": 1.1334753036499023,
188
- "eval_runtime": 18.0469,
189
- "eval_samples_per_second": 110.823,
190
- "eval_steps_per_second": 1.773,
191
- "step": 65
192
  }
193
  ],
194
- "logging_steps": 5,
195
- "max_steps": 307,
196
  "num_input_tokens_seen": 0,
197
- "num_train_epochs": 1,
198
- "save_steps": 5,
199
- "total_flos": 135871856640000.0,
200
  "train_batch_size": 32,
201
  "trial_name": null,
202
  "trial_params": null
 
1
  {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.7968000509952032,
5
+ "eval_steps": 50000,
6
+ "global_step": 50000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.8,
13
+ "learning_rate": 4.9920355054102726e-05,
14
+ "loss": 1.1412,
15
+ "step": 50000
16
  },
17
  {
18
+ "epoch": 0.8,
19
+ "eval_loss": 1.011365294456482,
20
+ "eval_runtime": 2006.5534,
21
+ "eval_samples_per_second": 111.193,
22
+ "eval_steps_per_second": 1.738,
23
+ "step": 50000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  }
25
  ],
26
+ "logging_steps": 50000,
27
+ "max_steps": 31375500,
28
  "num_input_tokens_seen": 0,
29
+ "num_train_epochs": 500,
30
+ "save_steps": 50000,
31
+ "total_flos": 1.045168128e+17,
32
  "train_batch_size": 32,
33
  "trial_name": null,
34
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24f7cc470173209200fa7833aefd3c6f1ea8b6b4e8765bc96a7ce199e4c5e43d
3
  size 4283
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df1a05d08c00c3cba58440423e7820be9126a02775330faced61d133aa0fb521
3
  size 4283