Shresthadev403 committed on
Commit
8b49b42
1 Parent(s): 6f17c60

End of training

Browse files
README.md CHANGED
@@ -13,12 +13,12 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - eval_loss: 1.1242
17
- - eval_runtime: 17.8962
18
- - eval_samples_per_second: 111.756
19
- - eval_steps_per_second: 1.788
20
- - epoch: 0.01
21
- - step: 45
22
 
23
  ## Model description
24
 
 
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+ - eval_loss: 1.1222
17
+ - eval_runtime: 17.8085
18
+ - eval_samples_per_second: 112.306
19
+ - eval_steps_per_second: 1.797
20
+ - epoch: 0.02
21
+ - step: 50
22
 
23
  ## Model description
24
 
logs/events.out.tfevents.1702892983.c87f45a5aed4.42.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6b59e6436b272d280410208bdc36d59fef679a50ea1cfcc33bbeb57c34b9bb2
3
- size 6751
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:455757bae7675273c4dd282d19d10ca985f4cf4abf34f31d7a0e088706efbe3a
3
+ size 7171
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:502efa3e70ac250c4b4511ccdb38f16d1c8af9b3b9af6be779726d8d2c3e8305
3
  size 497918592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2db25361201f94ee4603c3730182d977d0a4d9f1bc5f98ae095777c4e48f5691
3
  size 497918592
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.1247614622116089,
3
- "best_model_checkpoint": "food-recipe-generation/checkpoint-25",
4
- "epoch": 0.014691478942213516,
5
  "eval_steps": 5,
6
- "global_step": 45,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -133,6 +133,20 @@
133
  "eval_samples_per_second": 111.756,
134
  "eval_steps_per_second": 1.788,
135
  "step": 45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  }
137
  ],
138
  "logging_steps": 5,
@@ -140,7 +154,7 @@
140
  "num_input_tokens_seen": 0,
141
  "num_train_epochs": 1,
142
  "save_steps": 5,
143
- "total_flos": 94065131520000.0,
144
  "train_batch_size": 32,
145
  "trial_name": null,
146
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.124158501625061,
3
+ "best_model_checkpoint": "food-recipe-generation/checkpoint-45",
4
+ "epoch": 0.01632386549134835,
5
  "eval_steps": 5,
6
+ "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
133
  "eval_samples_per_second": 111.756,
134
  "eval_steps_per_second": 1.788,
135
  "step": 45
136
+ },
137
+ {
138
+ "epoch": 0.02,
139
+ "learning_rate": 4.511400651465798e-05,
140
+ "loss": 1.2131,
141
+ "step": 50
142
+ },
143
+ {
144
+ "epoch": 0.02,
145
+ "eval_loss": 1.1221903562545776,
146
+ "eval_runtime": 17.8085,
147
+ "eval_samples_per_second": 112.306,
148
+ "eval_steps_per_second": 1.797,
149
+ "step": 50
150
  }
151
  ],
152
  "logging_steps": 5,
 
154
  "num_input_tokens_seen": 0,
155
  "num_train_epochs": 1,
156
  "save_steps": 5,
157
+ "total_flos": 104516812800000.0,
158
  "train_batch_size": 32,
159
  "trial_name": null,
160
  "trial_params": null