Shresthadev403 committed on
Commit
e64e790
·
1 Parent(s): 0b4bd66

End of training

Browse files
README.md CHANGED
@@ -13,12 +13,12 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - eval_loss: 1.1526
17
- - eval_runtime: 18.2243
18
- - eval_samples_per_second: 109.744
19
- - eval_steps_per_second: 1.756
20
- - epoch: 0.0
21
- - step: 15
22
 
23
  ## Model description
24
 
 
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+ - eval_loss: 1.1525
17
+ - eval_runtime: 17.9261
18
+ - eval_samples_per_second: 111.569
19
+ - eval_steps_per_second: 1.785
20
+ - epoch: 0.01
21
+ - step: 20
22
 
23
  ## Model description
24
 
logs/events.out.tfevents.1702892648.82d5b6822809.42.1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37ad33be7d0485b15516b8569d1a5beb0cb8ff4e4b63a6739ebc3c0e37d8f4b2
3
- size 5071
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5bc7ba9bf0160bb974108ee13fd6e89d84e23fb462e86151f3a5523b9c82c73
3
+ size 5491
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:786a50d8d4ce0218d11987ec66734818f9045dfe55fad3d78997f5a0a8bc34fb
3
  size 497918592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91d0acc4d2d56f0cc48445003be031c08633d89d08448565ee65404a26feb261
3
  size 497918592
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.142299771308899,
3
  "best_model_checkpoint": "food-recipe-generation/checkpoint-5",
4
- "epoch": 0.004897159647404506,
5
  "eval_steps": 5,
6
- "global_step": 15,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -49,6 +49,20 @@
49
  "eval_samples_per_second": 109.744,
50
  "eval_steps_per_second": 1.756,
51
  "step": 15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  }
53
  ],
54
  "logging_steps": 5,
@@ -56,7 +70,7 @@
56
  "num_input_tokens_seen": 0,
57
  "num_train_epochs": 1,
58
  "save_steps": 5,
59
- "total_flos": 31355043840000.0,
60
  "train_batch_size": 32,
61
  "trial_name": null,
62
  "trial_params": null
 
1
  {
2
  "best_metric": 1.142299771308899,
3
  "best_model_checkpoint": "food-recipe-generation/checkpoint-5",
4
+ "epoch": 0.00652954619653934,
5
  "eval_steps": 5,
6
+ "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
49
  "eval_samples_per_second": 109.744,
50
  "eval_steps_per_second": 1.756,
51
  "step": 15
52
+ },
53
+ {
54
+ "epoch": 0.01,
55
+ "learning_rate": 4.8371335504885994e-05,
56
+ "loss": 1.0834,
57
+ "step": 20
58
+ },
59
+ {
60
+ "epoch": 0.01,
61
+ "eval_loss": 1.152503490447998,
62
+ "eval_runtime": 17.9261,
63
+ "eval_samples_per_second": 111.569,
64
+ "eval_steps_per_second": 1.785,
65
+ "step": 20
66
  }
67
  ],
68
  "logging_steps": 5,
 
70
  "num_input_tokens_seen": 0,
71
  "num_train_epochs": 1,
72
  "save_steps": 5,
73
+ "total_flos": 41806725120000.0,
74
  "train_batch_size": 32,
75
  "trial_name": null,
76
  "trial_params": null