VedaantJain committed on
Commit
3d4ad8c
·
verified ·
1 Parent(s): dee739b

Model save

Browse files
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 5.0,
3
- "total_flos": 602422732914688.0,
4
- "train_loss": 1.1182151794433595,
5
- "train_runtime": 3.8542,
6
  "train_samples": 10,
7
- "train_samples_per_second": 12.973,
8
- "train_steps_per_second": 1.297
9
  }
 
1
  {
2
+ "epoch": 2.0,
3
+ "total_flos": 239345474732032.0,
4
+ "train_loss": 1.1433531045913696,
5
+ "train_runtime": 2.3713,
6
  "train_samples": 10,
7
+ "train_samples_per_second": 8.434,
8
+ "train_steps_per_second": 0.843
9
  }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0389d42cd3307c18435a3443c17cfbc923ecbcb694f36a847ce243f054d03fc
3
  size 4965799096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13cbd6d16e927a0c5bad54102514e6e18b4a47b3a6eb911e39d678d328d19f55
3
  size 4965799096
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:556eeddf3cd4bb0ca0b28d1ca08cf53c94ef62bdd9031cdec4c4563c8078afc3
3
  size 2247734992
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f66ee63b3de299eb9fd89dfbc23a39095197874fd6891f7d18de34c50e4e1a7
3
  size 2247734992
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988
3
- size 17210019
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fca926fdfedcb8fd225765fb3911e0ab5de0390ec7242a46dff829e8604f723
3
+ size 17210020
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 5.0,
3
- "total_flos": 602422732914688.0,
4
- "train_loss": 1.1182151794433595,
5
- "train_runtime": 3.8542,
6
  "train_samples": 10,
7
- "train_samples_per_second": 12.973,
8
- "train_steps_per_second": 1.297
9
  }
 
1
  {
2
+ "epoch": 2.0,
3
+ "total_flos": 239345474732032.0,
4
+ "train_loss": 1.1433531045913696,
5
+ "train_runtime": 2.3713,
6
  "train_samples": 10,
7
+ "train_samples_per_second": 8.434,
8
+ "train_steps_per_second": 0.843
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 5,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -17,64 +17,33 @@
17
  {
18
  "epoch": 1.0,
19
  "eval_loss": 1.131566047668457,
20
- "eval_runtime": 0.1577,
21
- "eval_samples_per_second": 63.429,
22
- "eval_steps_per_second": 12.686,
23
  "step": 1
24
  },
25
  {
26
  "epoch": 2.0,
27
  "eval_loss": 1.131566047668457,
28
- "eval_runtime": 0.1586,
29
- "eval_samples_per_second": 63.048,
30
- "eval_steps_per_second": 12.61,
31
  "step": 2
32
  },
33
  {
34
- "epoch": 3.0,
35
- "eval_loss": 1.131566047668457,
36
- "eval_runtime": 0.1576,
37
- "eval_samples_per_second": 63.444,
38
- "eval_steps_per_second": 12.689,
39
- "step": 3
40
- },
41
- {
42
- "epoch": 4.0,
43
- "eval_loss": 1.125792145729065,
44
- "eval_runtime": 0.1592,
45
- "eval_samples_per_second": 62.812,
46
- "eval_steps_per_second": 12.562,
47
- "step": 4
48
- },
49
- {
50
- "epoch": 5.0,
51
- "grad_norm": 9.712682723999023,
52
- "learning_rate": 0.0,
53
- "loss": 1.1158,
54
- "step": 5
55
- },
56
- {
57
- "epoch": 5.0,
58
- "eval_loss": 1.125792145729065,
59
- "eval_runtime": 0.1553,
60
- "eval_samples_per_second": 64.38,
61
- "eval_steps_per_second": 12.876,
62
- "step": 5
63
- },
64
- {
65
- "epoch": 5.0,
66
- "step": 5,
67
- "total_flos": 602422732914688.0,
68
- "train_loss": 1.1182151794433595,
69
- "train_runtime": 3.8542,
70
- "train_samples_per_second": 12.973,
71
- "train_steps_per_second": 1.297
72
  }
73
  ],
74
  "logging_steps": 5,
75
- "max_steps": 5,
76
  "num_input_tokens_seen": 0,
77
- "num_train_epochs": 5,
78
  "save_steps": 500,
79
  "stateful_callbacks": {
80
  "TrainerControl": {
@@ -88,7 +57,7 @@
88
  "attributes": {}
89
  }
90
  },
91
- "total_flos": 602422732914688.0,
92
  "train_batch_size": 4,
93
  "trial_name": null,
94
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
+ "global_step": 2,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
17
  {
18
  "epoch": 1.0,
19
  "eval_loss": 1.131566047668457,
20
+ "eval_runtime": 0.156,
21
+ "eval_samples_per_second": 64.102,
22
+ "eval_steps_per_second": 12.82,
23
  "step": 1
24
  },
25
  {
26
  "epoch": 2.0,
27
  "eval_loss": 1.131566047668457,
28
+ "eval_runtime": 0.1552,
29
+ "eval_samples_per_second": 64.444,
30
+ "eval_steps_per_second": 12.889,
31
  "step": 2
32
  },
33
  {
34
+ "epoch": 2.0,
35
+ "step": 2,
36
+ "total_flos": 239345474732032.0,
37
+ "train_loss": 1.1433531045913696,
38
+ "train_runtime": 2.3713,
39
+ "train_samples_per_second": 8.434,
40
+ "train_steps_per_second": 0.843
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  }
42
  ],
43
  "logging_steps": 5,
44
+ "max_steps": 2,
45
  "num_input_tokens_seen": 0,
46
+ "num_train_epochs": 2,
47
  "save_steps": 500,
48
  "stateful_callbacks": {
49
  "TrainerControl": {
 
57
  "attributes": {}
58
  }
59
  },
60
+ "total_flos": 239345474732032.0,
61
  "train_batch_size": 4,
62
  "trial_name": null,
63
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad88084d18f44c79d0b1a11aa9118ea2b4ba2f54c48dc6577fe5f5fbd4f99587
3
  size 6968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ebe46d98a6851b85db4ef6908bb9fe0d07ebbd0ec3ef607f14f955f2a49ead0
3
  size 6968