satyanshu404 committed on
Commit
f2b2fb5
1 Parent(s): 6231323

Model save

Browse files
README.md CHANGED
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model was trained from scratch on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 11.5514
19
 
20
  ## Model description
21
 
@@ -41,7 +41,7 @@ The following hyperparameters were used during training:
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: cosine
43
  - lr_scheduler_warmup_ratio: 0.2
44
- - num_epochs: 3
45
 
46
  ### Training results
47
 
 
15
 
16
  This model was trained from scratch on the None dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 11.5637
19
 
20
  ## Model description
21
 
 
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: cosine
43
  - lr_scheduler_warmup_ratio: 0.2
44
+ - num_epochs: 10
45
 
46
  ### Training results
47
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_loss": 11.551432609558105,
4
- "eval_runtime": 50.9651,
5
  "eval_samples": 615,
6
- "eval_samples_per_second": 12.067,
7
- "eval_steps_per_second": 6.043,
8
  "total_flos": 8536543131303936.0,
9
- "train_loss": 12.036643933186408,
10
- "train_runtime": 6501.8144,
11
- "train_samples_per_second": 2.552,
12
- "train_steps_per_second": 1.276
13
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_loss": 11.56369686126709,
4
+ "eval_runtime": 44.4292,
5
  "eval_samples": 615,
6
+ "eval_samples_per_second": 13.842,
7
+ "eval_steps_per_second": 6.932,
8
  "total_flos": 8536543131303936.0,
9
+ "train_loss": 11.932003958565849,
10
+ "train_runtime": 24504.6855,
11
+ "train_samples_per_second": 2.257,
12
+ "train_steps_per_second": 1.128
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_loss": 11.551432609558105,
4
- "eval_runtime": 50.9651,
5
  "eval_samples": 615,
6
- "eval_samples_per_second": 12.067,
7
- "eval_steps_per_second": 6.043
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_loss": 11.56369686126709,
4
+ "eval_runtime": 44.4292,
5
  "eval_samples": 615,
6
+ "eval_samples_per_second": 13.842,
7
+ "eval_steps_per_second": 6.932
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f0580adf7aa0c65ed7cfdd4857ad1dca5fcda8a74ab2db93ac6c2aa34bf13fd
3
  size 4018095352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f212a975eba9ef3984476f2bac53838949bdf491cdd81ecf0ecd6f51b7b93543
3
  size 4018095352
tokenizer_config.json CHANGED
@@ -122,7 +122,7 @@
122
  "legacy": false,
123
  "model_max_length": 4096,
124
  "pad_token": "<unk>",
125
- "padding_side": "right",
126
  "sp_model_kwargs": {},
127
  "tokenizer_class": "LlamaTokenizer",
128
  "unk_token": "<unk>",
 
122
  "legacy": false,
123
  "model_max_length": 4096,
124
  "pad_token": "<unk>",
125
+ "padding_side": "left",
126
  "sp_model_kwargs": {},
127
  "tokenizer_class": "LlamaTokenizer",
128
  "unk_token": "<unk>",
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 3.0,
3
- "train_loss": 12.036643933186408,
4
- "train_runtime": 6501.8144,
5
- "train_samples_per_second": 2.552,
6
- "train_steps_per_second": 1.276
7
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "train_loss": 11.932003958565849,
4
+ "train_runtime": 24504.6855,
5
+ "train_samples_per_second": 2.257,
6
+ "train_steps_per_second": 1.128
7
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff