alexredna committed on
Commit
aa240c5
1 Parent(s): 3cd7140

Model save

Browse files
README.md CHANGED
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.8959
19
 
20
  ## Model description
21
 
@@ -43,14 +43,14 @@ The following hyperparameters were used during training:
43
  - total_train_batch_size: 40
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: cosine
46
- - num_epochs: 2
47
 
48
  ### Training results
49
 
50
  | Training Loss | Epoch | Step | Validation Loss |
51
  |:-------------:|:-----:|:----:|:---------------:|
52
  | 0.9354 | 0.22 | 92 | 0.9211 |
53
- | 0.8646 | 1.22 | 185 | 0.8959 |
54
 
55
 
56
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 0.9013
19
 
20
  ## Model description
21
 
 
43
  - total_train_batch_size: 40
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: cosine
46
+ - num_epochs: 4
47
 
48
  ### Training results
49
 
50
  | Training Loss | Epoch | Step | Validation Loss |
51
  |:-------------:|:-----:|:----:|:---------------:|
52
  | 0.9354 | 0.22 | 92 | 0.9211 |
53
+ | 0.8752 | 1.16 | 160 | 0.9013 |
54
 
55
 
56
  ### Framework versions
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 1.22,
3
- "eval_loss": 0.8958835601806641,
4
- "eval_runtime": 48.8992,
5
  "eval_samples": 956,
6
- "eval_samples_per_second": 19.55,
7
- "eval_steps_per_second": 9.775,
8
- "train_loss": 0.9214719901213775,
9
- "train_runtime": 5680.6096,
10
  "train_samples": 16676,
11
- "train_samples_per_second": 5.871,
12
- "train_steps_per_second": 0.146
13
  }
 
1
  {
2
+ "epoch": 1.16,
3
+ "eval_loss": 0.9012895226478577,
4
+ "eval_runtime": 48.5375,
5
  "eval_samples": 956,
6
+ "eval_samples_per_second": 19.696,
7
+ "eval_steps_per_second": 9.848,
8
+ "train_loss": 0.0,
9
+ "train_runtime": 80.9781,
10
  "train_samples": 16676,
11
+ "train_samples_per_second": 823.729,
12
+ "train_steps_per_second": 20.549
13
  }
config.json CHANGED
@@ -22,6 +22,6 @@
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "bfloat16",
24
  "transformers_version": "4.35.0",
25
- "use_cache": true,
26
  "vocab_size": 32000
27
  }
 
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "bfloat16",
24
  "transformers_version": "4.35.0",
25
+ "use_cache": false,
26
  "vocab_size": 32000
27
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.22,
3
- "eval_loss": 0.8958835601806641,
4
- "eval_runtime": 48.8992,
5
  "eval_samples": 956,
6
- "eval_samples_per_second": 19.55,
7
- "eval_steps_per_second": 9.775
8
  }
 
1
  {
2
+ "epoch": 1.16,
3
+ "eval_loss": 0.9012895226478577,
4
+ "eval_runtime": 48.5375,
5
  "eval_samples": 956,
6
+ "eval_samples_per_second": 19.696,
7
+ "eval_steps_per_second": 9.848
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edfe5952a27c911cfcc36d4ba07c5433974e54140263c4f1aaba3f9835aeaf0b
3
  size 2200119864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82a6abe0a7f5694698206db580f116073d1e3e745697ffc511a101ea321c73bd
3
  size 2200119864
runs/Jan03_16-36-16_ml-xtx-machine/events.out.tfevents.1704296223.ml-xtx-machine.50946.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23b4820243a873e84a6fcb08bf2f029fc5f3920ba8f182f67d6b2b4607044f5e
3
+ size 5049
runs/Jan03_16-36-16_ml-xtx-machine/events.out.tfevents.1704296353.ml-xtx-machine.50946.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45842fb8aaada45bd3b0f272146efaf3c2e2aaa2b371825a5b16dcdac514384b
3
+ size 359
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.22,
3
- "train_loss": 0.9214719901213775,
4
- "train_runtime": 5680.6096,
5
  "train_samples": 16676,
6
- "train_samples_per_second": 5.871,
7
- "train_steps_per_second": 0.146
8
  }
 
1
  {
2
+ "epoch": 1.16,
3
+ "train_loss": 0.0,
4
+ "train_runtime": 80.9781,
5
  "train_samples": 16676,
6
+ "train_samples_per_second": 823.729,
7
+ "train_steps_per_second": 20.549
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.2213960182297914,
5
  "eval_steps": 500,
6
- "global_step": 185,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -71,34 +71,28 @@
71
  "step": 160
72
  },
73
  {
74
- "epoch": 1.21,
75
- "learning_rate": 1.7777792431226384e-05,
76
- "loss": 0.8646,
77
- "step": 180
78
- },
79
- {
80
- "epoch": 1.22,
81
- "eval_loss": 0.8958887457847595,
82
- "eval_runtime": 48.9138,
83
- "eval_samples_per_second": 19.545,
84
- "eval_steps_per_second": 9.772,
85
- "step": 185
86
  },
87
  {
88
- "epoch": 1.22,
89
- "step": 185,
90
- "total_flos": 9.424742439400243e+16,
91
- "train_loss": 0.9214719901213775,
92
- "train_runtime": 5680.6096,
93
- "train_samples_per_second": 5.871,
94
- "train_steps_per_second": 0.146
95
  }
96
  ],
97
  "logging_steps": 20,
98
- "max_steps": 832,
99
- "num_train_epochs": 2,
100
  "save_steps": 40,
101
- "total_flos": 9.424742439400243e+16,
102
  "trial_name": null,
103
  "trial_params": null
104
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.1614295994243224,
5
  "eval_steps": 500,
6
+ "global_step": 160,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
71
  "step": 160
72
  },
73
  {
74
+ "epoch": 1.16,
75
+ "eval_loss": 0.9012895226478577,
76
+ "eval_runtime": 48.6193,
77
+ "eval_samples_per_second": 19.663,
78
+ "eval_steps_per_second": 9.831,
79
+ "step": 160
 
 
 
 
 
 
80
  },
81
  {
82
+ "epoch": 1.16,
83
+ "step": 160,
84
+ "total_flos": 8.13446546367447e+16,
85
+ "train_loss": 0.0,
86
+ "train_runtime": 80.9781,
87
+ "train_samples_per_second": 823.729,
88
+ "train_steps_per_second": 20.549
89
  }
90
  ],
91
  "logging_steps": 20,
92
+ "max_steps": 1664,
93
+ "num_train_epochs": 4,
94
  "save_steps": 40,
95
+ "total_flos": 8.13446546367447e+16,
96
  "trial_name": null,
97
  "trial_params": null
98
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1aa50007fd840cca58fdd03dcd8502b2c5a82b34eb9b5546d8b2fa653a4498b5
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62daaa1ca184febf366ffa9c4dc9832e7b1feee79b4945c2ec3fbdbb5baf799d
3
  size 4728