Commit
•
50ba910
1
Parent(s):
e3cb80d
Saving train state of step 105000
Browse files
- {checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/config.json +0 -0
- {checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/generation_config.json +0 -0
- {checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/model.safetensors +0 -0
- {checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/model_1.safetensors +0 -0
- {checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/optimizer.bin +2 -2
- {checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_0.pkl +1 -1
- {checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_1.pkl +1 -1
- {checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_2.pkl +1 -1
- {checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_3.pkl +1 -1
- {checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_4.pkl +1 -1
- {checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_5.pkl +1 -1
- {checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_6.pkl +1 -1
- {checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_7.pkl +1 -1
- {checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/scheduler.bin +1 -1
- config.json +1 -1
- distil-mistral/1713941262.4922624/events.out.tfevents.1713941262.ip-26-0-163-236.2159465.1 +3 -0
- distil-mistral/1713941262.4960794/hparams.yml +16 -0
- distil-mistral/events.out.tfevents.1713941253.ip-26-0-163-236.2159465.0 +3 -0
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/config.json
RENAMED
File without changes
|
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/generation_config.json
RENAMED
File without changes
|
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/model.safetensors
RENAMED
File without changes
|
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/model_1.safetensors
RENAMED
File without changes
|
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/optimizer.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5b4bb513a347751f619d23b3ae2c73877fd1c98bc9bca5b98d9ccd6d05bcc46
|
3 |
+
size 6283330358
|
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_0.pkl
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 16100
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b99f58b220f63a52c3b0c5c7cc10ad02f071007a3b5f1a7fb57470c7b0df9563
|
3 |
size 16100
|
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_1.pkl
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 16100
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16dc6a42c4b6030f174f6b541dd1ff53f42c6631557c28d6ffbfb9fa2f018c0d
|
3 |
size 16100
|
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_2.pkl
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 16100
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c355178c243033b5fc0e59dab7794e27e4117ccae4839bacd9117669e18128f
|
3 |
size 16100
|
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_3.pkl
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 16100
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd92ff1f26d6e0fc02d1c74e165e15da454bbb14c8b0b92fc7444bce19760646
|
3 |
size 16100
|
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_4.pkl
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 16100
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1fc42ab775cfcf1756ed979acd33ba33c74c3d6f264a262b12d30e0176a4767
|
3 |
size 16100
|
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_5.pkl
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 16100
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4cbe097d86779f451556b429d6f0f89ab9239bec9c3529ae239606c0da559bd0
|
3 |
size 16100
|
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_6.pkl
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 16100
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46abc7c9a7aaa6ad9d6f9d9295ef664d3a9459b9d6b26ad0eee23dc328d535ff
|
3 |
size 16100
|
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_7.pkl
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 16100
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0d7bfea176199f4f78590bb2ce68f9e522a1c4ba684324ab5f534f0149615bf
|
3 |
size 16100
|
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/scheduler.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb569859d616388bdfd1c62494f05bdf276ee3132da5ea91454ffcb9d238c624
|
3 |
size 1064
|
config.json
CHANGED
@@ -20,7 +20,7 @@
|
|
20 |
"rope_theta": 1000000.0,
|
21 |
"sliding_window": null,
|
22 |
"tie_word_embeddings": false,
|
23 |
-
"torch_dtype": "
|
24 |
"transformers_version": "4.40.0.dev0",
|
25 |
"use_cache": true,
|
26 |
"vocab_size": 32000
|
|
|
20 |
"rope_theta": 1000000.0,
|
21 |
"sliding_window": null,
|
22 |
"tie_word_embeddings": false,
|
23 |
+
"torch_dtype": "float32",
|
24 |
"transformers_version": "4.40.0.dev0",
|
25 |
"use_cache": true,
|
26 |
"vocab_size": 32000
|
distil-mistral/1713941262.4922624/events.out.tfevents.1713941262.ip-26-0-163-236.2159465.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:560ea0f543e24af273c443f0d783419a3f42cfdd753222467875fd9e0ee81ee6
|
3 |
+
size 1160
|
distil-mistral/1713941262.4960794/hparams.yml
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
adam_beta1: 0.9
|
2 |
+
adam_beta2: 0.999
|
3 |
+
global_batch_size: 64
|
4 |
+
gradient_accumulation_steps: 1
|
5 |
+
learning_rate: 0.0001
|
6 |
+
lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType
|
7 |
+
- linear
|
8 |
+
max_steps: 100000
|
9 |
+
mixed_precision: bf16
|
10 |
+
model_name_or_path: sanchit-gandhi/Mistral-1.5B-Instruct-v0.2
|
11 |
+
num_train_epochs: 3.0
|
12 |
+
per_device_train_batch_size: 8
|
13 |
+
teacher_name_or_path: mistralai/Mistral-7B-Instruct-v0.2
|
14 |
+
temperature: 2.0
|
15 |
+
warmup_steps: 500
|
16 |
+
weight_decay: 0.0
|
distil-mistral/events.out.tfevents.1713941253.ip-26-0-163-236.2159465.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac0bbb34ac5ddea96b4065d48e1b11fc3eaa9afc1f12ca77a56f73c4c71a836d
|
3 |
+
size 63288
|