sanchit-gandhi HF staff committed on
Commit
50ba910
•
1 Parent(s): e3cb80d

Saving train state of step 105000

Browse files
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/config.json RENAMED
File without changes
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/generation_config.json RENAMED
File without changes
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/model.safetensors RENAMED
File without changes
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/model_1.safetensors RENAMED
File without changes
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/optimizer.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:676b30f8977d68842a99b2bed13ef836c24396b2b307b9539776599b1cb0add7
3
- size 6283329590
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5b4bb513a347751f619d23b3ae2c73877fd1c98bc9bca5b98d9ccd6d05bcc46
3
+ size 6283330358
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_0.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aab68cb112d3f7ed47e584f1f4912731fbb699e21a32518098c4e2ea48df5a98
3
  size 16100
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b99f58b220f63a52c3b0c5c7cc10ad02f071007a3b5f1a7fb57470c7b0df9563
3
  size 16100
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_1.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f69756b590564bb1602f4bf4e62e206ab474f3e2dff9ed0062c568ae4bbfaad
3
  size 16100
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16dc6a42c4b6030f174f6b541dd1ff53f42c6631557c28d6ffbfb9fa2f018c0d
3
  size 16100
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_2.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af6d7d0468c0469bce9b102cbba7d4536231b31da06d6b2e1a422af3ac8c2c33
3
  size 16100
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c355178c243033b5fc0e59dab7794e27e4117ccae4839bacd9117669e18128f
3
  size 16100
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_3.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a4b7c910464a405b508d72a8a58bc75a6f31aeae4a2d837b21ab0daf1bbe98e
3
  size 16100
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd92ff1f26d6e0fc02d1c74e165e15da454bbb14c8b0b92fc7444bce19760646
3
  size 16100
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_4.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92002f48c8f65ad524b520d4e991aa1336e559fef41682c34d87999b14c221e0
3
  size 16100
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1fc42ab775cfcf1756ed979acd33ba33c74c3d6f264a262b12d30e0176a4767
3
  size 16100
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_5.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cd550d2cc98e4d46fb38b49243f4ccbbc9104ecd948c9dba1b38df633f6be70
3
  size 16100
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cbe097d86779f451556b429d6f0f89ab9239bec9c3529ae239606c0da559bd0
3
  size 16100
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_6.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ca12bafd8fa2bb70a9e7e6a74053b32f8636f13a80c65a2ccef6f5765376823
3
  size 16100
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46abc7c9a7aaa6ad9d6f9d9295ef664d3a9459b9d6b26ad0eee23dc328d535ff
3
  size 16100
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/random_states_7.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d457746e9c1062e536fbddcdde4d7279fd10d691ad2be87e9dda3280bde421d
3
  size 16100
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0d7bfea176199f4f78590bb2ce68f9e522a1c4ba684324ab5f534f0149615bf
3
  size 16100
{checkpoint-100000-epoch-0 → checkpoint-105000-epoch-0}/scheduler.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0888c48d6cef9a2f1bb5ba2d52a074c348114b934f0cbce026ae99d6a22d09fd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb569859d616388bdfd1c62494f05bdf276ee3132da5ea91454ffcb9d238c624
3
  size 1064
config.json CHANGED
@@ -20,7 +20,7 @@
20
  "rope_theta": 1000000.0,
21
  "sliding_window": null,
22
  "tie_word_embeddings": false,
23
- "torch_dtype": "bfloat16",
24
  "transformers_version": "4.40.0.dev0",
25
  "use_cache": true,
26
  "vocab_size": 32000
 
20
  "rope_theta": 1000000.0,
21
  "sliding_window": null,
22
  "tie_word_embeddings": false,
23
+ "torch_dtype": "float32",
24
  "transformers_version": "4.40.0.dev0",
25
  "use_cache": true,
26
  "vocab_size": 32000
distil-mistral/1713941262.4922624/events.out.tfevents.1713941262.ip-26-0-163-236.2159465.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:560ea0f543e24af273c443f0d783419a3f42cfdd753222467875fd9e0ee81ee6
3
+ size 1160
distil-mistral/1713941262.4960794/hparams.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ global_batch_size: 64
4
+ gradient_accumulation_steps: 1
5
+ learning_rate: 0.0001
6
+ lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType
7
+ - linear
8
+ max_steps: 100000
9
+ mixed_precision: bf16
10
+ model_name_or_path: sanchit-gandhi/Mistral-1.5B-Instruct-v0.2
11
+ num_train_epochs: 3.0
12
+ per_device_train_batch_size: 8
13
+ teacher_name_or_path: mistralai/Mistral-7B-Instruct-v0.2
14
+ temperature: 2.0
15
+ warmup_steps: 500
16
+ weight_decay: 0.0
distil-mistral/events.out.tfevents.1713941253.ip-26-0-163-236.2159465.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac0bbb34ac5ddea96b4065d48e1b11fc3eaa9afc1f12ca77a56f73c4c71a836d
3
+ size 63288