Ba2han committed on
Commit
14ef19e
·
verified ·
1 Parent(s): c82d4cc

Training in progress, step 390, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -75,18 +75,11 @@
75
  "num_hidden_layers": 48,
76
  "num_key_value_heads": 4,
77
  "pad_token_id": 50034,
78
- "resid_lambda_init_end": 1.05,
79
- "resid_lambda_init_start": 1.15,
80
- "resid_lambda_max": 1.25,
81
- "resid_lambda_min": 0.75,
82
- "resid_scalar_lr_mult": 0.01,
83
- "resid_scalar_weight_decay": 0.05,
84
  "rms_norm_eps": 1e-06,
85
  "rope_parameters": {
86
  "rope_theta": 50000,
87
  "rope_type": "default"
88
  },
89
- "scalar_lr": 0.5,
90
  "sliding_window": null,
91
  "squared_relu_activation": "relu2",
92
  "squared_relu_intermediate_size": 2880,
@@ -95,10 +88,5 @@
95
  "unsloth_version": "2026.4.8",
96
  "use_cache": false,
97
  "use_sliding_window": false,
98
- "vocab_size": 50048,
99
- "x0_lambda_init_end": 0.05,
100
- "x0_lambda_init_start": 0.2,
101
- "x0_mix_max": 0.3,
102
- "x0_scalar_lr_mult": 0.01,
103
- "x0_scalar_weight_decay": 0.0
104
  }
 
75
  "num_hidden_layers": 48,
76
  "num_key_value_heads": 4,
77
  "pad_token_id": 50034,
 
 
 
 
 
 
78
  "rms_norm_eps": 1e-06,
79
  "rope_parameters": {
80
  "rope_theta": 50000,
81
  "rope_type": "default"
82
  },
 
83
  "sliding_window": null,
84
  "squared_relu_activation": "relu2",
85
  "squared_relu_intermediate_size": 2880,
 
88
  "unsloth_version": "2026.4.8",
89
  "use_cache": false,
90
  "use_sliding_window": false,
91
+ "vocab_size": 50048
 
 
 
 
 
92
  }
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7897ee48a70a99f3be8042d8c88374e8f1788f56ef3243022895ea3f3d8a025a
3
- size 1151039648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d68869e9941558597cb9a2ed2f69bd3b6d0446177fb931565edc6e3e0bf43c18
3
+ size 1151036872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e60385101d4f28e573a088eab659376fe012575e58ec939ec71787cdc308a377
3
- size 1845697547
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6fb72dd5a7a531d1fd1d9ca62f33c744ea49994a8ed4858d787f0476bdac178
3
+ size 1382095179
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83f8db2cf42889f5d133fa68431556e23686c3d60a923adc0eef65d9d39b6834
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1d565802a8e26c4e8a31328752b7a7fdc186d9401aa008e65697d0ad8c22e33
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0cbbc4ffb0cd22b7a2bc8b3a51214b0ee04eee80c064b9aa3de49707d11e502
3
- size 1529
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e5d66c345519d0ddc5b19e485e028794035648262f6da9909314fb11d10d9e2
3
+ size 1337
last-checkpoint/trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff