patrickamadeus committed
Commit 904c803 · verified · 1 parent: fa1b321

Upload step 3000 checkpoint

Files changed (3)
  1. config.json +1 -1
  2. model.safetensors +1 -1
  3. train_config.json +2 -2
config.json CHANGED
@@ -117,7 +117,7 @@
   "kv_bridge_use_gate": false,
   "memory_mode": "replace",
   "memory_donor_layers": [
-    28
+    20
   ],
   "memory_share_donor_across_right_layers": true,
   "memory_reduce_strategy": "single"
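This commit moves the single donor layer from 28 to 20. For anyone pulling the checkpoint, a minimal Python sketch of confirming the change after download; the repo id is a placeholder, and "revision" in hf_hub_download can take a branch name or full commit hash if you need this exact revision:

import json

from huggingface_hub import hf_hub_download

# Placeholder repo id -- substitute the actual model repository.
config_path = hf_hub_download(
    repo_id="patrickamadeus/<model>",
    filename="config.json",
)

with open(config_path) as f:
    cfg = json.load(f)

# At this commit the single donor layer moved from 28 to 20.
print(cfg["memory_donor_layers"])  # -> [20]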
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b0f210e659ccf8f4fa961c2bcba4c22702bc119fe2bc3a3115df39311c0bf73b
+oid sha256:632ab53f36f71ca625fcd40d849473cc0c173a3465f03c6b65319ca1146da51e
 size 725313232
train_config.json CHANGED
@@ -2,14 +2,14 @@
   "lr_mp": 0.0001,
   "lr_vision_backbone": 0.0,
   "lr_language_backbone": 0.0001,
-  "lr_right_tower": 0.0,
+  "lr_right_tower": 0.0001,
   "lr_kv_bridge": 0.0,
   "lr_activation_bridge": 0.0,
   "batch_size": 32,
   "gradient_accumulation_steps": 4,
   "max_grad_norm": 1.0,
   "max_training_steps": 10000,
-  "stop_after_step": 4100,
+  "stop_after_step": 5100,
   "warmup_ratio": 0.03,
   "stats_log_interval": 100,
   "precision": "bf16",