moot20 committed
Commit d625c9e (verified) · Parent: a0210e3

Upload folder using huggingface_hub

Files changed (4):
  1. README.md (+2 -2)
  2. config.json (+7 -7)
  3. model.safetensors (+2 -2)
  4. model.safetensors.index.json (+19 -1)
README.md CHANGED
@@ -5,8 +5,8 @@ tags:
  ---

  # moot20/SmolVLM-500M-Base-MLX
- This model was converted to MLX format from [`HuggingFaceTB/SmolVLM-256M-Base`]() using mlx-vlm version **0.1.12**.
- Refer to the [original model card](https://huggingface.co/HuggingFaceTB/SmolVLM-256M-Base) for more details on the model.
+ This model was converted to MLX format from [`HuggingFaceTB/SmolVLM-500M-Base`]() using mlx-vlm version **0.1.12**.
+ Refer to the [original model card](https://huggingface.co/HuggingFaceTB/SmolVLM-500M-Base) for more details on the model.
  ## Use with mlx

  ```bash
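In this view the README's usage snippet is cut off at the opening ```bash fence, so the card's own commands are not shown here. As a hedged illustration only, a minimal Python sketch assuming the mlx-vlm ~0.1.x API: `load` and `generate` exist in that library, but `generate`'s argument names and order have shifted across releases, so treat this as a sketch rather than the card's snippet.

```python
# Minimal sketch, assuming mlx-vlm ~0.1.x ("pip install mlx-vlm").
# Check the docs for your installed version: generate()'s argument
# names/order have changed between releases.
from mlx_vlm import load, generate

# Fetches the converted weights from the Hub and builds the processor.
model, processor = load("moot20/SmolVLM-500M-Base-MLX")

text = generate(
    model,
    processor,
    "<image>Describe this image.",  # base model: plain prompt, no chat template
    image="cat.jpg",                # hypothetical local image path
    max_tokens=100,
)
print(text)
```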
config.json CHANGED
@@ -57,13 +57,13 @@
    "text_config": {
      "vocab_size": 49280,
      "max_position_embeddings": 8192,
-     "hidden_size": 576,
-     "intermediate_size": 1536,
-     "num_hidden_layers": 30,
-     "num_attention_heads": 9,
-     "num_key_value_heads": 3,
+     "hidden_size": 960,
+     "intermediate_size": 2560,
+     "num_hidden_layers": 32,
+     "num_attention_heads": 15,
+     "num_key_value_heads": 5,
      "hidden_act": "silu",
-     "initializer_range": 0.041666666666666664,
+     "initializer_range": 0.02,
      "rms_norm_eps": 1e-05,
      "pretraining_tp": 1,
      "use_cache": true,
@@ -134,7 +134,7 @@
      "decoder_start_token_id": null,
      "task_specific_params": null,
      "problem_type": null,
-     "_name_or_path": "/fsx/m4/experiments/local_experiment_dir/tr_341_vsmollm2_05b/opt_step-25750/unwrapped_model",
+     "_name_or_path": "/fsx/m4/experiments/local_experiment_dir/tr_343_vsmollm2_05b/opt_step-22750/unwrapped_model",
      "_attn_implementation_autoset": false,
      "_flash_attn_2_enabled": true,
      "is_llama_config": true,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cfc4304ed859ecfa88c3b7bc59c9f88bd87a743be95694aea0c7e6c1a1479517
- size 513026363
+ oid sha256:3d6caa1cbc55256126a9c584d55cfb1759bb0d359584d1105f111fb48c8a2fac
+ size 1015023504
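The LFS pointer's size field roughly doubles, from 513,026,363 to 1,015,023,504 bytes, which matches the parameter jump: at 2 bytes per 16-bit weight, that is about 256M weights before and about 507M weights after.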
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
  {
    "metadata": {
-     "total_size": 512969856
+     "total_size": 1014964608
    },
    "weight_map": {
      "connector.modality_projection.proj.weight": "model.safetensors",
@@ -221,6 +221,24 @@
      "language_model.layers.3.self_attn.o_proj.weight": "model.safetensors",
      "language_model.layers.3.self_attn.q_proj.weight": "model.safetensors",
      "language_model.layers.3.self_attn.v_proj.weight": "model.safetensors",
+     "language_model.layers.30.input_layernorm.weight": "model.safetensors",
+     "language_model.layers.30.mlp.down_proj.weight": "model.safetensors",
+     "language_model.layers.30.mlp.gate_proj.weight": "model.safetensors",
+     "language_model.layers.30.mlp.up_proj.weight": "model.safetensors",
+     "language_model.layers.30.post_attention_layernorm.weight": "model.safetensors",
+     "language_model.layers.30.self_attn.k_proj.weight": "model.safetensors",
+     "language_model.layers.30.self_attn.o_proj.weight": "model.safetensors",
+     "language_model.layers.30.self_attn.q_proj.weight": "model.safetensors",
+     "language_model.layers.30.self_attn.v_proj.weight": "model.safetensors",
+     "language_model.layers.31.input_layernorm.weight": "model.safetensors",
+     "language_model.layers.31.mlp.down_proj.weight": "model.safetensors",
+     "language_model.layers.31.mlp.gate_proj.weight": "model.safetensors",
+     "language_model.layers.31.mlp.up_proj.weight": "model.safetensors",
+     "language_model.layers.31.post_attention_layernorm.weight": "model.safetensors",
+     "language_model.layers.31.self_attn.k_proj.weight": "model.safetensors",
+     "language_model.layers.31.self_attn.o_proj.weight": "model.safetensors",
+     "language_model.layers.31.self_attn.q_proj.weight": "model.safetensors",
+     "language_model.layers.31.self_attn.v_proj.weight": "model.safetensors",
      "language_model.layers.4.input_layernorm.weight": "model.safetensors",
      "language_model.layers.4.mlp.down_proj.weight": "model.safetensors",
      "language_model.layers.4.mlp.gate_proj.weight": "model.safetensors",