C10X committed on
Commit d85e02a · verified · 1 Parent(s): f3620a8

Upload 7 files

Files changed (4)
  1. config.json +8 -32
  2. generation_config.json +1 -1
  3. metadata.json +7 -7
  4. model.safetensors +2 -2
config.json CHANGED
@@ -7,36 +7,12 @@
   "bos_token_id": 0,
   "dtype": "float32",
   "eos_token_id": 4,
-  "head_dim": 32,
+  "head_dim": 64,
   "hidden_act": "silu",
-  "hidden_size": 64,
+  "hidden_size": 512,
   "initializer_range": 0.02,
-  "intermediate_size": 256,
+  "intermediate_size": 2048,
   "layer_types": [
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
     "full_attention",
     "full_attention",
     "full_attention",
@@ -47,11 +23,11 @@
     "full_attention"
   ],
   "max_position_embeddings": 2048,
-  "max_window_layers": 32,
+  "max_window_layers": 8,
   "model_type": "qwen3",
-  "num_attention_heads": 4,
-  "num_hidden_layers": 32,
-  "num_key_value_heads": 2,
+  "num_attention_heads": 8,
+  "num_hidden_layers": 8,
+  "num_key_value_heads": 4,
   "pad_token_id": 1,
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
@@ -59,7 +35,7 @@
   "sliding_window": null,
   "tie_word_embeddings": true,
   "transformers_version": "4.57.6",
-  "use_cache": true,
+  "use_cache": false,
   "use_sliding_window": false,
   "vocab_size": 2048
 }
generation_config.json CHANGED
@@ -3,5 +3,5 @@
   "bos_token_id": 0,
   "eos_token_id": 4,
   "pad_token_id": 1,
-  "use_cache": true
+  "use_cache": false
 }
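Note: this commit flips use_cache from true to false in both config.json and generation_config.json, so the KV cache is disabled by default at inference time. It can still be re-enabled for a single call; a minimal sketch, assuming the checkpoint is loaded from a local path (the path and prompt below are illustrative, not part of the commit):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical local path -- substitute the actual checkpoint location
# (metadata.json records output_dir as ...\outputs\qwen3-16layer-c10x).
path = "workspace/outputs/qwen3-16layer-c10x"
model = AutoModelForCausalLM.from_pretrained(path)
tokenizer = AutoTokenizer.from_pretrained(path)

inputs = tokenizer("Once upon a time", return_tensors="pt")
# use_cache=False in the config disables the KV cache by default;
# passing use_cache=True here overrides it for this call only.
out = model.generate(**inputs, max_new_tokens=32, use_cache=True)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```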
metadata.json CHANGED
@@ -1,13 +1,13 @@
 {
-  "preset": "qwen3-32layer-c10x",
+  "preset": "qwen3-16layer-c10x",
   "family": "qwen3",
   "source": "inline-preset",
-  "output_dir": "D:\\Qwen3-80m-tinystories-A\\workspace\\outputs\\qwen3-32layer-c10x",
-  "parameters": 2496576,
+  "output_dir": "D:\\Qwen3-80m-tinystories-A\\workspace\\outputs\\qwen3-16layer-c10x",
+  "parameters": 32515584,
   "vocab_size": 2048,
-  "hidden_size": 64,
-  "num_hidden_layers": 32,
-  "num_attention_heads": 4,
-  "num_key_value_heads": 2,
+  "hidden_size": 512,
+  "num_hidden_layers": 8,
+  "num_attention_heads": 8,
+  "num_key_value_heads": 4,
   "rope_theta": 10000.0
 }
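The updated "parameters" value follows directly from the new config. A quick sanity check, assuming the standard Qwen3 decoder block (q/k/v/o projections with per-head q_norm/k_norm, a SwiGLU MLP, two per-layer RMSNorms, no attention biases, and a tied lm_head):

```python
# Recompute "parameters" from the values in the updated config.json.
vocab_size, hidden_size = 2048, 512
num_layers, num_heads, num_kv_heads = 8, 8, 4
head_dim, intermediate_size = 64, 2048

q_size = num_heads * head_dim      # 512
kv_size = num_kv_heads * head_dim  # 256 (grouped-query attention)

attn = (hidden_size * q_size            # q_proj
        + 2 * hidden_size * kv_size     # k_proj, v_proj
        + q_size * hidden_size          # o_proj
        + 2 * head_dim)                 # q_norm, k_norm (per-head RMSNorm)
mlp = 3 * hidden_size * intermediate_size  # gate_proj, up_proj, down_proj
norms = 2 * hidden_size                    # input + post-attention RMSNorm

total = (num_layers * (attn + mlp + norms)
         + vocab_size * hidden_size  # embeddings (lm_head is tied)
         + hidden_size)              # final RMSNorm
print(total)  # 32515584 -- matches metadata.json
```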
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e223f0d607fdc649ddfa5bca27b1675e6ee75bdd65198b00450abe76cba4d283
-size 10023688
+oid sha256:cdfe3633df9ac5c6ead66960a86a77e2999797dd40686e2caf4e856d5133c3eb
+size 130072120
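The jump in LFS size is consistent with the new parameter count: with "dtype": "float32" each weight takes 4 bytes, and the small remainder over the raw tensor payload is presumably the safetensors JSON header. A rough check:

```python
payload = 32_515_584 * 4   # float32 tensor data: 130062336 bytes
lfs_size = 130_072_120     # from the updated LFS pointer above
print(lfs_size - payload)  # 9784 bytes left over for header/metadata
```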