Training in progress, epoch 1

Files changed (4) hide show

config.json CHANGED Viewed

@@ -5,12 +5,51 @@
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
   "eos_token_id": 151645,
   "head_dim": 128,
   "hidden_act": "silu",
   "hidden_size": 2560,
   "initializer_range": 0.02,
   "intermediate_size": 9728,
   "max_position_embeddings": 40960,
   "max_window_layers": 36,
   "model_type": "qwen3",

   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
+  "dtype": "bfloat16",
   "eos_token_id": 151645,
   "head_dim": 128,
   "hidden_act": "silu",
   "hidden_size": 2560,
   "initializer_range": 0.02,
   "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
   "max_position_embeddings": 40960,
   "max_window_layers": 36,
   "model_type": "qwen3",

model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b0715a10392bdd5118d08de1f514a6a435eca90370c552da724ef7e9349a8b6
 size 4967215360

 version https://git-lfs.github.com/spec/v1
+oid sha256:d859d7a0b2472bc60d1e3157e7755aef9aef1321bd536356d20366ae360dae11
 size 4967215360

model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:609e7e7f829e72fc66dbc79f9934d26593cecf852bf2b8f4989f7f690654d1bd
 size 3077766632

 version https://git-lfs.github.com/spec/v1
+oid sha256:fbacb63aa2797beac1c00f79715f0c187de43c57bed9ac6c2ee20a3bf4b760fe
 size 3077766632

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1afc2500a68483c16264423e468022edf4976022d51c6ffbcedb08c51ab6d277
 size 7953

 version https://git-lfs.github.com/spec/v1
+oid sha256:8470d727e64d342a7188d92201c679231746a8d1ea411a46b4147ce9d144865b
 size 7953