windlx committed on
Commit
2072181
·
verified ·
1 Parent(s): 2df0e60

Add model config

Browse files
Files changed (1) hide show
  1. model_config.json +46 -0
model_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "url-classifier",
3
+ "architecture": "autoresearch",
4
+ "version": "1.0.0",
5
+ "training_dataset": "iowacat",
6
+ "accuracy": 0.9962,
7
+ "final_loss": 0.002,
8
+ "training_seconds": 300.3,
9
+ "gpu": "RTX 4060 Laptop",
10
+
11
+ "model": {
12
+ "depth": 4,
13
+ "aspect_ratio": 96,
14
+ "head_dim": 128,
15
+ "model_dim": 384,
16
+ "n_head": 3,
17
+ "n_kv_head": 3,
18
+ "n_embd": 384,
19
+ "vocab_size": 100277,
20
+ "max_seq_len": 64,
21
+ "window_pattern": "SSSL",
22
+ "estimated_params": 161000000
23
+ },
24
+
25
+ "tokenizer": {
26
+ "name": "cl100k_base",
27
+ "library": "tiktoken",
28
+ "vocab_size": 100277,
29
+ "bos_id": 1,
30
+ "pad_id": 0
31
+ },
32
+
33
+ "training": {
34
+ "batch_size": 16,
35
+ "total_batch_size": 8192,
36
+ "grad_accum_steps": 512,
37
+ "time_budget_seconds": 300,
38
+ "lr": 1.0,
39
+ "optimizer": "muon + adamw"
40
+ },
41
+
42
+ "class_labels": {
43
+ "0": "A (列表页)",
44
+ "1": "B (详情页)"
45
+ }
46
+ }