yashiqxlab committed on
Commit
719a80d
1 Parent(s): 61c2927

Training in progress, step 100

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "Qwen/Qwen2-0.5B",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
@@ -7,19 +7,19 @@
7
  "bos_token_id": 151644,
8
  "eos_token_id": 151645,
9
  "hidden_act": "silu",
10
- "hidden_size": 896,
11
  "initializer_range": 0.02,
12
- "intermediate_size": 4864,
13
- "max_position_embeddings": 131072,
14
- "max_window_layers": 24,
15
  "model_type": "qwen2",
16
- "num_attention_heads": 14,
17
  "num_hidden_layers": 24,
18
- "num_key_value_heads": 2,
19
  "pad_token_id": 151645,
20
  "rms_norm_eps": 1e-06,
21
  "rope_theta": 1000000.0,
22
- "sliding_window": 131072,
23
  "tie_word_embeddings": true,
24
  "torch_dtype": "bfloat16",
25
  "transformers_version": "4.41.2",
 
1
  {
2
+ "_name_or_path": "Qwen/Qwen1.5-0.5B",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
 
7
  "bos_token_id": 151644,
8
  "eos_token_id": 151645,
9
  "hidden_act": "silu",
10
+ "hidden_size": 1024,
11
  "initializer_range": 0.02,
12
+ "intermediate_size": 2816,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 21,
15
  "model_type": "qwen2",
16
+ "num_attention_heads": 16,
17
  "num_hidden_layers": 24,
18
+ "num_key_value_heads": 16,
19
  "pad_token_id": 151645,
20
  "rms_norm_eps": 1e-06,
21
  "rope_theta": 1000000.0,
22
+ "sliding_window": 32768,
23
  "tie_word_embeddings": true,
24
  "torch_dtype": "bfloat16",
25
  "transformers_version": "4.41.2",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38d2a19330b7281732a01a99ff16e2dd76975bf8d191f4645b69840ac1e308aa
3
- size 987578144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aedc68856425813373b80732b296ca170e907111c101dcb033446b67e17a018
3
+ size 927414184
runs/Jun19_05-46-05_vipin-yasI179KX/events.out.tfevents.1718777132.vipin-yasI179KX.441835.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b45da375de07bbc2306c72bc3af7024444c024ed4ff0624f844ddcccc97f2364
3
+ size 5003
runs/Jun19_06-21-26_vipin-yasI179KX/events.out.tfevents.1718778317.vipin-yasI179KX.449183.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfd6badc74358909de07b1dc80e47a788961b91da400680643f7edc0ff1a1b48
3
+ size 5003
runs/Jun19_06-34-18_vipin-yasI179KX/events.out.tfevents.1718778921.vipin-yasI179KX.452400.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a494a4f72a711b1e25b946dadbd93f6962b9d22058c9033ce08cc1ccbf930c20
3
+ size 5003
runs/Jun19_06-43-28_vipin-yasI179KX/events.out.tfevents.1718779724.vipin-yasI179KX.454399.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac4329dc56f2825f1ec493eeea830f275c24092be8d9655ac6fe6c3cff1de74f
3
+ size 5003
runs/Jun19_07-47-02_vipin-yasI179KX/events.out.tfevents.1718784097.vipin-yasI179KX.468386.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6bcb55c57c7e248b3717bd1864e27dfe2cb89d977faea1f2af701a121621780
3
+ size 5003
runs/Jun19_08-07-23_vipin-yasI179KX/events.out.tfevents.1718784748.vipin-yasI179KX.475079.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f338dc3b92dae1ba2109beeccc9ddddf9d4d509a44c10d30cb2f827d1e050099
3
+ size 9345
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d50a85fa368f2f82040527c6f8ff696a657bb27122ff1b831e81a6d15def616
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3beaedac57511859debe9ed8863573235d19720b594bce57665e791e8cdeffec
3
  size 5368