explcre committed on
Commit
fa5b1a7
·
verified ·
1 Parent(s): 9b2da64

Upload _reasoning_rl_multiseed/exp_phase8_reasoning_grounded_rl_t1_r128_alpha1_s3_20260506_234027/manifest.json with huggingface_hub

Browse files
_reasoning_rl_multiseed/exp_phase8_reasoning_grounded_rl_t1_r128_alpha1_s3_20260506_234027/manifest.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": "train_reasoning_grounded_rl.py",
3
+ "stage": "post_sft_rl",
4
+ "init_ckpt": "/workspace/dnathinker/runs/exp_phase5_stage_a_v7_reasoning_t1_r128_FIXED_20260502_104222/best.pt",
5
+ "llm_name": "Qwen/Qwen3.5-0.8B",
6
+ "lora_r": 128,
7
+ "lora_layers": [
8
+ 3,
9
+ 7,
10
+ 11,
11
+ 15,
12
+ 19,
13
+ 23
14
+ ],
15
+ "lora_modules": [
16
+ "k_proj",
17
+ "o_proj",
18
+ "q_proj",
19
+ "v_proj"
20
+ ],
21
+ "n_trainable": 8650752,
22
+ "alpha_halluc": 1.0,
23
+ "beta_tags": 0.1,
24
+ "gamma_consensus": 0.2,
25
+ "rollouts_per_prompt": 4,
26
+ "lr": 5e-06,
27
+ "num_rl_steps": 250,
28
+ "seed": 3
29
+ }