renpas22 committed on
Commit
2d1ba1a
·
1 Parent(s): fa9e543

Add inference config parameters

Browse files
train_configs/train_qwen_cot_dual.yaml CHANGED
@@ -38,6 +38,11 @@ entropy_coef: 0.01
38
  gamma: 0.99
39
  gae_lambda: 0.95
40
  rl_learning_rate: 5e-6
 
 
 
 
 
41
  prm_dropout: 0.1
42
  max_reasoning_steps: 20
43
 
 
38
  gamma: 0.99
39
  gae_lambda: 0.95
40
  rl_learning_rate: 5e-6
41
+
42
+ # Inference Configuration
43
+ num_inference_samples: 8
44
+ inference_temperature: 0.7
45
+ aggregation_method: "best_of_n"
46
  prm_dropout: 0.1
47
  max_reasoning_steps: 20
48