renpas22 committed on
Commit ·
2d1ba1a
1
Parent(s): fa9e543
Add inference config parameters
Browse files
train_configs/train_qwen_cot_dual.yaml
CHANGED
|
@@ -38,6 +38,11 @@ entropy_coef: 0.01
|
|
| 38 |
gamma: 0.99
|
| 39 |
gae_lambda: 0.95
|
| 40 |
rl_learning_rate: 5e-6
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
prm_dropout: 0.1
|
| 42 |
max_reasoning_steps: 20
|
| 43 |
|
|
|
|
| 38 |
gamma: 0.99
|
| 39 |
gae_lambda: 0.95
|
| 40 |
rl_learning_rate: 5e-6
|
| 41 |
+
|
| 42 |
+
# Inference Configuration
|
| 43 |
+
num_inference_samples: 8
|
| 44 |
+
inference_temperature: 0.7
|
| 45 |
+
aggregation_method: "best_of_n"
|
| 46 |
prm_dropout: 0.1
|
| 47 |
max_reasoning_steps: 20
|
| 48 |
|