Update README.md
README.md (changed)
@@ -68,7 +68,7 @@ Input Text (Reasoning Trace)
          ↓
 [Frozen Base LM Encoder]                     ← Pre-trained, frozen during training
          ↓
-[Final Token (
+[Final Non-Padding Token Pooling (attention-mask aware)]
          ↓
 [Lightweight Linear Head]                    ← Only these parameters are trained
          ↓
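The new pooling line refers to selecting the hidden state at the last non-padding position, rather than the literal final position, which for right-padded batches would land on a pad token for any sequence shorter than the batch maximum. A minimal sketch of that step, assuming a Hugging Face-style encoder output with `last_hidden_state` and right-padded inputs; the function name is illustrative, not the repository's actual code:

```python
import torch

def pool_final_non_padding_token(last_hidden_state, attention_mask):
    """Pick the hidden state of each sequence's last real (non-padding) token.

    last_hidden_state: (batch, seq_len, hidden)
    attention_mask:    (batch, seq_len), 1 for real tokens, 0 for padding
    """
    last_idx = attention_mask.sum(dim=1) - 1                      # (batch,) index of last real token
    batch_idx = torch.arange(last_hidden_state.size(0),
                             device=last_hidden_state.device)
    return last_hidden_state[batch_idx, last_idx]                 # (batch, hidden)
```

The pooled vector is what the lightweight linear head scores.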
@@ -219,12 +219,13 @@ base_model = AutoModel.from_pretrained("facebook/opt-1.3b")
 tokenizer = AutoTokenizer.from_pretrained("facebook/opt-1.3b")
 
 # Load the trained scoring head weights
-
+ckpt = torch.load(model_path, map_location="cpu")
+state = ckpt["model_state"] if "model_state" in ckpt else ckpt
 
 # Initialize scoring head (single linear layer)
 hidden_size = base_model.config.hidden_size
 scoring_head = torch.nn.Linear(hidden_size, 1)
-scoring_head.load_state_dict(
+scoring_head.load_state_dict(state)
 
 # Move to device
 device = "cuda" if torch.cuda.is_available() else "cpu"
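For context, a sketch of how the loaded head can be used end to end after this snippet, assuming `model_path`, `base_model`, `tokenizer`, `scoring_head`, and `device` are as defined in the README; the `score_trace` helper and the eval/no_grad handling are illustrative, not the repository's exact API:

```python
import torch

base_model.to(device).eval()      # encoder stays frozen; inference only
scoring_head.to(device).eval()

@torch.no_grad()
def score_trace(text: str) -> float:
    inputs = tokenizer(text, return_tensors="pt", truncation=True).to(device)
    hidden = base_model(**inputs).last_hidden_state                 # (1, seq_len, hidden)
    last_idx = inputs["attention_mask"].sum(dim=1) - 1              # last non-padding position
    batch_idx = torch.arange(hidden.size(0), device=hidden.device)
    pooled = hidden[batch_idx, last_idx]                            # (1, hidden)
    return scoring_head(pooled).squeeze().item()

# Example: score_trace("Step 1: ...") -> scalar score for the reasoning trace
```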