Fix issues in sample code: invalid reward_tokenizer (missing closing parenthesis in the GPTRewardModel(...) call) and a duplicated `return scores` statement
#1
by
amitness
- opened
README.md
CHANGED
@@ -92,11 +92,10 @@ class GPTRewardModel(nn.Module):
|
|
92 |
c_ind = c_inds[0].item() if len(c_inds) > 0 else input_ids.shape[1]
|
93 |
scores.append(rewards[i, c_ind - 1])
|
94 |
return scores
|
95 |
-
return scores
|
96 |
|
97 |
## Load the model and tokenizer
|
98 |
|
99 |
-
reward_model = GPTRewardModel("meta-llama/Llama-2-7b-chat-hf"
|
100 |
reward_tokenizer = reward_model.tokenizer
|
101 |
reward_tokenizer.truncation_side = "left"
|
102 |
|
|
|
92 |
c_ind = c_inds[0].item() if len(c_inds) > 0 else input_ids.shape[1]
|
93 |
scores.append(rewards[i, c_ind - 1])
|
94 |
return scores
|
|
|
95 |
|
96 |
## Load the model and tokenizer
|
97 |
|
98 |
+
reward_model = GPTRewardModel("meta-llama/Llama-2-7b-chat-hf")
|
99 |
reward_tokenizer = reward_model.tokenizer
|
100 |
reward_tokenizer.truncation_side = "left"
|
101 |
|