Text Classification
Transformers
Safetensors
mistral
feature-extraction
reward_model
custom_code
text-generation-inference
lievan committed on
Commit
066d4a6
1 Parent(s): 2cd6494

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +8 -7
README.md CHANGED
@@ -32,7 +32,7 @@ Eurus-RM-7B is trained on a mixture of [UltraInteract](https://huggingface.co/da
32
  ## Usage
33
  ```python
34
  from transformers import AutoTokenizer, AutoModel
35
-
36
 
37
  def test(model_path):
38
  dataset = [ # cases in webgpt; we use the same template as Mistral-Instruct-v0.2
@@ -43,12 +43,13 @@ def test(model_path):
43
  tokenizer = AutoTokenizer.from_pretrained(model_path)
44
  model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
45
 
46
- for example in dataset:
47
- inputs = tokenizer(example["chosen"], return_tensors="pt")
48
- chosen_reward = model(**inputs).item()
49
- inputs = tokenizer(example["rejected"], return_tensors="pt")
50
- rejected_reward = model(**inputs).item()
51
- print(chosen_reward - rejected_reward)
 
52
 
53
  test("openbmb/Eurus-RM-7b")
54
  # Output: 47.4404296875
 
32
  ## Usage
33
  ```python
34
  from transformers import AutoTokenizer, AutoModel
35
+ import torch
36
 
37
  def test(model_path):
38
  dataset = [ # cases in webgpt; we use the same template as Mistral-Instruct-v0.2
 
43
  tokenizer = AutoTokenizer.from_pretrained(model_path)
44
  model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
45
 
46
+ with torch.no_grad():
47
+ for example in dataset:
48
+ inputs = tokenizer(example["chosen"], return_tensors="pt")
49
+ chosen_reward = model(**inputs).item()
50
+ inputs = tokenizer(example["rejected"], return_tensors="pt")
51
+ rejected_reward = model(**inputs).item()
52
+ print(chosen_reward - rejected_reward)
53
 
54
  test("openbmb/Eurus-RM-7b")
55
  # Output: 47.4404296875