XuehaiPan committed
Commit 375cd6a
1 Parent(s): 4d1016a

Update README.md

Files changed (1)
  1. README.md +44 -32
README.md CHANGED
@@ -36,16 +36,17 @@ It can play a role in the safe RLHF algorithm, helping the Beaver model become m
 - **Reward Model:** <https://huggingface.co/PKU-Alignment/beaver-7b-v1.0-reward>
 - **Cost Model:** <https://huggingface.co/PKU-Alignment/beaver-7b-v1.0-cost>
 - **Dataset Paper:** <https://arxiv.org/abs/2307.04657>
-- **Paper:** *Coming soon...*
+- **Paper:** <https://arxiv.org/abs/2310.12773>
 
 ## How to Use the Reward Model
 
 ```python
+import torch
 from transformers import AutoTokenizer
 from safe_rlhf.models import AutoModelForScore
 
-model = AutoModelForScore.from_pretrained('PKU-Alignment/beaver-7b-v1.0-reward', device_map='auto')
-tokenizer = AutoTokenizer.from_pretrained('PKU-Alignment/beaver-7b-v1.0-reward', use_fast=False)
+model = AutoModelForScore.from_pretrained('PKU-Alignment/beaver-7b-v1.0-reward', torch_dtype=torch.bfloat16, device_map='auto')
+tokenizer = AutoTokenizer.from_pretrained('PKU-Alignment/beaver-7b-v1.0-reward')
 
 input = 'BEGINNING OF CONVERSATION: USER: hello ASSISTANT:Hello! How can I help you today?'
 
@@ -54,34 +55,45 @@ output = model(**input_ids)
 print(output)
 
 # ScoreModelOutput(
-#     scores=tensor([[[-19.6476],
-#                     [-20.2238],
-#                     [-21.4228],
-#                     [-19.2506],
-#                     [-20.2728],
-#                     [-23.8799],
-#                     [-22.6898],
-#                     [-21.5825],
-#                     [-21.0855],
-#                     [-20.2068],
-#                     [-23.8296],
-#                     [-21.4940],
-#                     [-21.9484],
-#                     [-13.1220],
-#                     [ -6.4499],
-#                     [ -8.1982],
-#                     [ -7.2492],
-#                     [ -9.3377],
-#                     [-13.5010],
-#                     [-10.4932],
-#                     [ -9.7837],
-#                     [ -6.4540],
-#                     [ -6.0084],
-#                     [ -5.8093],
-#                     [ -6.6134],
-#                     [ -5.8995],
-#                     [ -9.1505],
-#                     [-11.3254]]], grad_fn=<ToCopyBackward0>),
-#     end_scores=tensor([[-11.3254]], grad_fn=<ToCopyBackward0>)
+#     scores=tensor([[[-19.7500],
+#                     [-19.3750],
+#                     [-20.1250],
+#                     [-18.0000],
+#                     [-20.0000],
+#                     [-23.8750],
+#                     [-23.5000],
+#                     [-22.0000],
+#                     [-21.0000],
+#                     [-20.1250],
+#                     [-23.7500],
+#                     [-21.6250],
+#                     [-21.7500],
+#                     [-12.9375],
+#                     [ -6.4375],
+#                     [ -8.1250],
+#                     [ -7.3438],
+#                     [ -9.1875],
+#                     [-13.6250],
+#                     [-10.5625],
+#                     [ -9.9375],
+#                     [ -6.4375],
+#                     [ -6.0938],
+#                     [ -5.8438],
+#                     [ -6.6562],
+#                     [ -5.9688],
+#                     [ -9.1875],
+#                     [-11.4375]]], grad_fn=<ToCopyBackward0>),
+#     end_scores=tensor([[-11.4375]], grad_fn=<ToCopyBackward0>),
+#     last_hidden_state=tensor([[[ 0.7461, -0.6055, -0.4980,  ...,  0.1670,  0.7812, -0.3242],
+#                                [ 0.7383, -0.5391, -0.1836,  ..., -0.1396,  0.5273, -0.2256],
+#                                [ 0.6836, -0.7031, -0.3730,  ...,  0.2100,  0.5000, -0.6328],
+#                                ...,
+#                                [-1.7969,  1.0234,  1.0234,  ..., -0.8047,  0.2500, -0.8398],
+#                                [ 2.0469, -1.3203,  0.8984,  ..., -0.7734, -1.4141, -1.6797],
+#                                [ 4.3438, -0.6953,  0.9648,  ..., -0.1787,  0.6680, -3.0000]]],
+#                              dtype=torch.bfloat16, grad_fn=<ToCopyBackward0>),
+#     end_last_hidden_state=tensor([[ 4.3438, -0.6953,  0.9648,  ..., -0.1787,  0.6680, -3.0000]],
+#                                   dtype=torch.bfloat16, grad_fn=<ToCopyBackward0>),
+#     end_index=tensor([27])
 # )
 ```
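
For reference, a minimal end-to-end sketch of the updated snippet is given below. The tokenization step falls between the two hunks and is not shown in this diff, so the `tokenizer(input, return_tensors='pt')` call here is an assumption about the omitted line (the second hunk header confirms `output = model(**input_ids)`); extracting the scalar reward via `output.end_scores` follows the printed `ScoreModelOutput` above.

```python
import torch

from transformers import AutoTokenizer
from safe_rlhf.models import AutoModelForScore

# Load the reward model in bfloat16, as in the updated README.
model = AutoModelForScore.from_pretrained(
    'PKU-Alignment/beaver-7b-v1.0-reward',
    torch_dtype=torch.bfloat16,
    device_map='auto',
)
tokenizer = AutoTokenizer.from_pretrained('PKU-Alignment/beaver-7b-v1.0-reward')

input = 'BEGINNING OF CONVERSATION: USER: hello ASSISTANT:Hello! How can I help you today?'

# Assumed tokenization step (the diff only shows `output = model(**input_ids)`).
input_ids = tokenizer(input, return_tensors='pt')
output = model(**input_ids)

# `scores` holds per-token rewards; `end_scores` holds the reward at the final
# token, which is the scalar typically used to compare responses.
reward = output.end_scores.squeeze().item()
print(f'reward = {reward:.4f}')  # roughly -11.4 for this prompt/response pair, per the output above
```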