Text Classification
Transformers
Safetensors
mistral
feature-extraction
reward_model
custom_code
text-generation-inference
lievan committed
Commit 4110700
1 Parent(s): c857edd

Update README.md

Files changed (1)
  1. README.md +2 -41
README.md CHANGED
@@ -28,47 +28,8 @@ Eurus-RM-7B is trained on a mixture of [UltraInteract](https://huggingface.co/da
 
 ## Usage
 ```python
-from transformers import PreTrainedModel, MistralConfig, MistralModel
-import torch.nn as nn
-import torch
-from typing import Optional, List
+from transformers import AutoTokenizer, AutoModel
 
-class EurusRewardModel(PreTrainedModel):
-    config_class = MistralConfig
-    def __init__(self, config):
-        super().__init__(config)
-        self.model = MistralModel(config)
-        self.regression_head = nn.Linear(self.config.hidden_size, 1, bias=False)
-
-    def forward( # args are the same as LlamaForCausalLM
-        self,
-        input_ids: torch.LongTensor = None,
-        attention_mask: Optional[torch.Tensor] = None,
-        position_ids: Optional[torch.LongTensor] = None,
-        past_key_values: Optional[List[torch.FloatTensor]] = None,
-        inputs_embeds: Optional[torch.FloatTensor] = None,
-        labels: Optional[torch.LongTensor] = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
-    ):
-
-        transformer_outputs = self.model(
-            input_ids,
-            attention_mask=attention_mask,
-            position_ids=position_ids,
-            past_key_values=past_key_values,
-            inputs_embeds=inputs_embeds,
-        )
-
-        hidden_states = transformer_outputs[0]
-        rewards = self.regression_head(hidden_states).squeeze(-1)
-
-        ends = attention_mask.cumsum(dim=1).argmax(dim=1).view(-1,1)
-        rewards = torch.gather(rewards, 1, ends)
-
-        return rewards
 
 def test(model_path):
     dataset = [ # cases in webgpt; we use the same template as Mistral-Instruct-v0.2
@@ -78,7 +39,7 @@ def test(model_path):
 
 
     tokenizer = AutoTokenizer.from_pretrained(model_path)
-    model = EurusRewardModel.from_pretrained(model_path)
+    model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
 
     for example in dataset:
         inputs = tokenizer(example["chosen"], return_tensors="pt")
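
For reference, a minimal end-to-end scoring sketch based on the updated snippet above. The repo id, the example prompt, and the `[INST] ... [/INST]` wrapping are assumptions (the diff only notes that the WebGPT cases follow the Mistral-Instruct-v0.2 template), and the remote-code model is assumed to return one scalar reward per sequence at the last non-padding position, as the removed `EurusRewardModel.forward` did.

```python
import torch
from transformers import AutoTokenizer, AutoModel

# Hypothetical repo id; substitute the actual path to this checkpoint.
model_path = "openbmb/Eurus-RM-7b"

tokenizer = AutoTokenizer.from_pretrained(model_path)
# trust_remote_code=True pulls in this repo's custom reward-model class (see the custom_code tag).
model = AutoModel.from_pretrained(model_path, trust_remote_code=True)

# Assumed Mistral-Instruct-v0.2-style wrapping, matching the dataset comment in the diff above.
prompt = "[INST] Explain why the sky is blue. [/INST]"
response = "Shorter wavelengths of sunlight are scattered more strongly by air molecules (Rayleigh scattering)."

inputs = tokenizer(prompt + response, return_tensors="pt")
with torch.no_grad():
    # The removed EurusRewardModel.forward returned one reward per sequence,
    # gathered at the last non-padding token; the remote-code model is assumed to do the same.
    reward = model(**inputs)

print(reward.item())
```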