liangyuxin committed
Commit
be55357
1 Parent(s): 026a0d8

add config and model.py

Files changed (2)
  1. config.json +26 -0
  2. modeling_llama_rm.py +26 -0
config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "_name_or_path": "IDEA-CCNL/Ziya-LLaMA-7B-Reward",
+   "architectures": [
+     "LlamaRewardModel"
+   ],
+   "auto_map": {
+     "AutoModelForSequenceClassification": "rm_modeling_hf.LlamaRewardModel"
+   },
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 11008,
+   "max_position_embeddings": 2048,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "pad_token_id": 0,
+   "rms_norm_eps": 1e-06,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float32",
+   "transformers_version": "4.28.0.dev0",
+   "use_cache": true,
+   "vocab_size": 32001
+ }
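The auto_map entry above is what lets the custom reward-model class be resolved through the Auto API. A minimal loading sketch, assuming the module named in auto_map (rm_modeling_hf) resolves to the custom-code file shipped with this repo and that the standard LlamaTokenizer is used:

from transformers import AutoModelForSequenceClassification, LlamaTokenizer

# trust_remote_code is required so the auto_map entry can load
# LlamaRewardModel from the repo's custom code file.
tokenizer = LlamaTokenizer.from_pretrained("IDEA-CCNL/Ziya-LLaMA-7B-Reward")
model = AutoModelForSequenceClassification.from_pretrained(
    "IDEA-CCNL/Ziya-LLaMA-7B-Reward",
    trust_remote_code=True,
)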
modeling_llama_rm.py ADDED
@@ -0,0 +1,26 @@
+ from transformers import PreTrainedModel, LlamaConfig, LlamaModel
+ import torch.nn as nn
+ import torch
+ from typing import Optional
+
+ class LlamaRewardModel(PreTrainedModel):
+     config_class = LlamaConfig
+
+     def __init__(self, config):
+         super().__init__(config)
+         self.model = LlamaModel(config)
+         # Scalar value head on top of the final hidden state.
+         self.value_head = nn.Linear(config.hidden_size, 1)
+
+     def forward(self,
+                 input_ids: torch.LongTensor,
+                 attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
+         outputs = self.model(input_ids, attention_mask=attention_mask, output_hidden_states=True)
+         last_hidden_states = outputs.hidden_states[-1]
+         if attention_mask is None:
+             # No padding information: use the hidden state at the last position.
+             last_hidden_states = last_hidden_states[:, -1]
+         else:
+             # Index of the last non-padded token in each sequence.
+             last_index = attention_mask.cumsum(dim=1).argmax(dim=1)
+             last_hidden_states = last_hidden_states.gather(
+                 1, last_index.view(-1, 1, 1).expand(-1, 1, last_hidden_states.size(-1))
+             ).squeeze(1)
+         values = self.value_head(last_hidden_states).squeeze(-1)  # (batch_size,)
+
+         return values
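A hedged usage sketch for the class above: it loads the weights with LlamaRewardModel.from_pretrained (inherited from PreTrainedModel) and scores one prompt/response pair. The "Human: ... Assistant: ..." prompt template and the LlamaTokenizer class are assumptions, not something defined in this commit.

import torch
from transformers import LlamaTokenizer
from modeling_llama_rm import LlamaRewardModel

tokenizer = LlamaTokenizer.from_pretrained("IDEA-CCNL/Ziya-LLaMA-7B-Reward")
model = LlamaRewardModel.from_pretrained("IDEA-CCNL/Ziya-LLaMA-7B-Reward")
model.eval()

# Assumed prompt format; adjust to whatever template the reward model was trained with.
text = "Human: How do I boil an egg?\n\nAssistant: Put the egg in boiling water for about eight minutes."
inputs = tokenizer(text, return_tensors="pt")

with torch.no_grad():
    reward = model(inputs.input_ids, attention_mask=inputs.attention_mask)

print(reward.item())  # scalar reward for the single sequence in the batch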