Commit d2d6eae (parent: 1c958b3) by shunxing1234: Update README.md

Files changed (1): README.md (+133 -1)
README.md CHANGED

<!-- </table> -->

## Quick Start: AquilaChat-7B (Chat model)

### 1. Inference

```python
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from cyg_conversation import covert_prompt_to_input_ids_with_history

# Load the tokenizer and model, then move the model to the GPU.
tokenizer = AutoTokenizer.from_pretrained("BAAI/AquilaChat-7B")
model = AutoModelForCausalLM.from_pretrained("BAAI/AquilaChat-7B")
model.eval()
model.to("cuda:0")
vocab = tokenizer.vocab
print(len(vocab))  # sanity check: vocabulary size

text = "请给出10个要到北京旅游的理由。"  # "Give 10 reasons to visit Beijing."

# Convert the prompt (with an empty chat history) to input ids.
tokens = covert_prompt_to_input_ids_with_history(text, history=[], tokenizer=tokenizer, max_token=512)
tokens = torch.tensor(tokens)[None,].to("cuda:0")

with torch.no_grad():
    out = model.generate(tokens, do_sample=True, max_length=512, eos_token_id=100007)[0]

out = tokenizer.decode(out.cpu().numpy().tolist())
print(out)
```
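
The helper also takes a `history` argument, so the same call can drive a multi-turn chat. Below is a minimal sketch that reuses `tokenizer` and `model` from the block above; the `(user, assistant)` pair format for `history` is an assumption, so check `cyg_conversation` for the structure it actually expects.

```python
import torch
from cyg_conversation import covert_prompt_to_input_ids_with_history

def chat_once(text, history, tokenizer, model):
    # Build input ids from the prompt plus the accumulated history.
    tokens = covert_prompt_to_input_ids_with_history(
        text, history=history, tokenizer=tokenizer, max_token=512)
    tokens = torch.tensor(tokens)[None,].to("cuda:0")
    with torch.no_grad():
        out = model.generate(tokens, do_sample=True, max_length=512,
                             eos_token_id=100007)[0]
    reply = tokenizer.decode(out.cpu().numpy().tolist())
    history.append((text, reply))  # ASSUMED history format: (user, assistant) pairs
    return reply

history = []
print(chat_once("请给出10个要到北京旅游的理由。", history, tokenizer, model))  # first turn
print(chat_once("其中哪一个最适合冬天?", history, tokenizer, model))  # follow-up: "Which one suits winter best?"
```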

Using [NBCE](https://github.com/bojone/NBCE/tree/main) (Naive Bayes-based Context Extension) for inference. NBCE runs the context-free prompt and several context-bearing prompts through the model as one batch, then merges their next-token distributions at every decoding step, extending the usable context length without any retraining.
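
The merge itself is one rule: take the per-context distribution with the lowest entropy and contrast it against the context-free one. A standalone sketch of just that rule on dummy log-probabilities (shapes and values are illustrative, not from the model):

```python
import torch

beta = 0.25                          # contrast strength, as in the script below
vocab_size = 8
# Row 0: context-free log-probs; rows 1..3: one row per context chunk.
logits = torch.log_softmax(torch.randn(4, vocab_size), dim=-1)

entropy = -(logits.exp() * logits.clamp(-100, 0)).sum(dim=-1)
k = entropy[1:].argmin() + 1         # most confident context chunk
merged = (1 + beta) * logits[k] - beta * logits[0]
print(merged.argmax().item())        # greedy next token under the merged rule
```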
66
+
67
+ ```python
68
+ import json
69
+ import torch
70
+ from transformers import AutoTokenizer
71
+ from transformers import AutoModelForCausalLM
72
+ from transformers import TopPLogitsWarper, LogitsProcessorList
73
+ import pdb
74
+
75
+ # 加载tokenizer
76
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
77
+ tokenizer.padding_side = 'left'
78
+ tokenizer.pad_token = tokenizer.unk_token
79
+
80
+ # 加载Aquila模型
81
+ model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16)
82
+ device = torch.device('cuda')
83
+ model.to(device)
84
+ # 加载示例Context
85
+ from cyg_conversation import default_conversation
86
+
87
+ conv = default_conversation.copy()
88
+ contexts = json.load(open('code_text_2.json'))
89
+
90
+ question = "请解释这段程序的功能:"
91
+ batch = []
92
+ conv.append_message(conv.roles[0], question)
93
+ conv.append_message(conv.roles[1], None)
94
+ batch.append(conv.get_prompt())
95
+ # 拼接context和question
96
+ for ci,context in enumerate(contexts):
97
+ conv1 = default_conversation.copy()
98
+ conv1.append_message(conv.roles[0], context+question)
99
+ conv1.append_message(conv.roles[1], None)
100
+ batch.append(conv1.get_prompt())
101
+ print('Context长度分布:', [len(text) for text in batch])
102
+ print('Context总长度:', sum([len(text) for text in batch]))
103
+
104
+ # Top-P截断
105
+ processors = LogitsProcessorList()
106
+ processors.append(TopPLogitsWarper(0.95))
107
+
108
+ # Copied from https://github.com/bojone/NBCE/blob/main/test.py#L51-L106
109
+ @torch.inference_mode()
110
+ def generate(max_tokens):
111
+ """Naive Bayes-based Context Extension 演示代码
112
+ """
113
+ inputs = tokenizer(batch, padding='longest', return_tensors='pt').to(device)
114
+ input_ids = inputs.input_ids
115
+ attention_mask = inputs.attention_mask
116
+
117
+ print('input_ids', input_ids.shape)
118
+ past_key_values = None
119
+ n = input_ids.shape[0]
120
+
121
+ for i in range(max_tokens):
122
+ # 模型输出
123
+ outputs = model(input_ids=input_ids,
124
+ attention_mask=attention_mask,
125
+ return_dict=True,
126
+ use_cache=True,
127
+ past_key_values=past_key_values
128
+ )
129
+ past_key_values = outputs.past_key_values
130
+
131
+ # ===== 核心代码开始 =====
132
+ beta, eta = 0.25, 0.1
133
+ logits = outputs.logits[:, -1]
134
+ logits = logits - logits.logsumexp(dim=-1, keepdims=True)
135
+ logits = processors(input_ids, logits)
136
+ entropy = -(logits.exp() * logits.clip(-100, 0)).sum(dim=-1)
137
+ if i > 0:
138
+ entropy[k] -= eta
139
+ k = entropy[1:].argmin() + 1
140
+ logits_max = logits[k]
141
+ logits_uncond = logits[0]
142
+ logits_merged = (1 + beta) * logits_max - beta * logits_uncond
143
+ logits = torch.where(logits_uncond > -100, logits_merged, logits_max)
144
+ # ===== 核心代码结束 =====
145
+
146
+ # 构建分布,采样
147
+ # tau = 1是标准的随机采样,tau->0则是贪心搜索
148
+ # 简单起见,这里没有实现topk、topp截断
149
+ tau = 0.01
150
+ probas = torch.nn.functional.softmax(logits[None] / tau , dim=-1)
151
+ next_tokens = torch.multinomial(probas, num_samples=1).squeeze(1)
152
+ if next_tokens[0] == tokenizer.eos_token_id:
153
+ break
154
+
155
+ ret = tokenizer.batch_decode(next_tokens)
156
+ print(ret[0], flush=True, end='')
157
+
158
+ # prepare for next iteration
159
+ input_ids = next_tokens.unsqueeze(-1).tile(n, 1)
160
+ attention_mask = torch.cat([attention_mask, torch.ones(n, 1, dtype=torch.long, device=device)], dim=-1)
161
+
162
+
163
+ if __name__ == '__main__':
164
+ generate(1000)
165
+
166
+ ```
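
The contexts file `code_text_2.json` is not included here; from the loop above it just needs to decode to a list of strings, each one chunk of the long input. An illustrative way to produce such a file (contents are made up):

```python
import json

# Illustrative only: the real code_text_2.json ships separately, if at all.
contexts = [
    "def add(a, b):\n    return a + b\n",
    "def mul(a, b):\n    return a * b\n",
]
with open('code_text_2.json', 'w') as f:
    json.dump(contexts, f, ensure_ascii=False)
```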