shenzhi-wang committed · Commit a1eb7ba · Parent(s): a276f55

Update README.md

README.md CHANGED
@@ -138,32 +138,35 @@ deepspeed --num_gpus 8 src/train_bash.py \
Before:

# 2. Usage

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "/Your/Path/To/Model"  # placeholder: local path or Hub repo id of the chat model

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype="auto", device_map="auto"
)

messages = [
    {"role": "user", "content": "写一首诗吧"},  # "Write a poem"
]

input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

outputs = model.generate(
    input_ids,
    max_new_tokens=8192,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True))
```
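`model.generate` returns the prompt tokens followed by the newly generated ones, which is why the snippet slices off the first `input_ids.shape[-1]` tokens before decoding. A minimal self-contained illustration of that slice (toy tensors, no model required):

```python
import torch

# Toy stand-ins: a 4-token "prompt" followed by 3 "generated" tokens.
input_ids = torch.tensor([[11, 12, 13, 14]])
outputs = torch.tensor([[11, 12, 13, 14, 101, 102, 103]])

response = outputs[0][input_ids.shape[-1]:]  # keep only the new tokens
print(response.tolist())  # [101, 102, 103]
```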

# 3. Examples
After:

# 2. Usage

```python
from llama_cpp import Llama

model = Llama(
    "/Your/Path/To/GGUF/File",
    verbose=False,
    n_gpu_layers=-1,  # -1 offloads all layers to the GPU
)

system_prompt = "You are a helpful assistant."

def generate_response(_model, _messages, _max_tokens=8192):
    _output = _model.create_chat_completion(
        _messages,
        stop=["<|eot_id|>", "<|end_of_text|>"],
        max_tokens=_max_tokens,
    )["choices"][0]["message"]["content"]
    return _output

# Example:

messages = [
    {
        "role": "system",
        "content": system_prompt,
    },
    {"role": "user", "content": "写一首诗吧"},  # "Write a poem"
]

print(generate_response(model, messages))
```
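Because `create_chat_completion` is stateless, multi-turn chat is just a matter of appending each reply to `messages` before the next call. A minimal sketch reusing the `model` and `generate_response` helper defined above (the follow-up prompt is illustrative):

```python
# Multi-turn: record the assistant's reply, then ask a follow-up question.
reply = generate_response(model, messages)
messages.append({"role": "assistant", "content": reply})
messages.append({"role": "user", "content": "Please translate the poem into English."})

print(generate_response(model, messages))
```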

# 3. Examples