Update README.md
Browse files
README.md
CHANGED
@@ -30,8 +30,8 @@ We introduce [SeaLLM-7B-v2](https://huggingface.co/SeaLLMs/SeaLLM-7B-v2), the st
|
|
30 |
- Technical report: [Arxiv: SeaLLMs - Large Language Models for Southeast Asia](https://arxiv.org/pdf/2312.00738.pdf).
|
31 |
- Model weights:
|
32 |
- [SeaLLM-7B-v2](https://huggingface.co/SeaLLMs/SeaLLM-7B-v2).
|
33 |
-
- [SeaLLM-7B-v2-gguf](https://huggingface.co/SeaLLMs/SeaLLM-7B-v2-gguf).
|
34 |
-
- [SeaLLM-7B-v2-GGUF (
|
35 |
|
36 |
|
37 |
<blockquote style="color:red">
|
@@ -155,7 +155,9 @@ You are a helpful assistant.</s><|im_start|>user
|
|
155 |
Hello world</s><|im_start|>assistant
|
156 |
Hi there, how can I help?</s>"""
|
157 |
|
158 |
-
# NOTE previous commit has \n between </s> and <|im_start|>, that was incorrect!
|
|
|
|
|
159 |
|
160 |
# ! ENSURE 1 and only 1 bos `<s>` at the beginning of sequence
|
161 |
print(tokenizer.convert_ids_to_tokens(tokenizer.encode(prompt)))
|
@@ -171,6 +173,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
171 |
|
172 |
device = "cuda" # the device to load the model onto
|
173 |
|
|
|
174 |
model = AutoModelForCausalLM.from_pretrained("SeaLLMs/SeaLLM-7B-v2", torch_dtype=torch.bfloat16, device_map=device)
|
175 |
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLM-7B-v2")
|
176 |
|
@@ -201,6 +204,8 @@ from vllm import LLM, SamplingParams
|
|
201 |
TURN_TEMPLATE = "<|im_start|>{role}\n{content}</s>"
|
202 |
TURN_PREFIX = "<|im_start|>{role}\n"
|
203 |
|
|
|
|
|
204 |
def seallm_chat_convo_format(conversations, add_assistant_prefix: bool, system_prompt=None):
|
205 |
# conversations: list of dict with key `role` and `content` (openai format)
|
206 |
if conversations[0]['role'] != 'system' and system_prompt is not None:
|
|
|
30 |
- Technical report: [Arxiv: SeaLLMs - Large Language Models for Southeast Asia](https://arxiv.org/pdf/2312.00738.pdf).
|
31 |
- Model weights:
|
32 |
- [SeaLLM-7B-v2](https://huggingface.co/SeaLLMs/SeaLLM-7B-v2).
|
33 |
+
- [SeaLLM-7B-v2-gguf](https://huggingface.co/SeaLLMs/SeaLLM-7B-v2-gguf). Run with LM Studio: [SeaLLM-7B-v2-q4_0](https://huggingface.co/SeaLLMs/SeaLLM-7B-v2-gguf/blob/main/SeaLLM-7B-v2.q4_0.gguf) and SeaLLM-7B-v2-q8_0.
|
34 |
+
- [SeaLLM-7B-v2-GGUF (thanks LoneStriker)](https://huggingface.co/LoneStriker/SeaLLM-7B-v2-GGUF). NOTE: LoneStriker's GGUF uses an old and incorrect chat format (see below).
|
35 |
|
36 |
|
37 |
<blockquote style="color:red">
|
|
|
155 |
Hello world</s><|im_start|>assistant
|
156 |
Hi there, how can I help?</s>"""
|
157 |
|
158 |
+
# NOTE: the previous commit had \n between </s> and <|im_start|>; that was incorrect!
|
159 |
+
# <|im_start|> is not a special token.
|
160 |
+
# Transformers chat_template should be consistent with vLLM format below.
|
161 |
|
162 |
# ! ENSURE 1 and only 1 bos `<s>` at the beginning of sequence
|
163 |
print(tokenizer.convert_ids_to_tokens(tokenizer.encode(prompt)))
|
|
|
173 |
|
174 |
device = "cuda" # the device to load the model onto
|
175 |
|
176 |
+
# Use bfloat16 to ensure the best performance.
|
177 |
model = AutoModelForCausalLM.from_pretrained("SeaLLMs/SeaLLM-7B-v2", torch_dtype=torch.bfloat16, device_map=device)
|
178 |
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLM-7B-v2")
|
179 |
|
|
|
204 |
TURN_TEMPLATE = "<|im_start|>{role}\n{content}</s>"
|
205 |
TURN_PREFIX = "<|im_start|>{role}\n"
|
206 |
|
207 |
+
# There is no \n between </s> and <|im_start|>.
|
208 |
+
|
209 |
def seallm_chat_convo_format(conversations, add_assistant_prefix: bool, system_prompt=None):
|
210 |
# conversations: list of dict with key `role` and `content` (openai format)
|
211 |
if conversations[0]['role'] != 'system' and system_prompt is not None:
|