dreamerdeo committed
Commit 20fd49c
1 Parent(s): 5831e6e

Update README.md

Files changed (1): README.md (+27, -10)
README.md CHANGED
@@ -51,7 +51,7 @@ The pre-training corpus heavily leverages the publicly available corpus, including
 [SlimPajama](https://huggingface.co/datasets/cerebras/SlimPajama-627B),
 [SkyPile](https://huggingface.co/datasets/Skywork/SkyPile-150B),
 [CC100](https://huggingface.co/datasets/cc100) and [MADLAD-400](https://huggingface.co/datasets/allenai/MADLAD-400).
-The instruction tuning corpus are all public available including
+The instruction tuning corpus are all publicly available including
 [aya_collection](https://huggingface.co/datasets/CohereForAI/aya_collection),
 [aya_dataset](https://huggingface.co/datasets/CohereForAI/aya_dataset),
 [OpenOrca](https://huggingface.co/datasets/Open-Orca/OpenOrca).
@@ -70,25 +70,42 @@ Here provides a code snippet to show you how to load the tokenizer and model and
 
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
-device = "cuda" # the device to load the model
+device = "cuda"
 
-model = AutoModelForCausalLM.from_pretrained("sail/Sailor-7B", device_map="auto")
-tokenizer = AutoTokenizer.from_pretrained("sail/Sailor-7B")
+model = AutoModelForCausalLM.from_pretrained(
+    'sail/Sailor-1.8B-Chat',
+    torch_dtype="auto",
+    device_map="auto"
+)
+
+tokenizer = AutoTokenizer.from_pretrained('sail/Sailor-1.8B-Chat')
+system_prompt= 'You are a helpful assistant'
+
+prompt = "Beri saya pengenalan singkat tentang model bahasa besar."
+# prompt = "Hãy cho tôi một giới thiệu ngắn gọn về mô hình ngôn ngữ lớn."
+# prompt = "ให้ฉันแนะนำสั้น ๆ เกี่ยวกับโมเดลภาษาขนาดใหญ่"
 
-input_message = "Model bahasa adalah model probabilistik"
-### The given Indonesian input translates to 'A language model is a probabilistic model of.'
+messages = [
+    {"role": "system", "content": system_prompt},
+    {"role": "question", "content": prompt}
+]
+text = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True
+)
 
-model_inputs = tokenizer([input_message], return_tensors="pt").to(device)
+model_inputs = tokenizer([text], return_tensors="pt").to(device)
+input_ids = model_inputs.input_ids.to(device)
 
 generated_ids = model.generate(
-    model_inputs.input_ids,
-    max_new_tokens=64
+    input_ids,
+    max_new_tokens=512,
 )
 
 generated_ids = [
     output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
 ]
-
 response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 print(response)
 ```
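
The commit swaps the quickstart from base-model completion on `sail/Sailor-7B` to the chat flow on `sail/Sailor-1.8B-Chat`: the prompt (Indonesian for "Give me a brief introduction to large language models", with Vietnamese and Thai alternatives commented out) is wrapped in a message list and rendered with `apply_chat_template`; note the template uses a `question` role where most models use `user`. As a complement, here is a minimal streaming variant, assuming the `model`, `tokenizer`, and `input_ids` defined in the updated snippet above; `TextStreamer` is standard `transformers` API, but this usage is a sketch, not part of the commit.

```python
from transformers import TextStreamer

# Print tokens as they are generated instead of waiting for the full reply.
# skip_prompt hides the rendered chat prompt; skip_special_tokens is
# forwarded to tokenizer.decode when each chunk is printed.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# Same generate call as in the updated snippet, with the streamer attached;
# max_new_tokens=512 mirrors the commit's choice.
_ = model.generate(
    input_ids,
    streamer=streamer,
    max_new_tokens=512,
)
```

With a streamer attached the reply is printed incrementally, so the trailing `batch_decode` step is only needed if you also want the response as a string.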