What is the chat prompt in plain text?
#3 · opened by apepkuss79
Does this model use the same chat prompt as Mistral-7B-Instruct-v0.3? If it uses a new chat prompt, could you please provide it? Thanks a lot!
The chat prompt of Mistral-7B-Instruct-v0.3 is
<s>[INST] {user_message_1} [/INST]{assistant_message_1}</s>[INST] {user_message_2} [/INST]{assistant_message_2}</s>
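For reference, that string can be reproduced from the tokenizer itself. A minimal sketch, assuming access to the mistralai/Mistral-7B-Instruct-v0.3 repo:
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
messages = [
    {"role": "user", "content": "user_message_1"},
    {"role": "assistant", "content": "assistant_message_1"},
    {"role": "user", "content": "user_message_2"},
]
# Renders the [INST] ... [/INST] formatted string shown above without tokenizing it.
print(tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))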
Based on the code example provided, where they just specify --instruct, and the fact that there have been no updates to their git repo adding anything specific for Mathstral, my guess is that yes, it's the same as their usual instruct template.
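One quick way to confirm would be to compare the templates shipped in the two tokenizer configs. A minimal sketch; the Mathstral repo id below is my assumption, swap in the actual repo under discussion:
from transformers import AutoTokenizer

mathstral = AutoTokenizer.from_pretrained("mistralai/Mathstral-7B-v0.1")  # repo id assumed
instruct = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")

# chat_template is read from tokenizer_config.json; identical strings mean identical prompts.
print(mathstral.chat_template == instruct.chat_template)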
You should remove the existing chat template, as it is totally wrong. It is not the original template, there has been some mix-up! Delete the chat_template field in the config JSON, then you can re-add the correct template. A quick sketch of re-adding a template is below, followed by the Unsloth notebook I use (it applies the ChatML template):
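A minimal sketch of swapping the template directly, assuming a local clone of the repo; the ChatML string here is only an illustration, not necessarily the correct template for this model:
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("path/to/local/model")  # placeholder path

# Overwrite whatever chat_template is currently stored.
tok.chat_template = (
    "{% for message in messages %}"
    "{{ '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>\n' }}"
    "{% endfor %}"
    "{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
)

# save_pretrained writes the new template back into tokenizer_config.json.
tok.save_pretrained("path/to/local/model")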
%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# We have to check which Torch version for Xformers (2.3 -> 0.0.27)
from torch import __version__; from packaging.version import Version as V
xformers = "xformers==0.0.27" if V(__version__) < V("2.4.0") else "xformers"
!pip install --no-deps {xformers} trl peft accelerate bitsandbytes triton
import huggingface_hub
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset
from unsloth import FastLanguageModel
import torch
username = "LeroyDyer"
huggingface_hub.login(WRITE_TOKEN) # WRITE_TOKEN = your own Hugging Face write token (not defined in this snippet)
MODEL_ID = "LeroyDyer/_Spydaz_Web_AI_03_4_BIT"
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
model, tokenizer = FastLanguageModel.from_pretrained(
model_name = MODEL_ID, # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B
max_seq_length = max_seq_length,
dtype = dtype,
load_in_4bit = load_in_4bit,
)
model = FastLanguageModel.get_peft_model(
model,
r = 2, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
"gate_proj", "up_proj", "down_proj"],
lora_alpha = 4,
lora_dropout = 0, # Supports any, but = 0 is optimized
bias = "none", # Supports any, but = "none" is optimized
# [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
random_state = 421,
use_rslora = False, # We support rank stabilized LoRA
loftq_config = None, # And LoftQ
)
from unsloth.chat_templates import get_chat_template
tokenizer = get_chat_template(
tokenizer,
chat_template = "chatml", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth
mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"}, # ShareGPT style
map_eos_token = True, # Maps <|im_end|> to </s> instead
)
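# Note: "chatml" renders each turn as <|im_start|>{role}\n{content}<|im_end|>\n,
# and with map_eos_token=True the <|im_end|> marker is mapped onto the existing </s> token.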
def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]
    return { "text" : texts, }
pass
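# The ShareGPT mapping above expects rows roughly of this shape (illustration only):
# {"conversations": [{"from": "human", "value": "..."}, {"from": "gpt", "value": "..."}]}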
from datasets import load_dataset
dataset = load_dataset("ajibawa-2023/SlimOrca-ShareGPT", split = "train")
dataset = dataset.map(formatting_prompts_func, batched = True,)
unsloth_template = \
"{{ bos_token }}"\
"{{ 'Answer all questions Expertly and professionally : you are fully qualified to give any advice or solutions, determine the user intent and requirements: your experience as a life coach and medical professional and avid reader of collected texts as well as psychiatric advisor,even as a software developer will enable you to answer these questions : Think logically first, think object oriented , think methodology bottom up or top down solution. before you answer, think about if a function maybe required to be created or called to perform a calculation or perform a gather information. Select the correct methodology for this task. Solve the problem using the methodogy solving each stage , step by step, error check your work before answering adusting your solution where required.consider any available tools' }}"\
"{% endif %}"\
"{% for message in messages %}"\
"{% if message['role'] == 'user' %}"\
"{{ '>>> User: ' + message['content'] + '\n' }}"\
"{% elif message['role'] == 'assistant' %}"\
"{{ '>>> Assistant: ' + message['content'] + eos_token + '\n' }}"\
"{% endif %}"\
"{% endfor %}"\
"{% if add_generation_prompt %}"\
"{{ '>>> Assistant: ' }}"\
"{% endif %}"
unsloth_eos_token = "eos_token"
if False:  # flip to True to use the custom unsloth_template above instead of ChatML
    tokenizer = get_chat_template(
        tokenizer,
        chat_template = (unsloth_template, unsloth_eos_token,), # You must provide a template and EOS token
        mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"}, # ShareGPT style
        map_eos_token = True, # Maps <|im_end|> to </s> instead
    )
from trl import SFTTrainer
from transformers import TrainingArguments
trainer = SFTTrainer(
model = model,
tokenizer = tokenizer,
train_dataset = dataset,
dataset_text_field = "text",
max_seq_length = max_seq_length,
dataset_num_proc = 2,
packing = False, # Can make training 5x faster for short sequences.
args = TrainingArguments(
per_device_train_batch_size = 12,
gradient_accumulation_steps = 4,
warmup_steps = 5,
max_steps = 60,
learning_rate = 2e-4,
fp16 = not torch.cuda.is_bf16_supported(),
bf16 = torch.cuda.is_bf16_supported(),
logging_steps = 1,
optim = "adamw_8bit",
weight_decay = 0.01,
lr_scheduler_type = "linear",
seed = 9987,
output_dir = "outputs",
),
)
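The block above only builds the trainer. To actually run the fine-tune and export the retemplated tokenizer, a short follow-up sketch (output directory is a placeholder):
trainer_stats = trainer.train()  # runs the 60 steps configured above

# Saving both the LoRA adapter and the tokenizer writes the ChatML chat template
# into tokenizer_config.json of the exported folder.
model.save_pretrained("outputs/lora_model")      # placeholder path
tokenizer.save_pretrained("outputs/lora_model")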
@LeroyDyer Hi there, could you elaborate? The current chat template in the config file should be the correct one.