Getting an error at inference with a model fine-tuned from JASMINE-350M
Hello,
I am trying to fine-tune JASMINE-350M for a chatbot. The fine-tuning code runs without problems, but when I run inference I get the following error:
RuntimeError Traceback (most recent call last)
in <cell line: 43>()
46 break
47
---> 48 bot_response = chatbot_response(user_input)
49 print(f"JASMINE: {bot_response}")
/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py in _sample(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, **model_kwargs)
3247 probs = nn.functional.softmax(next_token_scores, dim=-1)
3248 # TODO (joao): this OP throws "skipping cudagraphs due to ['incompatible ops']", find solution
-> 3249 next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
3250 else:
3251 next_tokens = torch.argmax(next_token_scores, dim=-1)
RuntimeError: CUDA error: device-side assert triggered
Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.
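From what I understand, CUDA kernel errors can be reported asynchronously, so the line in the traceback above may not be the real source of the assert. A minimal sketch of how I think the underlying error could be surfaced (assuming a freshly restarted runtime, since the flag has to be set before CUDA is initialized) is:

import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # make kernel launches synchronous so the traceback points at the failing op
import torch  # imported only after setting the flag

# Alternatively, running the same generate() call once on the CPU usually turns the
# device-side assert into a readable Python exception instead of a generic CUDA error.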
Could you please help me fix this issue, or share code that works?
Here is the complete code for your reference:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from datasets import Dataset
# Load model and tokenizer
model_name = "UBC-NLP/Jasmine-350M"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, attn_implementation="eager").cuda()
# Add special tokens for the chatbot conversation roles
special_tokens_dict = {"additional_special_tokens": ["<|USER|>", "<|BOT|>"]}
num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
model.resize_token_embeddings(len(tokenizer))
# Convert the list to a Hugging Face Dataset
train_dataset_raw = Dataset.from_dict({"dialogue": train_dataset_raw})
# Use the tokenizer's eos token, or fall back to <|endoftext|> if it is not set
eos_token = tokenizer.eos_token or "<|endoftext|>"
# Preprocess function for structured data
def preprocess_function(examples):
    conversations = []
    # Iterate over each dialogue in the dataset
    for dialogue in examples["dialogue"]:
        # Initialize an empty conversation string
        conversation = ""
        # Add each message to the conversation with alternating tags
        for i, message in enumerate(dialogue):
            if i % 2 == 0:  # Even index (user message)
                conversation += f"<|USER|>\n{message}{eos_token}\n"
            else:  # Odd index (bot message)
                conversation += f"<|BOT|>\n{message}{eos_token}\n"
        # Append the fully formatted conversation to the list
        conversations.append(conversation)
    inputs = tokenizer(conversations, truncation=True, padding="max_length", max_length=128)
    inputs["labels"] = inputs["input_ids"].copy()  # Set labels equal to input_ids
    return inputs
# Ensure the tokenizer has a pad token; if not, set it to the eos_token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

train_dataset = train_dataset_raw.map(preprocess_function, batched=True)
# Training arguments
training_args = TrainingArguments(
    output_dir="/home/output/",
    overwrite_output_dir=True,
    num_train_epochs=1,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    save_steps=500,
    save_total_limit=2,
    logging_dir="/home/logs/",
    logging_steps=50,
    report_to="none",  # Disable W&B
)
# Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
)
#trainer.train(resume_from_checkpoint=True)
trainer.train()
# Save the fine-tuned model and tokenizer
model.save_pretrained("/home/output/")
tokenizer.save_pretrained("/home/output/")
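That is the end of the fine-tuning script. Before the inference part, here is a sanity check I could run after mapping the dataset (just a sketch, using only names defined above) to confirm that no token id falls outside the resized embedding matrix; as far as I know, an out-of-range embedding index is a frequent cause of device-side asserts:

# Sanity-check sketch: every token id must be smaller than the number of embedding rows,
# otherwise the embedding lookup on the GPU can fail with a device-side assert.
vocab_rows = model.get_input_embeddings().weight.shape[0]
max_id = max(max(ids) for ids in train_dataset["input_ids"])
print(f"max token id: {max_id}, embedding rows: {vocab_rows}")
assert max_id < vocab_rows, "found a token id outside the embedding range"

And here is the inference script, where the error actually occurs: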
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Load the fine-tuned model and tokenizer
model_path = "/home/output/"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path).cuda()
model.eval()
# Set up conversation history
conversation_history = []
def chatbot_response(user_input):
    # Append the user input to conversation history
    conversation_history.append(f"<|USER|> {user_input}")
    # Prepare the input for the model
    conversation_text = " ".join(conversation_history) + " <|BOT|>"
    inputs = tokenizer(conversation_text, return_tensors="pt").to("cuda")
    # Error happens at this line
    outputs = model.generate(
        inputs["input_ids"],
        max_new_tokens=50,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
    )
    # Decode and print the response
    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    bot_response = response_text.split("<|BOT|>")[-1].strip()
    # Append bot response to conversation history
    conversation_history.append(f"<|BOT|> {bot_response}")
    return bot_response
# Interactive loop
print("JASMINE Chatbot is ready! Type 'exit' to end the conversation.")
while True:
    user_input = input("User: ")
    if user_input.lower() in ["exit", "مع السلامة"]:
        break
    bot_response = chatbot_response(user_input)
    print(f"JASMINE: {bot_response}")