SmolLM2-360M-Instruct-Reasoner
This is an experimentally trained reasoning version of SmolLM2-360M-Instruct. It was trained with a custom GRPO trainer that scales to models with <=500M parameters and supports both CPU and GPU (with vLLM support). So far the final model tends to perform well on most reasoning problems and responds in the desired format, although there is still room for improvement. Feel free to send a PR on the repo.
CODE: https://github.com/Jaykef/ai-algorithms/blob/main/smollm2_360M_135M_grpo_gsm8k.ipynb
Inference
import torch
import re
from transformers import AutoModelForCausalLM, AutoTokenizer
def get_user_prompt(prompt: str) -> str:
    """Return the user's message extracted from a chat prompt.

    If the prompt contains a ``<|im_start|>user ... <|im_end|>`` block, the
    text of the first such block is returned, stripped of surrounding
    whitespace.

    Otherwise the prompt is treated as a plain-text transcript:

    * lines whose first word is ``system`` (any case) are dropped;
    * on lines whose first word is ``user`` (any case), that label and an
      optional ``:`` separator are removed;
    * every other line is kept unchanged.

    Only the line carrying the ``system`` label is dropped; later lines of a
    multi-line system message are not detected and remain in the result.

    Args:
        prompt: Chat-templated or plain-text prompt.

    Returns:
        The extracted user text, stripped of leading/trailing whitespace.
    """
    match = re.search(r"<\|im_start\|>user\s*(.*?)\s*<\|im_end\|>", prompt, re.DOTALL)
    if match:
        return match.group(1).strip()

    kept = []
    for line in prompt.splitlines():
        stripped = line.strip()
        # ``\b`` makes the role word match only as a whole word, so ordinary
        # text such as "Systematic ..." or "Users ..." is left alone.
        if re.match(r"system\b", stripped, re.IGNORECASE):
            continue
        label = re.match(r"user\b\s*:?", stripped, re.IGNORECASE)
        if label:
            # Drop the role label (and its colon, if any), keep the content.
            kept.append(stripped[label.end():].strip())
        else:
            kept.append(line)
    return "\n".join(kept).strip()
def get_assistant_response(text: str) -> str:
    """Return the assistant's reply extracted from decoded model output.

    The reply is the text following the first ``<|im_start|>assistant``
    marker, up to the next ``<|im_end|>``. If no ``<|im_end|>`` follows
    (for example because generation stopped at the token limit), everything
    up to the end of ``text`` is returned instead.

    If ``text`` contains no ``<|im_start|>assistant`` marker at all, it is
    treated as plain text: lines that start with ``assistant`` (any case)
    are dropped and the remaining lines are returned.

    Args:
        text: Decoded model output, normally with special tokens preserved.

    Returns:
        The extracted reply, stripped of leading/trailing whitespace.
    """
    # ``\Z`` lets the capture end at end-of-text when the closing tag is
    # missing; the lazy ``.*?`` still stops at the first ``<|im_end|>``.
    match = re.search(
        r"<\|im_start\|>assistant\s*(.*?)\s*(?:<\|im_end\|>|\Z)", text, re.DOTALL
    )
    if match:
        return match.group(1).strip()

    return "\n".join(
        line
        for line in text.splitlines()
        if not line.strip().lower().startswith("assistant")
    ).strip()
# Load the tokenizer and model, placing the model on the GPU when one is available.
model_name = "Jaward/smollm2_360m_grpo_gsm8k_reasoner"
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

# The system message pins the <thinking>/<answer> response format; the user
# message is the question to solve.
messages = [
    {"role": "system", "content": "Please respond in this specific format ONLY:\n<thinking>\n input your reasoning behind your answer in between these reasoning tags.\n</thinking>\n<answer>\nyour answer in between these answer tags.\n</answer>\n"},
    {"role": "user", "content": "If there are 12 cookies in a dozen and you have 5 dozen, how many cookies do you have?"},
]

# add_generation_prompt=True ends the prompt with the opening of an assistant
# turn, so all new tokens go to the reply itself rather than to the model
# first writing the turn header.
input_text = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)

# NOTE(review): use_cache=False turns off key/value caching during generation;
# kept as in the original snippet -- confirm whether it is actually required.
outputs = model.generate(
    inputs,
    max_new_tokens=100,
    temperature=0.2,
    top_p=0.9,
    do_sample=True,
    use_cache=False,
)

# Special tokens are kept so get_assistant_response can find the turn markers.
decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)
print("Question:\n", get_user_prompt(input_text))
print("\nResponse:\n", get_assistant_response(decoded))
# OUTPUT:
"""
Question:
If there are 12 cookies in a dozen and you have 5 dozen, how many cookies do you have?
Response:
<thinking>
12 cookies in a dozen is 12/12 = 1.
5 dozen is 5 * 12 = 60.
So 60 cookies in total.
</thinking>
<answer>
You have 60 cookies.
"""
- Downloads last month
- 0
Inference Providers
NEW
This model is not currently available via any of the supported Inference Providers.