Edit model card

Description

This model is a 10.2 billion parameter model that combines two sets of 24 layers each from CALM2-7B-chat using slerp-merge.

Chat Template

USER: {user_message1}
ASSISTANT: {assistant_message1}<|endoftext|>
USER: {user_message2}
ASSISTANT: {assistant_message2}<|endoftext|>
USER: {user_message3}
ASSISTANT: {assistant_message3}<|endoftext|>

Tutorial

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

tokenizer = AutoTokenizer.from_pretrained("sudy-super/baku-10b-chat-v2")
model = AutoModelForCausalLM.from_pretrained("sudy-super/baku-10b-chat-v2", device_map="auto", torch_dtype=torch.bfloat16)

raw_prompt = "ไป•ไบ‹ใฎ็†ฑๆ„ใ‚’ๅ–ใ‚Šๆˆปใ™ใŸใ‚ใฎใ‚ขใ‚คใƒ‡ใ‚ขใ‚’5ใคๆŒ™ใ’ใฆใใ ใ•ใ„ใ€‚"
prompt = f"USER:{raw_prompt}\nASSISTANT:"

token_ids = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
with torch.no_grad():
    output_ids = model.generate(
        token_ids.to(model.device),
        max_new_tokens=100,
        do_sample=True,
        temperature=0.8,
        pad_token_id=tokenizer.pad_token_id,
        bos_token_id=tokenizer.bos_token_id,
        eos_token_id=tokenizer.eos_token_id
    )

result = tokenizer.decode(output_ids.tolist()[0])
print(result)
Downloads last month
20
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.