
Usage with Transformers


import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

BASE_MODEL = "sh2orc/Llama-3-Korean-8B"

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",  # requires the flash-attn package
    device_map="cuda:0",
)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token  # Llama-3 defines no pad token; reuse EOS
tokenizer.padding_side = 'right'

instruction = 'ν• λΆ€ 결제 λŒ€ν•΄μ„œ μ„€λͺ…ν•΄μ€˜'  # "Explain installment payments"

pipe = pipeline("text-generation", 
                model=model, 
                tokenizer=tokenizer, 
                max_new_tokens=1024)

messages = [
    {"role": "user", "content": instruction},
]

prompt = pipe.tokenizer.apply_chat_template(
        messages, 
        tokenize=False, 
        add_generation_prompt=True
)

outputs = pipe(
    prompt,
    do_sample=True,
    temperature=0.8,
    top_k=10,
    top_p=0.9,
    add_special_tokens=False,  # the chat template already prepends <|begin_of_text|>
    eos_token_id=[
        pipe.tokenizer.eos_token_id,
        pipe.tokenizer.convert_tokens_to_ids("<|eot_id|>"),  # Llama-3 end-of-turn token
    ],
)

print(outputs[0]['generated_text'][len(prompt):])
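
For reference, the pipeline call above can also be expressed as a direct model.generate() call; a minimal sketch, reusing the model, tokenizer, and messages defined above:

input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)

generated = model.generate(
    input_ids,
    max_new_tokens=1024,
    do_sample=True,
    temperature=0.8,
    top_k=10,
    top_p=0.9,
    eos_token_id=[
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ],
)

# Decode only the newly generated tokens, skipping the prompt.
print(tokenizer.decode(generated[0][input_ids.shape[-1]:], skip_special_tokens=True))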

Result (translated from Korean)

An installment payment is a way of paying for an item by splitting the price over a set period. For example, if you buy a 500,000-won product on a 10-month installment plan, you pay 50,000 won per month for 10 months. Installments have the advantage of letting you use the item interest-free for a certain period, but at the same time late fees may be charged and you incur debt. Therefore, when paying in installments, you should carefully consider your financial situation and the item you intend to buy.

Usage with vLLM

from vllm import LLM, SamplingParams
from transformers import AutoTokenizer

BASE_MODEL = "sh2orc/Llama-3-Korean-8B"

llm = LLM(model=BASE_MODEL)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

instruction = 'μΉ΄λ“œ ν• λΆ€ κ²°μ œμ— λŒ€ν•΄μ„œ μ•Œλ €μ€˜'  # "Tell me about card installment payments"

messages = [
    {
      "role": "system",
      "content": "당신은 ν›Œλ₯­ν•œ AI λΉ„μ„œμž…λ‹ˆλ‹€. You are a great AI assistant."
    },
    {
      "role": "user",
      "content": instruction
    }, 
]


prompt_message = tokenizer.apply_chat_template(
        messages, 
        tokenize=False, 
        add_generation_prompt=True,
)

eos_token_id = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]

outputs = llm.generate(
    prompt_message,
    SamplingParams(
        stop_token_ids=eos_token_id,
        temperature=0.6,
        top_p=0.8,
        max_tokens=4096,
    ),
)

for output in outputs:
    generated_text = output.outputs[0].text
    print(generated_text)

Result (translated from Korean)

A card installment payment is a method of splitting the amount to be paid over a set period, repaying the amount the card company has advanced. With card installments you can choose the amount to repay over a given period, and you pay interest in the process. Card installments can be more favorable than paying in one lump sum, but costs increase because interest must be paid.
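
Beyond the offline LLM class above, vLLM can also serve the model over an OpenAI-compatible HTTP API; a minimal sketch (the port and client settings below are assumptions, not part of the original card):

# Start the server first, e.g.:
#   python -m vllm.entrypoints.openai.api_server --model sh2orc/Llama-3-Korean-8B
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")  # local server, no real key needed
resp = client.chat.completions.create(
    model="sh2orc/Llama-3-Korean-8B",
    messages=[{"role": "user", "content": "μΉ΄λ“œ ν• λΆ€ κ²°μ œμ— λŒ€ν•΄μ„œ μ•Œλ €μ€˜"}],
    temperature=0.6,
    top_p=0.8,
)
print(resp.choices[0].message.content)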

Model size: 8.17B parameters Β· Tensor type: BF16 Β· Format: Safetensors