Usage for Transformers


import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

BASE_MODEL = "sh2orc/Llama-3-Korean-8B"

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",  # requires the flash-attn package
    device_map="cuda:0",
)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token  # Llama 3 ships without a pad token; reuse EOS
tokenizer.padding_side = 'right'

instruction = 'ν• λΆ€ 결제 λŒ€ν•΄μ„œ μ„€λͺ…ν•΄μ€˜'  # "Explain installment payments"

pipe = pipeline("text-generation", 
                model=model, 
                tokenizer=tokenizer, 
                max_new_tokens=1024)

messages = [
    {"role": "user", "content": instruction},
]

prompt = pipe.tokenizer.apply_chat_template(
        messages, 
        tokenize=False, 
        add_generation_prompt=True
)

outputs = pipe(
    prompt,
    do_sample=True,
    temperature=0.8,
    top_k=10,
    top_p=0.9,
    add_special_tokens=True,
    eos_token_id=[
        pipe.tokenizer.eos_token_id,
        pipe.tokenizer.convert_tokens_to_ids("<|eot_id|>"),  # Llama 3 end-of-turn token
    ]
)

print(outputs[0]['generated_text'][len(prompt):])

Result

ν• λΆ€ μš”κΈˆμ€ 물건을 μ‚΄ λ•Œ, κ·Έ 값을 일정 κΈ°κ°„ λ™μ•ˆ λ‚˜λˆ μ„œ μ§€λΆˆν•˜λŠ” λ°©μ‹μž…λ‹ˆλ‹€. 예λ₯Ό λ“€μ–΄, 50λ§Œμ›μ§œλ¦¬ μ œν’ˆμ„ 10κ°œμ›” ν• λΆ€λ‘œ κ΅¬λ§€ν•œλ‹€λ©΄, 각 λ‹¬λ§ˆλ‹€ 5λ§Œμ›μ”© 10κ°œμ›”μ΄ λ™μ•ˆ μ§€λΆˆν•˜κ²Œ λ©λ‹ˆλ‹€. μ΄λ•Œ, ν• λΆ€ μš”κΈˆμ€ 일정 κΈ°κ°„ λ™μ•ˆ 이자 없이 물건을 μ‚¬μš©ν•  수 μžˆλŠ” 이점이 μžˆμ§€λ§Œ, λ™μ‹œμ— μ—°μ²΄λ£Œκ°€ 뢀과될 수 있으며, 채무가 λ°œμƒν•˜κ²Œ λ©λ‹ˆλ‹€. λ”°λΌμ„œ, ν• λΆ€λ₯Ό μ‚¬μš©ν•  λ•ŒλŠ” μžμ‹ μ˜ μž¬μ • μƒνƒœμ™€ ꡬ맀할 물건을 잘 κ³ λ €ν•΄μ•Ό ν•©λ‹ˆλ‹€.

Usage for vLLM

from vllm import LLM, SamplingParams
from transformers import AutoTokenizer

BASE_MODEL = "sh2orc/Llama-3-Korean-8B"

llm = LLM(model=BASE_MODEL)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

instruction = 'μΉ΄λ“œ ν• λΆ€ κ²°μ œμ— λŒ€ν•΄μ„œ μ•Œλ €μ€˜'  # "Tell me about card installment payments"

messages = [
    {
      "role": "system",
      "content": "당신은 ν›Œλ₯­ν•œ AI λΉ„μ„œμž…λ‹ˆλ‹€. You are a great AI assistant."
    },
    {
      "role": "user",
      "content": instruction
    }, 
]


prompt_message = tokenizer.apply_chat_template(
        messages, 
        tokenize=False, 
        add_generation_prompt=True,
)

# Stop on either the default EOS or Llama 3's <|eot_id|> end-of-turn token
eos_token_id = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]

sampling_params = SamplingParams(
    stop_token_ids=eos_token_id,
    temperature=0.6,
    top_p=0.8,
    max_tokens=4096,
)
outputs = llm.generate(prompt_message, sampling_params)

for output in outputs:
    generated_text = output.outputs[0].text
    print(generated_text)

Result

A card installment payment is a way of repaying a purchase amount in portions over a set period; in effect, you are repaying money the card company has lent you. You choose an amount you can repay over a fixed period, and interest is charged in the process. Paying in installments can be more favorable than paying in full, but the interest you pay increases the overall cost.
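
llm.generate also accepts a list of prompts and batches them in a single call, which is where vLLM's throughput advantage shows. A minimal sketch reusing the tokenizer and eos_token_id from above; the questions are illustrative:

# Sketch: batched generation — vLLM schedules all prompts together
questions = [
    "Explain installment payments.",  # illustrative prompts
    "What is the difference between a credit card and a debit card?",
]
prompts = [
    tokenizer.apply_chat_template(
        [{"role": "user", "content": q}],
        tokenize=False,
        add_generation_prompt=True,
    )
    for q in questions
]
batch_outputs = llm.generate(
    prompts,
    SamplingParams(stop_token_ids=eos_token_id, temperature=0.6, top_p=0.8, max_tokens=1024),
)
for out in batch_outputs:
    print(out.outputs[0].text)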

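Recent vLLM releases also expose an LLM.chat helper that applies the chat template internally, removing the manual apply_chat_template step. A minimal sketch, assuming a vLLM version that provides LLM.chat and reusing the messages and eos_token_id from above:

# Sketch: chat-style generation — vLLM applies the chat template itself
# (requires a vLLM release that includes LLM.chat)
chat_outputs = llm.chat(
    messages,
    SamplingParams(stop_token_ids=eos_token_id, temperature=0.6, top_p=0.8, max_tokens=4096),
)
print(chat_outputs[0].outputs[0].text)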