Edit model card

kobart-summary

How to use

from transformers import PreTrainedTokenizerFast, BartForConditionalGeneration

# Load the tokenizer and the summarization model from the same checkpoint so
# that the vocabulary and the special-token ids stay consistent.
tokenizer = PreTrainedTokenizerFast.from_pretrained("EbanLee/kobart-summary-v2")
model = BartForConditionalGeneration.from_pretrained("EbanLee/kobart-summary-v2")

# Encoding
# Korean source passage to summarize. It must be real UTF-8 Korean text: a
# mis-decoded (mojibake) copy of the passage tokenizes to garbage and yields a
# meaningless summary.
input_text = "일반적으로 로컬푸드 농산물은 도매시장 경매가보다 높고 소매가보다 낮은 수준에서 가격이 결정된다. 농가들이 자율적으로 가격을 결정하고 있지만, 시장가격을 참고하기 때문에 대체로 적정한 가격이 설정된다. 문제는 태풍, 폭우, 폭염 또는 공급과잉에 따른 시장가격 등락이 심해질 때이다. 완주군 로컬푸드 협동조합은 위와 같이 외부요인에 의해 농산물 가격에 급등락이 발생했을 때에도 연중 일정 수준의 가격으로 판매되도록 유통 안정 기금을 운용하고 있다. 그리고 농산물 출하자에게 사전에 동의를 구해 일부 관리 품목은 가격 상하한 및 판매량을 조절해 납품하도록 교육하고 있다."
# The input is truncated and padded to exactly 1026 token ids and returned as
# a PyTorch tensor.
# NOTE(review): 1026 presumably matches the checkpoint's maximum input
# length -- confirm against the model config before changing it.
input_ids = tokenizer.encode(
    input_text,
    return_tensors="pt",
    padding="max_length",
    truncation=True,
    max_length=1026,
)

# Generate Summary Text Ids
# Beam search with 6 beams; the summary is constrained to 12-256 tokens, and
# both a length penalty and a repetition penalty of 1.5 are applied.
summary_text_ids = model.generate(
    input_ids=input_ids,
    bos_token_id=model.config.bos_token_id,
    eos_token_id=model.config.eos_token_id,
    length_penalty=1.5,
    max_length=256,
    min_length=12,
    num_beams=6,
    repetition_penalty=1.5,
)

# Decoding Text Ids
# Decode the first returned sequence, dropping special tokens, and print it.
print(tokenizer.decode(summary_text_ids[0], skip_special_tokens=True))
Downloads last month
123
Safetensors
Model size
124M params
Tensor type
F32
·