Edit model card

# kobart-summary

## How to use

# Example: summarize a Korean news article with a KoBART seq2seq model.
# NOTE(review): this card is titled "kobart-summary" but the code loads
# "EbanLee/kobart-title" — confirm which repo id is intended.
from transformers import PreTrainedTokenizerFast, BartForConditionalGeneration

# Load Model and Tokenizer (downloads weights from the Hugging Face Hub)
tokenizer = PreTrainedTokenizerFast.from_pretrained("EbanLee/kobart-title")
model = BartForConditionalGeneration.from_pretrained("EbanLee/kobart-title")

# Encoding
# FIX: the original snippet broke this string literal across two physical
# lines, which is a SyntaxError. Adjacent string literals inside
# parentheses are concatenated at compile time, keeping the article text
# byte-identical while remaining valid Python.
input_text = (
    "2017 ๋…„ 2 ์›”์— ํŒŒ์‚ฐํ•œ ํ•œ์ง„ํ•ด์šด ์‚ฌํƒœ๋กœ ์ธํ•œ ์‹ค์ง์ž ๊ทœ๋ชจ๊ฐ€ ์ „๊ตญ์ ์œผ๋กœ 1 ๋งŒ ๋ช…์— ์ด๋ฅด๋ €๋‹ค๊ณ  ํ•œ๋‹ค.2 ๋…„ ์—ฌ๊ฐ€ ํ๋ฅธ ์ง€๊ธˆ ์ด๋“ค ์‹ค์ง์ž๋“ค์ด ์–ด๋–ป๊ฒŒ ์ง€๋‚ด๋Š”์ง€, ์ƒˆ๋กœ์šด ์ง์žฅ์€ ๊ตฌํ–ˆ๋Š”์ง€ ๊ถ๊ธˆํ•˜๋‹ค.ํ•ด์šดํ•ญ๋งŒ์—…๊ณ„ ์‹ค์ง์ž๋“ค์„ ์œ„ํ•œ ์žฌ์ทจ์—… ํ”„๋กœ๊ทธ๋žจ์ด ๋ถ€์‚ฐ์—์„œ ์ถ”์ง„๋œ๋‹ค๋Š” ๋ฐ˜๊ฐ€์šด ์†Œ์‹์ด ๋“ค๋ฆฐ๋‹ค.ํ•ด์šดํ•ญ๋งŒ์—…๊ณ„ ์‹ค์ง์ž๋ฅผ ๋Œ€์ƒ์œผ๋กœ ์žฌ์ทจ์—…์„ ๋•๋Š” ์ •์ฑ…์ด ์‹œํ–‰๋˜๋Š” ๊ฒƒ์€ ์ด๋ฒˆ์ด ์ฒ˜์Œ์ด๋ผ๋‹ˆ ์‚ฌ์‹ค ๋„ˆ๋ฌด ๋Šฆ์—ˆ๋‹ค.๋ช…์ƒ‰์ด ํ•ด์–‘์ˆ˜๋„ ๋ถ€์‚ฐ์ธ๋ฐ ์ง€์—ญ ์‚ฌํšŒ์˜ ๊ด€์‹ฌ์ด ๋ถ€์กฑํ–ˆ๋‹ค.๊ทธ๋™์•ˆ์—๋„ ๋…ธ๋™๋ถ€, ์‚ฐ์—…๋ถ€, ํ–‰์•ˆ๋ถ€ ๋“ฑ์—์„œ ํ•ด์šดํ•ญ๋งŒ์—…๊ณ„๋ฅผ ๋Œ€์ƒ์œผ๋กœ ๋‹ค์–‘ํ•œ ์ผ์ž๋ฆฌ ์ฐฝ์ถœ ์‚ฌ์—…์ด ์žˆ๊ธฐ๋Š” ํ–ˆ๋‹ค.ํ•˜์ง€๋งŒ ๋Œ€๋ถ€๋ถ„์ด ์ฒญ๋…„ ์œ„์ฃผ์˜ ์ •์ฑ…์ด์–ด์„œ ์‹ค์ง์ž๋“ค์ด ํ˜œํƒ์„ ๋ณด๊ธฐ๋Š” ์–ด๋ ค์› ๋‹ค.ํ•ด์šดํ•ญ๋งŒ์—…๊ณ„์—์„œ ์ค‘์žฅ๊ธฐ ์ด์ƒ์˜ ์˜ค๋žœ ๊ฒฝ๋ ฅ์ž๋Š” ์ผ์ž๋ฆฌ ์‚ฌ์—…์—์„œ ์šฐ๋Œ€๋ฐ›๊ธฐ๋Š”์ปค๋…• ๋Œ€์ƒ์ž์—์„œ ์ œ์™ธ๋˜๋Š” ์‹ ์„ธ์˜€๋‹ค๋‹ˆ ์ด๋ž˜์„œ๋Š” ์•ˆ ๋  ์ผ์ด๋‹ค.์ด๋“ค์— ๋Œ€ํ•œ ์ •๋ถ€์˜ ์ธ๊ฑด๋น„ ์ง€์›์ด ์—†์–ด ์—…์ฒด๋“ค์˜ ์ฑ„์šฉ ์˜์š•์€ ๋‚ฎ์•˜๊ณ , ์ „๋ฌธ์ ์ธ ๊ต์œก๊ณผ๋„ ์—ฐ๊ณ„๋˜์ง€ ์•Š์•„ ํ˜„์žฅ์˜ ๋‹จ๊ธฐ์  ์ผ์ž๋ฆฌ๋งŒ ์–‘์‚ฐํ•ด ์™”๋‹ค๊ณ  ํ•œ๋‹ค.ํ•ด์šดํ•ญ๋งŒ์—…๊ณ„ ์žฌ์ทจ์—… ํ”„๋กœ๊ทธ๋žจ์€ ๊ณต๊ณต๊ทผ๋กœ ์‚ฌ์—…์ด ์•„๋‹ˆ๋ผ๋Š” ์ ์„ ๋ช…์‹ฌํ•ด์•ผ ํ•œ๋‹ค.์˜ˆ์‚ฐ ๋‚ญ๋น„๋งŒ ํ•˜๋Š” ์žฌ์ทจ์—… ํ”„๋กœ๊ทธ๋žจ์€ ํ•  ์ด์œ ๊ฐ€ ์—†๋‹ค.ํ–ฅํ›„ ์ „ํ™˜๋  ์ง๋ฌด์— ๋”ฐ๋ผ ๋งž์ถคํ˜• ์žฌ๊ต์œก์„ ํ•œ ๋’ค ์žฌ์ทจ์—…์„ ์ง€์›ํ•˜๋Š” ์‚ฌ์—…์ด ๋˜์–ด์•ผ ํ•œ๋‹ค.๊ทธ๋ž˜์•ผ 1 ์ธ๋‹น 2000 ๋งŒ ์›์— ๋‹ฌํ•  ๊ฒƒ์ด๋ผ๋Š” ์ทจ์—…์ง€์›๊ธˆ๊ณผ 500 ๋งŒ ์›์˜ ์žฌ๊ต์œก๋น„๊ฐ€ ์•„๊น์ง€ ์•Š๋‹ค.ํ•ด์šดํ•ญ๋งŒ ์‹ค์ง์ž๋“ค์€ ๊ฒฝํ—˜์ด ํ’๋ถ€ํ•œ ์šฐ์ˆ˜ ์ธ๋ ฅ์ด๋‹ค.์ทจ์—…์„ ์ค€๋น„ํ•˜๋Š” ๋ฒ ํ…Œ๋ž‘๋“ค์ด ๋ชจ์—ฌ์„œ ์ •๋ณด์™€ ๊ธฐ์ˆ ์„ ๊ต๋ฅ˜ํ•˜๊ณ  ๊ณต์œ ํ•  ์ง€์› ๊ฑฐ์  ํ”Œ๋žซํผ์ด ๋งˆ๋ จ๋˜๋ฉด ํšจ๊ณผ๋Š” ๋ฐฐ๊ฐ€๋  ๊ฒƒ์ด๋‹ค.๊ด€๊ฑด์€ ์˜ˆ์‚ฐ์ด๋‹ค.ํ•œ์ง„ํ•ด์šด์ด ๋ถ€๋„๋‚œ 2017 ๋…„๊ณผ 2018 ๋…„์—๋„ ํ•ด์šดํ•ญ๋งŒ ๋ถ„์•ผ "
    "ํ‡ด์ง์ž์— ๋Œ€ํ•œ ์ง€์› ์‚ฌ์—…์ด ์ถ”์ง„๋˜์—ˆ์ง€๋งŒ ๋งค๋ฒˆ ๊ตญํšŒ ๋ฌธํ„ฑ์„ ๋„˜์ง€ ๋ชปํ–ˆ๋‹ค.๋‹น์‹œ์—๋Š” ํ•ด์–‘์ˆ˜์‚ฐ๋ถ€ ์—ฐ๊ฐ„ ์‚ฌ์—…์— ํฌํ•จ๋˜์ง€ ๋ชปํ–ˆ์ง€๋งŒ ์˜ฌํ•ด ์ฒ˜์Œ์œผ๋กœ ํฌํ•จ๋˜๋ฉด์„œ ์˜ˆ์‚ฐ ์ง€์› ๊ฐ€๋Šฅ์„ฑ์ด ๋†’์€ ์ƒํ™ฉ์ด๋ผ๊ณ  ํ•œ๋‹ค.ํ•ด์šดํ•ญ๋งŒ์—…๊ณ„ ์žฌ์ทจ์—… ์ง€์› ์‚ฌ์—…์ด ๋ถ€์‚ฐ์„ ์‹œ์ž‘์œผ๋กœ ์ „๊ตญ์œผ๋กœ ํ™•๋Œ€๋˜๊ธฐ๋ฅผ ๊ธฐ๋Œ€ํ•œ๋‹ค.ํ•ด์šดํ•ญ๋งŒ์—…๊ณ„ ๋ถ€ํ™œ์˜ ๊ฒฌ์ธ์ฐจ๊ฐ€ ๋  ํ•ด์šดํ•ญ๋งŒ ์‹ค์ง์ž ์žฌ์ทจ์—… ํ”„๋กœ๊ทธ๋žจ์€ ์ œ๋Œ€๋กœ ์ถ”์ง„๋˜์–ด์•ผ ํ•œ๋‹ค."
)
# Pad/truncate to 1026 tokens and return a PyTorch tensor.
# NOTE(review): 1026 presumably matches the model's max position
# embeddings — confirm against the model config before changing.
input_ids = tokenizer.encode(input_text, return_tensors="pt", padding="max_length", truncation=True, max_length=1026)

# Generate Summary Text Ids
# Beam search (6 beams); output capped at 40 tokens, at least 3;
# repetition_penalty=1.5 discourages repeated phrases in the summary.
summary_text_ids = model.generate(
    input_ids=input_ids,
    bos_token_id=model.config.bos_token_id,
    eos_token_id=model.config.eos_token_id,
    length_penalty=1.0,
    max_length=40,
    min_length=3,
    num_beams=6,
    repetition_penalty=1.5,
)

# Decoding Text Ids — drop special tokens, print the first (best) beam
print(tokenizer.decode(summary_text_ids[0], skip_special_tokens=True))
Downloads last month
43
Safetensors
Model size
124M params
Tensor type
F32
ยท