Spaces:
Runtime error
Runtime error
import torch | |
import streamlit as st | |
from transformers import GPT2Tokenizer, GPT2LMHeadModel, PreTrainedTokenizerFast | |
import numpy as np | |
model = GPT2LMHeadModel.from_pretrained("jason9693/soongsil-univ-gpt-v1") | |
tokenizer = PreTrainedTokenizerFast.from_pretrained("jason9693/soongsil-univ-gpt-v1") | |
category_map = { | |
"์ญ์ค๋ ์ํ": "<unused5>", | |
"๋ชจ๋์ ์ฐ์ ": "<unused3>", | |
"๋ํ์ ์ก๋ด๋ฐฉ": "<unused4>" | |
} | |
st.markdown("""# University Community KoGPT2 : ์ญ์ค๋ ์๋ธ๋ฆฌํ์๋ด | |
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1p6DIxsesi3eJNPwFwvMw0MeM5LkSGoPW?usp=sharing) [![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/jason9693/UCK-GPT2/issues) ![GitHub](https://img.shields.io/github/license/jason9693/UCK-GPT2) | |
## ๋ํ ์ปค๋ฎค๋ํฐ ๊ฒ์๊ธ ์์ฑ๊ธฐ | |
SKT-AI์์ ๊ณต๊ฐํ [KoGPT2](https://github.com/SKT-AI/KoGPT2) ๋ชจ๋ธ์ ํ์ธํ๋ํ์ฌ ๋ํ ์ปค๋ฎค๋ํฐ ๊ฒ์๊ธ์ ์์ฑํ๋ ๋ชจ๋ธ์ ๋๋ค. ์ด ์๋ธ๋ฆฌํ์, ์บ ํผ์คํฝ ๋ฐ์ดํฐ 22๋ง๊ฐ๋ฅผ ์ด์ฉํด์ ํ์ต์ ์งํํ์ผ๋ฉฐ, ํ์ต์๋ ๋๋ต **3์ผ**์ ๋ ์์๋์์ต๋๋ค. | |
* [GPT ๋ ผ๋ฌธ ๋ฆฌ๋ทฐ ๋งํฌ](https://www.notion.so/Improve-Language-Understanding-by-Generative-Pre-Training-GPT-afb4b5ef6e984961ac022b700c152b6b) | |
## ์์ฐํ๊ธฐ | |
""") | |
seed = st.text_input("Seed", "์กฐ๋ง์ ๊ธฐ๋ ๊ด") | |
category = st.selectbox("Category", list(category_map.keys())) | |
go = st.button("Generate") | |
st.markdown("## ์์ฑ ๊ฒฐ๊ณผ") | |
if go: | |
input_context = category_map[category] + seed | |
input_ids = tokenizer(input_context, return_tensors="pt") | |
outputs = model.generate( | |
input_ids=input_ids["input_ids"], | |
max_length=250, | |
num_return_sequences=1, | |
no_repeat_ngram_size=3, | |
repetition_penalty=2.0, | |
do_sample=True, | |
use_cache=True, | |
eos_token_id=tokenizer.eos_token_id | |
) | |
st.write(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0].replace("<unused2>", "\n")) | |