# YEOLLM / app.py
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Select the base model and the LoRA adapter weights
base_model = "EleutherAI/polyglot-ko-5.8b" # "beomi/KoAlpaca-Polyglot-5.8B"
lora_weights = "KimSHine/YEOLLM_5.8B-lora_v3" # 'KimSHine/Scenario_Koalpaca_5.8B-lora'
load_8bit = True

# Base model tokenizer
tokenizer1 = AutoTokenizer.from_pretrained(base_model)
## set the pad / eos token ids expected by the fine-tuned weights
tokenizer1.pad_token_id = 0
tokenizer1.eos_token_id = 2
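# Optional sanity check (a sketch, not in the original): print the tokenizer's own
# special-token mapping to confirm that the hard-coded ids 0 and 2 match it.
# print(tokenizer1.special_tokens_map, tokenizer1.pad_token_id, tokenizer1.eos_token_id)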
"""### Base Model ๋ถˆ๋Ÿฌ์˜ค๊ธฐ"""
# KoAlpaca-polyglot-5.8B
model1 = AutoModelForCausalLM.from_pretrained(
base_model,
load_in_8bit=load_8bit,
torch_dtype=torch.float16,
device_map="auto",
)
model1.config.pad_token_id = 0
model1.config.eos_token_id = 2
"""### LoRA Model ๋ถˆ๋Ÿฌ์˜ค๊ธฐ
Fine Tuningํ•œ Model
"""
model1 = PeftModel.from_pretrained(
model1,
lora_weights,
torch_dtype=torch.float16,
)
model1.config.pad_token_id = 0 # unk
model1.config.bos_token_id = 0
model1.config.eos_token_id = 2
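# Not part of the original, but a common extra step before serving: put the model into
# inference mode so dropout layers are disabled during generation.
# model1.eval()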
# Per-format instruction prompt (in Korean, matching the fine-tuning data) and
# generation settings. Keys: 다큐멘터리 (documentary), 인터뷰 (interview),
# 뉴스 (news), 현대드라마 (modern drama), 사극 (historical drama).
val_dict = {
    "다큐멘터리": {
        'instruction': "줄거리를 참고해서 다큐멘터리 형식의 대본을 만드시오. 다큐멘터리는 진지한 대화입니다. 같은 말을 반복하지 마세요.",
        'temperature': 0.65,
        'top_p': 0.95,
        'top_k': 40,
        'max_new_tokens': 2048,
        'no_repeat_ngram_size': 5,
        'do_sample': True,
        'num_beams': 5},
    "인터뷰": {
        'instruction': "줄거리를 참고해서 인터뷰 형식의 대본을 만드시오. 인터뷰는 인터뷰어와 인터뷰이의 대화이며 인터뷰어가 질문을 하고 인터뷰이가 대답을 하는 형식입니다. 같은 말을 반복하지 마시오.",
        'temperature': 0.7,
        'top_p': 0.95,
        'top_k': 40,
        'max_new_tokens': 2048,
        'no_repeat_ngram_size': 5,
        'do_sample': True,
        'num_beams': 5},
    "뉴스": {
        'instruction': "줄거리를 참고해서 뉴스 형식으로 대본을 만드시오. 뉴스 형식의 대본은 앵커가 줄거리를 바탕으로 최대한 사실적인 내용을 생동감있게 설명하는 대본입니다. 뉴스는 앵커가 인사말과 본론, 마지막 인사말로 구성되어 있다. 같은 말을 반복하지 마시오.",
        'temperature': 0.7,
        'top_p': 0.95,
        'top_k': 40,
        'max_new_tokens': 2048,
        'no_repeat_ngram_size': 5,
        'do_sample': True,
        'num_beams': 5},
    "현대드라마": {
        'instruction': "줄거리를 참고해서 드라마 형식으로 대본을 만드시오.",
        'temperature': 0.8,
        'top_p': 0.95,
        'top_k': 40,
        'max_new_tokens': 2048,
        'no_repeat_ngram_size': 5,
        'do_sample': True,
        'num_beams': 5},
    "사극": {
        'instruction': "줄거리를 참고해서 드라마 형식으로 대본을 만드시오.",
        'temperature': 0.8,
        'top_p': 0.95,
        'top_k': 40,
        'max_new_tokens': 2048,
        'no_repeat_ngram_size': 5,
        'do_sample': True,
        'num_beams': 5}
}
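# Sketch (an aside, not used by the app): every field except 'instruction' in a val_dict
# entry is a valid GenerationConfig argument, so a config could also be built by unpacking:
# gen_kwargs = {k: v for k, v in val_dict["뉴스"].items() if k != "instruction"}
# cfg = GenerationConfig(**gen_kwargs)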
def yeollm_text(selected_value, summary):
    # KoAlpaca-style prompt: task description, instruction, plot summary, then the script slot.
    prompt = f"""아래는 작업을 설명하는 지시문과 대본을 생성하는데 참고할 줄거리입니다.\n
### 지시문:
{val_dict[selected_value]['instruction']}
### 줄거리:
{summary}
### 대본:
"""
    inputs = tokenizer1(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to(DEVICE)

    generation_config = GenerationConfig(
        do_sample=val_dict[selected_value]['do_sample'],
        temperature=val_dict[selected_value]['temperature'],
        top_p=val_dict[selected_value]['top_p'],
        top_k=val_dict[selected_value]['top_k'],
        pad_token_id=0,  # explicit pad token id
        no_repeat_ngram_size=val_dict[selected_value]['no_repeat_ngram_size'],
        # num_beams=num_beams,
        # **kwargs,
    )

    # Generate text
    with torch.no_grad():
        generation_output = model1.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=val_dict[selected_value]['max_new_tokens'],
        )
    s = generation_output.sequences[0]
    output = tokenizer1.decode(s)

    # Keep only the generated script and drop a trailing end-of-text marker, if any.
    output = output.split('### 대본:')[1]
    if output.endswith('<|endoftext|>'):
        output = output[:-len('<|endoftext|>')]
    return output.lstrip()
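# Minimal local smoke test (hypothetical summary text; kept commented out so the Space
# only runs generation through the Gradio UI below):
# if __name__ == "__main__":
#     demo_summary = "주인공이 오랜 타향살이 끝에 고향으로 돌아와 가족과 재회한다."
#     print(yeollm_text("다큐멘터리", demo_summary))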
"""## text davinci 003 ๋ถˆ๋Ÿฌ์˜ค๊ธฐ"""
import openai
OPENAI_API_KEY = ''
openai.api_key = OPENAI_API_KEY
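# A common alternative (an assumption about deployment, not in the original) is to read the
# key from the environment instead of hard-coding it in the Space:
# import os
# openai.api_key = os.environ.get("OPENAI_API_KEY", "")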
model2 = 'text-davinci-003' #'gpt-3.5-turbo'
max_tokens = 2048
temperature = 0.3
top_p = 1
def davinci_text(selected_value, summary):
    prompt = f"""
줄거리를 참고해서 {val_dict[selected_value]['instruction']} 형식의 대본을 만들어줘.
### 줄거리:
{summary}
### 대본:
"""
    response = openai.Completion.create(
        engine=model2,
        prompt=prompt,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        n=1,
    )
    return response.choices[0].text.strip()
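# Note (assumption about the installed client): openai.Completion / openai.ChatCompletion are
# the pre-1.0 openai-python interfaces. With openai>=1.0 the call above would look roughly like:
# client = openai.OpenAI(api_key=OPENAI_API_KEY)
# completion = client.completions.create(model=model2, prompt=prompt,
#                                        temperature=temperature, max_tokens=max_tokens)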
"""## gpt 3.5 turbo ๋ถˆ๋Ÿฌ์˜ค๊ธฐ"""
import openai
OPENAI_API_KEY = ''
openai.api_key = OPENAI_API_KEY
model4 = 'gpt-3.5-turbo' #'gpt-3.5-turbo'
max_tokens = 2048
temperature = 0.3
Top_p = 1
def gpt_text(selected_value, summary):
    prompt = f"""
### 지시문:
줄거리를 참고해서 {val_dict[selected_value]['instruction']} 형식의 대본을 만들어줘.
### 줄거리:
{summary}
### 대본:
"""
    response = openai.ChatCompletion.create(
        model=model4,
        messages=[
            {"role": "system", "content": "아래는 작업을 설명하는 지시문과 대본을 생성하는데 참고할 줄거리와 짝을 이루는 예제입니다. 요청을 적절히 만족하는 대본을 작성하세요."},
            {"role": "user", "content": prompt},
        ],
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        n=1,
    )
    # n=1, so take the single returned choice
    content = response["choices"][0]["message"]["content"]
    return content.lstrip()
"""# gradio"""
import gradio as gr
generator1 = gr.Interface(
fn=yeollm_text,
inputs=[
gr.Dropdown(["๋‹คํ๋ฉ˜ํ„ฐ๋ฆฌ", "์ธํ„ฐ๋ทฐ", "๋‰ด์Šค", 'ํ˜„๋Œ€๋“œ๋ผ๋งˆ', '์‚ฌ๊ทน'], label="ํ˜•์‹"),
#gr.inputs.Textbox(label="Instruction",placeholder="์ค„๊ฑฐ๋ฆฌ๋ฅผ ์ฐธ๊ณ ํ•ด์„œ ํ˜„๋Œ€ ๋“œ๋ผ๋งˆ ํ˜•์‹์˜ ๋Œ€๋ณธ์„ ๋งŒ๋“ค์–ด์ค˜"),
gr.inputs.Textbox(label="Summary",placeholder="๋Œ€๋ณธ์œผ๋กœ ๋ฐ”๊พธ๊ณ  ์‹ถ์€ ์ค„๊ฑฐ๋ฆฌ"),
],
outputs=gr.outputs.Textbox(label="Yeollm Scenario"),
title="Yeollm Scenario Generation",
description="Generate scenarios using the Yeollm model.",
theme="huggingface"
)
generator2 = gr.Interface(
    fn=davinci_text,
    inputs=[
        gr.Dropdown(["다큐멘터리", "인터뷰", "뉴스", "현대드라마", "사극"], label="형식"),
        gr.Textbox(label="Summary"),
    ],
    outputs=gr.Textbox(label="Davinci Scenario"),
    title="Davinci Generation",
    description="Generate scenarios using the Davinci model.",
    theme="huggingface"
)
generator3 = gr.Interface(
    fn=gpt_text,
    inputs=[
        gr.Dropdown(["다큐멘터리", "인터뷰", "뉴스", "현대드라마", "사극"], label="형식"),
        gr.Textbox(label="Summary"),
    ],
    outputs=gr.Textbox(label="GPT Scenario"),
    title="GPT Generation",
    description="Generate scenarios using the GPT model.",
    theme="huggingface"
)
gr.Parallel(generator1, generator2, generator3).launch()
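# Optional tweak (not in the original): when running outside a hosted Space, a public
# share link can be requested instead:
# gr.Parallel(generator1, generator2, generator3).launch(share=True)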