from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import gradio as gr


model_name = "EleutherAI/polyglot-ko-1.3b"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # half precision to reduce GPU memory use
    device_map="auto"           # place weights automatically on available devices
)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=128,  # cap the length of each generated continuation
    do_sample=True,      # sample rather than greedy-decode
    temperature=0.5,
    top_p=0.9,           # nucleus sampling
)
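
# Optional smoke test -- a minimal sketch assuming the model loaded successfully;
# uncomment to check generation before starting the UI:
# print(pipe("안녕하세요")[0]["generated_text"])  # "Hello" in Korean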

def chat_fn(prompt):
    try:
        outputs = pipe(prompt)
        # the pipeline returns a list of dicts; "generated_text" includes the prompt
        return outputs[0]["generated_text"]
    except Exception as e:
        print(f"Error in chat_fn: {e}")
        return f"Error: {e}"

with gr.Blocks() as demo:
    with gr.Row():
        input_box = gr.Textbox(label="Prompt", lines=2)
    with gr.Row():
        output_box = gr.Textbox(label="Response")

    btn = gr.Button("Generate")
    # api_name exposes this click handler as a named API endpoint on the same
    # app, so a separate gr.Interface is not needed for API access
    btn.click(chat_fn, inputs=input_box, outputs=output_box, api_name="predict")

    # Examples must be created inside the Blocks context to be rendered
    gr.Examples(
        examples=["안녕?", "한국에 대해 말해줘"],  # "Hi?", "Tell me about Korea"
        inputs=input_box
    )

if __name__ == "__main__":
    demo.queue()  # queue requests so concurrent generations don't block the UI
    demo.launch(share=False)
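
# Example client call -- a minimal sketch, assuming the app is running locally
# on Gradio's default port and the gradio_client package is installed:
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   print(client.predict("안녕?", api_name="/predict"))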