# app.py — Gradio demo for EleutherAI/polyglot-ko-1.3b
# (Hugging Face Space "plannist"; stripped file-viewer residue: v1 / raw / history blame / 1.67 kB)
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import gradio as gr
# Korean causal-LM checkpoint used for both tokenization and generation.
model_name = "EleutherAI/polyglot-ko-1.3b"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Half precision + automatic device placement so the 1.3B model fits in memory.
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, device_map="auto"
)
# Shared text-generation pipeline; sampling parameters are fixed for all requests.
_generation_kwargs = dict(
    max_new_tokens=128,
    do_sample=True,
    temperature=0.5,
    top_p=0.9,
)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, **_generation_kwargs)
def chat_fn(prompt):
    """Generate a continuation of `prompt`; on failure, return the error text."""
    try:
        # The text-generation pipeline returns a list of dicts, each with the
        # full generated text under the "generated_text" key.
        result = pipe(prompt)
        return result[0]["generated_text"]
    except Exception as e:
        # Log for debugging, then surface the error in the UI instead of crashing.
        print(f"Error in chat_fn: {str(e)}")
        return f"Error: {str(e)}"
# Gradio Blocks UI: prompt textbox in, generated text out, with example prompts.
with gr.Blocks() as demo:
    with gr.Row():
        input_box = gr.Textbox(label="Prompt", lines=2)
    with gr.Row():
        output_box = gr.Textbox(label="Response")
    btn = gr.Button("Generate")
    # Clicking the button runs generation; Blocks apps also expose click
    # handlers over the HTTP API automatically.
    btn.click(chat_fn, inputs=input_box, outputs=output_box)
    gr.Examples(
        examples=["안녕?", "한국에 대해 말해줘"],
        inputs=input_box,
    )
    # NOTE(review): removed the original
    #   demo.load(chat_fn, inputs=input_box, outputs=output_box)
    # `load` fires on every page load, so it ran a full LM generation on an
    # empty prompt each time a visitor opened the page — wasteful and it
    # overwrote the output box with spurious text. It did not define an API
    # endpoint as the original comment claimed.
# โœ… API endpoint๋กœ ์‚ฌ์šฉํ•  Interface ๊ฐ์ฒด ๋“ฑ๋ก
api_demo = gr.Interface(
fn=chat_fn,
inputs="text",
outputs="text",
api_name="predict" # API ์—”๋“œํฌ์ธํŠธ ์ด๋ฆ„ ๋ช…์‹œ
)
if __name__ == "__main__":
    # Bug fix: the original queued `demo` but then launched `api_demo`, so the
    # queued Blocks UI (the app with the button and examples) was never served
    # and the queue() call had no effect. Launch the app that was queued —
    # Blocks already exposes its click handler via the HTTP API, so a separate
    # Interface launch is unnecessary.
    demo.queue()
    demo.launch(share=False)