import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig

# Base model to serve: Microsoft's Phi-2
model_name = "microsoft/phi-2"

# Hugging Face access token, read from the environment
token = os.environ.get("HUGGING_FACE_TOKEN")

# Load the base model; trust_remote_code is needed for Phi-2's custom
# modeling code on older transformers releases
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=token,  # `use_auth_token` is deprecated; `token` is the current argument
    trust_remote_code=True,
)
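
# NOTE: BitsAndBytesConfig is imported above but never used here. A minimal
# sketch (an assumption, not what this script actually runs) of loading the
# base model in 4-bit, QLoRA-style:
#
#   bnb_config = BitsAndBytesConfig(
#       load_in_4bit=True,
#       bnb_4bit_quant_type="nf4",
#       bnb_4bit_compute_dtype=torch.float16,
#   )
#   model = AutoModelForCausalLM.from_pretrained(
#       model_name,
#       quantization_config=bnb_config,
#       token=token,
#       trust_remote_code=True,
#   )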

model.config.use_cache = False

# Attach the LoRA adapter weights saved at training step 500 (requires `peft`)
model.load_adapter("checkpoint_500")

# The tokenizer was saved alongside the adapter checkpoint
tokenizer = AutoTokenizer.from_pretrained("checkpoint_500", trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token  # Phi-2 defines no pad token; reuse EOS


# Build the text-generation pipeline once, rather than on every request
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)


def inference(prompt, count):
    # Gradio passes textbox values as strings
    count = int(count)
    result = pipe(prompt, max_new_tokens=count)
    return result[0]["generated_text"]
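
# Example (hypothetical) call, mirroring the Gradio example below:
#   inference("What is a large language model?", "50")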

title = "TSAI S21 Assignment: Adaptive QLoRA training on the OpenAssistant oasst1 dataset, using the microsoft/phi-2 model"
description = "A simple Gradio interface that accepts a prompt and generates GPT-like text"
examples = [["What is a large language model?", "50"]]

demo = gr.Interface(
    inference,
    inputs=[
        gr.Textbox(placeholder="Enter a prompt"),
        # max_new_tokens counts tokens, not characters
        gr.Textbox(placeholder="Enter the number of new tokens to generate"),
    ],
    outputs=[gr.Textbox(label="ChatGPT-like text")],
    title=title,
    description=description,
    examples=examples,
)
demo.launch()