Spaces:

ToletiSri
/

TSAI_S27

Sleeping

File size: 1,627 Bytes

9bc80c6
 
1717bbe
 
 
 
 
 
6c66946
 
 
 
f60d35d
 
 
 
 
1717bbe
 
 
f60d35d
6c66946
1717bbe
 
 
2dcf2f9
9bc80c6
6da9f6d
9bc80c6
 
 
ef9fa1a
d101868
d881d64
9bc80c6
 
 
 
 
d881d64
9bc80c6

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline,BitsAndBytesConfig

# Base model pulled from the Hugging Face Hub; the adapter stored in
# "checkpoint_500" is attached to it below via `load_adapter`.
model_name = "microsoft/phi-2"

import os
# Hub access token read from the environment; None when the variable is unset.
token = os.environ.get("HUGGING_FACE_TOKEN")


# Quantized loading is currently disabled. To re-enable it, uncomment this
# config together with the `quantization_config` argument below.
#bnb_config = BitsAndBytesConfig(
#    load_in_4bit=True,
#    bnb_4bit_quant_type="nf4",
#    bnb_4bit_compute_dtype=torch.float16,
#)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    #quantization_config=bnb_config,
    # `token` is the supported replacement for the deprecated
    # `use_auth_token` keyword.
    token=token,
    trust_remote_code=True,
)
# NOTE(review): turning the cache off is usually a training-time setting and
# may slow down generation -- confirm it is intended for this inference app.
model.config.use_cache = False
model.load_adapter("checkpoint_500")

# The tokenizer is loaded from the same checkpoint directory as the adapter.
tokenizer = AutoTokenizer.from_pretrained("checkpoint_500", trust_remote_code=True)
# Use the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Shared text-generation pipeline; created lazily by `_get_text_generator`.
_text_generator = None


def _get_text_generator():
    """Return the shared text-generation pipeline, building it on first use."""
    global _text_generator
    if _text_generator is None:
        # `model` and `tokenizer` are module-level objects that never change,
        # so the pipeline only needs to be constructed once rather than on
        # every request.
        _text_generator = pipeline(
            task="text-generation", model=model, tokenizer=tokenizer
        )
    return _text_generator


def inference(prompt, count):
    """Generate text that continues *prompt*.

    Args:
        prompt: Text fed to the text-generation pipeline.
        count: Maximum number of new tokens to generate. Any value accepted
            by ``int()`` works (the UI passes it as a string).

    Returns:
        The ``generated_text`` field of the first result returned by the
        pipeline.

    Raises:
        ValueError: If *count* cannot be converted to an integer.
    """
    max_new_tokens = int(count)
    result = _get_text_generator()(f"{prompt}", max_new_tokens=max_new_tokens)
    return result[0]['generated_text']

# Static text shown on the Gradio page.
title = "TSAI S21 Assignment: Adaptive QLoRA training on open assist oasst1 dataset, using microsoft/phi2 model"
description = "A simple Gradio interface that accepts a context and generates GPT like text "
# Each example row supplies one value per input component: [prompt, count].
examples = [["What is a large language model?", "50"]]


demo = gr.Interface(
    inference,
    inputs=[
        gr.Textbox(placeholder="Enter a prompt"),
        # This value is forwarded to `max_new_tokens`, so it limits tokens,
        # not characters; the placeholder says so to avoid misleading users.
        gr.Textbox(placeholder="Enter number of tokens you want to generate"),
    ],
    outputs=[gr.Textbox(label="Chat GPT like text")],
    title=title,
    description=description,
    examples=examples,
)
demo.launch()