import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

def run_inference_on_model(prompt):
    # Run text generation and strip the echoed prompt from the model output.
    gen = pipeline('text-generation', model=model, tokenizer=tokenizer, max_length=max_length)
    result = gen(prompt)
    return result[0]['generated_text'].replace(prompt, '')

max_length = 256  # maximum total sequence length (prompt + generated tokens)
# Load Model
model_name = "microsoft/phi-2"
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
model.config.use_cache = False  # KV cache was disabled for fine-tuning; generation still works without it
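# Optional sketch (an assumption, not part of the original app): since the
# adapter was trained with QLoRA, the base model could also be loaded in 4-bit
# to reduce GPU memory, e.g.:
#
#   import torch
#   from transformers import BitsAndBytesConfig
#   bnb_config = BitsAndBytesConfig(
#       load_in_4bit=True,
#       bnb_4bit_quant_type='nf4',
#       bnb_4bit_compute_dtype=torch.float16,
#   )
#   model = AutoModelForCausalLM.from_pretrained(
#       model_name, quantization_config=bnb_config, trust_remote_code=True)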
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token  # Phi-2 ships without a pad token, so reuse EOS
# Load the fine-tuned QLoRA adapter weights
finetuned_model_path = './fine_tuned_model'
model.load_adapter(finetuned_model_path)
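# Equivalent alternative (a sketch, assuming the adapter was saved with PEFT's
# save_pretrained): wrap the base model instead of calling load_adapter:
#
#   from peft import PeftModel
#   model = PeftModel.from_pretrained(model, finetuned_model_path)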
# Define Interface
description = ('An AI assistant that runs on the Microsoft Phi-2 model, fine-tuned on the '
               'OpenAssistant dataset with the QLoRA approach. '
               'Model: https://huggingface.co/microsoft/phi-2 '
               'Dataset: https://huggingface.co/datasets/OpenAssistant/oasst1')
title = 'AI chatbot fine-tuned from the Microsoft Phi-2 model'
demo = gr.Interface(run_inference_on_model,
                    inputs=[gr.Textbox(placeholder='Enter your prompt here', label='Input prompt')],
                    outputs=[gr.Textbox(label='AI response', scale=2)],
                    title=title,
                    description=description)
demo.launch(debug=False)
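# Quick sanity check without the UI (illustrative prompt, not from the original app):
#   print(run_inference_on_model('Explain QLoRA in one sentence.'))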