# NOTE: the original paste began with Hugging Face Spaces page-status text
# ("Spaces:" / "Runtime error" x2) — scrape residue, not program source.
from tokenizers import Tokenizer | |
import gradio as gr | |
from transformers import pipeline | |
import torch | |
from transformers import BitsAndBytesConfig | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
from datasets import load_dataset | |
from peft import prepare_model_for_kbit_training | |
from peft import LoraConfig, get_peft_model | |
from transformers import TrainingArguments | |
from trl import SFTTrainer | |
def run_inference_on_model(prompt):
    """Generate a model response for *prompt*.

    The text-generation pipeline is built once on first call and cached as a
    function attribute; the original rebuilt it on every request, which
    re-wraps the model and tokenizer each time and makes every call slow.

    Args:
        prompt: The user's input prompt string.

    Returns:
        The generated text with the echoed prompt stripped from the front.
        (``removeprefix`` only drops the leading echo; the original
        ``str.replace`` would also delete the prompt text if it happened to
        reappear later inside the answer.)
    """
    gen = getattr(run_inference_on_model, "_gen", None)
    if gen is None:
        # Uses the module-level model/tokenizer/max_length defined below.
        gen = pipeline('text-generation', model=model, tokenizer=tokenizer,
                       max_length=max_length)
        run_inference_on_model._gen = gen
    result = gen(prompt)
    return result[0]['generated_text'].removeprefix(prompt)
# --- Model setup --------------------------------------------------------
max_length = 256  # upper bound on generated sequence length (tokens)

# Base model: Microsoft Phi-2. trust_remote_code is required because the
# repo ships custom modeling code; KV-cache is disabled to match how the
# adapter was trained.
model_name = "microsoft/phi-2"
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
model.config.use_cache = False

# Matching tokenizer; Phi-2 defines no pad token, so reuse EOS for padding.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# Attach the fine-tuned (QLoRA) adapter weights on top of the base model.
finetuned_model_path = './fine_tuned_model'
model.load_adapter(finetuned_model_path)
# --- Gradio UI ----------------------------------------------------------
description = 'An AI assistant that runs on the Microsoft Phi 2 model fine tuned on Open Assistant dataset using QLora approach. Link to the model: https://huggingface.co/microsoft/phi-2 Link to the dataset: https://huggingface.co/datasets/OpenAssistant/oasst1 '
title = 'AI Chat bot finetuned on Microsoft Phi 2 model'

# Single prompt box wired to the inference function.
# FIX: the hint text must be passed as `placeholder=`; the original passed it
# positionally, where the first Textbox argument is `value`, so the box was
# pre-filled with the hint and it got submitted verbatim unless cleared.
demo = gr.Interface(
    run_inference_on_model,
    inputs=[gr.Textbox(placeholder='Enter your prompt here', label="Input prompt")],
    outputs=[gr.Textbox(label='AI response', scale=2)],
    title=title,
    description=description,
)
demo.launch(debug=False)