# Spaces: Runtime error
import os
import subprocess
import sys

import gradio as gr
import torch
from google.colab import drive
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)
# Install bitsandbytes and accelerate.
# `!pip install ...` is IPython-only syntax and a SyntaxError in a plain Python
# file; invoking pip through the running interpreter works in both settings and
# stops immediately if an install fails.
# NOTE(review): transformers is imported before this install runs; if it does
# not pick up the freshly installed packages, restart the runtime -- TODO confirm.
for package in ("bitsandbytes", "accelerate"):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

# Mount Google Drive
drive.mount('/content/drive')

# Set the path to the local directory where the model and tokenizer are saved
MODEL_PATH = "/content/drive/My Drive/phi35"

# Load the tokenizer from the local directory
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Load the model with 8-bit quantization.
# The quantization settings go through `quantization_config`; passing
# `load_in_8bit=True` directly to `from_pretrained` is deprecated.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map='auto',
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)

# Create the text-generation pipeline.
# - `max_new_tokens` bounds only the generated continuation; `max_length`
#   also counted the prompt, so long prompts left no room for a reply.
# - No `device_map` here: the model object was already placed on devices by
#   `device_map='auto'` above, and the pipeline reuses that placement.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,  # Kept small for faster inference
    do_sample=True,
    top_p=0.95,
    top_k=50,
    temperature=0.8,
)
# Define the function for the Gradio interface
def chat_with_phi(message: str) -> str:
    """Generate a reply to ``message`` using the module-level ``pipe``.

    Args:
        message: Text entered by the user in the Gradio textbox.

    Returns:
        The text generated by the pipeline for ``message``, without the
        prompt echoed back. Returns an empty string when ``message`` is
        empty, ``None``, or only whitespace.
    """
    # Nothing to respond to: skip the model call entirely.
    if not message or not message.strip():
        return ""
    # return_full_text=False keeps the prompt out of 'generated_text', so the
    # output box shows only the model's continuation.
    response = pipe(message, return_full_text=False)
    return response[0]['generated_text']
# Build the Gradio UI: one text input routed through chat_with_phi,
# one text output showing the returned string.
message_box = gr.Textbox(label="Type your message:")
reply_box = gr.Textbox(label="Phi 3.5 Responds:")

app = gr.Interface(
    fn=chat_with_phi,
    inputs=message_box,
    outputs=reply_box,
    title="Phi 3.5 Text Chat",
    description="Chat with Phi 3.5 model. Ask anything!",
    theme="default",
)

# Start the app with debug mode enabled
app.launch(debug=True)