Spaces: Runtime error
# Install bitsandbytes and accelerate (required for 8-bit loading on the GPU)
!pip install bitsandbytes
!pip install accelerate

import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
import gradio as gr
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')
# Set the path to the local directory where the model and tokenizer are saved
MODEL_PATH = "/content/drive/My Drive/phi35"
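# (Optional sanity check, not in the original snippet: confirm the Drive folder
# actually contains the files written by save_pretrained() before loading the
# multi-gigabyte weights; a missing or misspelled path is a common cause of
# runtime errors at this point.)
assert os.path.isdir(MODEL_PATH), f"Model directory not found: {MODEL_PATH}"
print(os.listdir(MODEL_PATH))  # expect config.json, tokenizer files, *.safetensors/*.bin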
# Load the tokenizer from the local directory
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
# Load the model with 8-bit quantization (BitsAndBytesConfig is the current way
# to request this; passing load_in_8bit=True directly is deprecated)
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map="auto",
    quantization_config=quantization_config,
)
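# (Optional, not in the original snippet: get_memory_footprint() reports the
# model's memory usage, a quick way to confirm the 8-bit load took effect;
# for a ~3.8B-parameter model this should be roughly half the fp16 size.)
print(f"Model memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")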
# Create the text-generation pipeline; the model is already placed on the GPU
# by device_map="auto", so no device argument is passed here
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=256,  # adjusted for faster inference
    do_sample=True,
    top_p=0.95,
    top_k=50,
    temperature=0.8,
)
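# (Optional smoke test, not in the original snippet: run one prompt through the
# pipeline before wiring up Gradio, so model or tokenizer problems surface with
# a readable traceback instead of a generic error in the web UI.)
print(pipe("Hello, who are you?")[0]["generated_text"])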
# Define the function for the Gradio interface
def chat_with_phi(message):
    response = pipe(message)
    return response[0]['generated_text']
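# (Note, not in the original snippet: by default the text-generation pipeline
# returns the prompt followed by the completion, so the chat box echoes the
# user's message. Passing return_full_text=False, a standard pipeline argument,
# returns only the newly generated text, e.g.:
#     response = pipe(message, return_full_text=False)
# if echoing the prompt is not wanted.)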
# Set up the Gradio interface
app = gr.Interface(
    fn=chat_with_phi,
    inputs=gr.Textbox(label="Type your message:"),
    outputs=gr.Textbox(label="Phi 3.5 Responds:"),
    title="Phi 3.5 Text Chat",
    description="Chat with the Phi 3.5 model. Ask anything!",
    theme="default"
)
# Launch the app
app.launch(debug=True)
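# (Optional, not in the original snippet: in Colab, share=True asks Gradio to
# create a temporary public link so the chat can be opened outside the notebook.)
# app.launch(debug=True, share=True)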