import gradio as gr
import torch
import spaces
import logging
# Configure logging to write messages to a file
logging.basicConfig(filename='app.log', level=logging.ERROR)
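# logging.exception() in the error handler below logs at ERROR level, so full
# tracebacks from failed requests are written to app.log.
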
# Configuration
max_seq_length = 2048
dtype = None # Auto detection of dtype
load_in_4bit = True # Use 4-bit quantization to reduce memory usage
peft_model_name = "limitedonly41/website_mistral7b_v02_1200_finetuned_5_big"
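# The checkpoint above is downloaded from the Hugging Face Hub on first use by
# FastLanguageModel.from_pretrained inside classify_website.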
# Initialize model and tokenizer variables
model = None
tokenizer = None
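
# Loading is deferred to the first request so that CUDA is only initialized
# inside the @spaces.GPU-decorated function (as required on ZeroGPU Spaces)
# and the app starts quickly.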
@spaces.GPU()
def classify_website(site_text):
    global model, tokenizer  # cache the model and tokenizer across calls

    try:
        # Load the model and tokenizer on the first request only
        if model is None or tokenizer is None:
            from unsloth import FastLanguageModel

            model, tokenizer = FastLanguageModel.from_pretrained(
                model_name=peft_model_name,  # the fine-tuned PEFT checkpoint
                max_seq_length=max_seq_length,
                dtype=dtype,
                load_in_4bit=load_in_4bit,
            )
            FastLanguageModel.for_inference(model)  # Enable native 2x faster inference

        # Alpaca-style prompt matching the fine-tuning template
        prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Categorize the website into one of the 3 categories:
1) OTHER
2) NEWS/BLOG
3) E-commerce

### Input:
{site_text}

### Response:"""

        inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
        outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
        ans = tokenizer.batch_decode(outputs)[0]

        # The decoded text echoes the prompt, so keep only what follows
        # "### Response:" and trim at the first special token (e.g. "</s>")
        ans_pred = ans.split('### Response:')[1].split('<')[0].strip()
        return ans_pred
    except Exception as e:
        logging.exception(e)
        return str(e)
# Create a Gradio interface
iface = gr.Interface(
    fn=classify_website,
    inputs="text",
    outputs="text",
    title="Website Categorization",
    description="Categorize a website into one of the 3 categories: OTHER, NEWS/BLOG, or E-commerce.",
)
# Launch the interface
iface.launch()
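
# Example client-side call once the Space is running (a sketch: requires the
# `gradio_client` package, and the Space ID below is hypothetical):
#
#     from gradio_client import Client
#
#     client = Client("limitedonly41/<space-name>")  # hypothetical Space ID
#     result = client.predict("Shop the latest shoes and bags...", api_name="/predict")
#     print(result)  # one of: OTHER, NEWS/BLOG, E-commerce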