import logging

import gradio as gr
import spaces
import torch

# Configure logging to write error messages to a file
logging.basicConfig(filename='app.log', level=logging.ERROR)

# Configuration
max_seq_length = 2048
dtype = None  # None = auto-detect dtype
load_in_4bit = True  # Use 4-bit quantization to reduce memory usage
peft_model_name = "limitedonly41/website_mistral7b_v02_1200_finetuned_5_big"

# Model and tokenizer are loaded lazily on the first request
model = None
tokenizer = None


@spaces.GPU()
def classify_website(site_text):
    global model, tokenizer  # Reuse the loaded model/tokenizer across calls
    try:
        # Load the model and tokenizer if they are not already loaded
        if model is None or tokenizer is None:
            from unsloth import FastLanguageModel

            model, tokenizer = FastLanguageModel.from_pretrained(
                model_name=peft_model_name,  # the fine-tuned PEFT model
                max_seq_length=max_seq_length,
                dtype=dtype,
                load_in_4bit=load_in_4bit,
            )
            FastLanguageModel.for_inference(model)  # Enable native 2x faster inference

        prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Categorize the website into one of the 3 categories:

1) OTHER
2) NEWS/BLOG
3) E-commerce

### Input:
{site_text}

### Response:"""

        inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
        outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
        ans = tokenizer.batch_decode(outputs)[0]

        # Keep only the generated answer: the text after the response header,
        # up to the first special token (e.g. "</s>")
        ans_pred = ans.split('### Response:')[1].split('<')[0].strip()

        return ans_pred
    except Exception as e:
        logging.exception(e)
        return str(e)


# Create a Gradio interface
iface = gr.Interface(
    fn=classify_website,
    inputs="text",
    outputs="text",
    title="Website Categorization",
    description="Categorize a website into one of the 3 categories: OTHER, NEWS/BLOG, or E-commerce.",
)

# Launch the interface
iface.launch()
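
# --- Optional: querying the app programmatically ---
# A minimal client-side sketch, kept commented out so it does not run inside
# the app process. It assumes the default local URL (http://127.0.0.1:7860/)
# and Gradio's default api_name "/predict" for a single Interface; adjust the
# URL for a hosted Space. Run it from a separate process after
# `iface.launch()` is serving.
#
# from gradio_client import Client
#
# client = Client("http://127.0.0.1:7860/")
# result = client.predict(
#     "Shop the latest laptops and phones with free shipping...",  # site_text
#     api_name="/predict",
# )
# print(result)  # expected: one of OTHER, NEWS/BLOG, E-commerce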