Spaces:
Running
on
Zero
Running
on
Zero
limitedonly41
committed on
Commit
•
4f45e25
1
Parent(s):
98d9f50
Update app.py
Browse files
app.py
CHANGED
@@ -3,11 +3,9 @@ import torch
|
|
3 |
import spaces
|
4 |
import logging
|
5 |
|
6 |
-
|
7 |
# Configure logging to write messages to a file
|
8 |
logging.basicConfig(filename='app.log', level=logging.ERROR)
|
9 |
|
10 |
-
|
11 |
# Configuration
|
12 |
max_seq_length = 2048
|
13 |
dtype = None # Auto detection of dtype
|
@@ -15,40 +13,27 @@ load_in_4bit = True # Use 4-bit quantization to reduce memory usage
|
|
15 |
|
16 |
peft_model_name = "limitedonly41/website_mistral7b_v02_1200_finetuned_5_big"
|
17 |
|
18 |
-
#
|
19 |
-
|
20 |
-
|
21 |
-
# model, tokenizer = FastLanguageModel.from_pretrained(
|
22 |
-
# model_name=peft_model_name, # YOUR MODEL YOU USED FOR TRAINING
|
23 |
-
# max_seq_length=max_seq_length,
|
24 |
-
# dtype=dtype,
|
25 |
-
# load_in_4bit=load_in_4bit,
|
26 |
-
# )
|
27 |
-
# FastLanguageModel.for_inference(model) # Enable native 2x faster inference
|
28 |
-
|
29 |
-
# def return_prediction(prompt):
|
30 |
-
# inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
|
31 |
-
# outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
|
32 |
-
# ans = tokenizer.batch_decode(outputs)[0]
|
33 |
-
# ans_pred = ans.split('### Response:')[1].split('<')[0]
|
34 |
-
# return ans_pred
|
35 |
|
36 |
@spaces.GPU()
|
37 |
def classify_website(site_text):
|
|
|
38 |
|
39 |
try:
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
|
53 |
prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
|
54 |
|
@@ -73,8 +58,7 @@ def classify_website(site_text):
|
|
73 |
except Exception as e:
|
74 |
print(e)
|
75 |
logging.exception(e)
|
76 |
-
return e
|
77 |
-
# return return_prediction(prompt)
|
78 |
|
79 |
# Create a Gradio interface
|
80 |
iface = gr.Interface(
|
@@ -86,4 +70,4 @@ iface = gr.Interface(
|
|
86 |
)
|
87 |
|
88 |
# Launch the interface
|
89 |
-
iface.launch(
|
|
|
3 |
import spaces
|
4 |
import logging
|
5 |
|
|
|
6 |
# Configure logging to write messages to a file
|
7 |
logging.basicConfig(filename='app.log', level=logging.ERROR)
|
8 |
|
|
|
9 |
# Configuration
|
10 |
max_seq_length = 2048
|
11 |
dtype = None # Auto detection of dtype
|
|
|
13 |
|
14 |
peft_model_name = "limitedonly41/website_mistral7b_v02_1200_finetuned_5_big"
|
15 |
|
16 |
+
# Initialize model and tokenizer variables
|
17 |
+
model = None
|
18 |
+
tokenizer = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
@spaces.GPU()
|
21 |
def classify_website(site_text):
|
22 |
+
global model, tokenizer # Declare model and tokenizer as global variables
|
23 |
|
24 |
try:
|
25 |
+
# Load the model and tokenizer if they are not already loaded
|
26 |
+
if model is None or tokenizer is None:
|
27 |
+
from unsloth import FastLanguageModel
|
28 |
+
|
29 |
+
# Load the model and tokenizer
|
30 |
+
model, tokenizer = FastLanguageModel.from_pretrained(
|
31 |
+
model_name=peft_model_name, # YOUR MODEL YOU USED FOR TRAINING
|
32 |
+
max_seq_length=max_seq_length,
|
33 |
+
dtype=dtype,
|
34 |
+
load_in_4bit=load_in_4bit,
|
35 |
+
)
|
36 |
+
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
|
37 |
|
38 |
prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
|
39 |
|
|
|
58 |
except Exception as e:
|
59 |
print(e)
|
60 |
logging.exception(e)
|
61 |
+
return str(e)
|
|
|
62 |
|
63 |
# Create a Gradio interface
|
64 |
iface = gr.Interface(
|
|
|
70 |
)
|
71 |
|
72 |
# Launch the interface
|
73 |
+
iface.launch()
|