Spaces:
Running
on
Zero
Running
on
Zero
limitedonly41
committed on
Commit
•
4f45e25
1
Parent(s):
98d9f50
Update app.py
Browse files
app.py
CHANGED
@@ -3,11 +3,9 @@ import torch
|
|
3 |
import spaces
|
4 |
import logging
|
5 |
|
6 |
-
|
7 |
# Configure logging to write messages to a file
|
8 |
logging.basicConfig(filename='app.log', level=logging.ERROR)
|
9 |
|
10 |
-
|
11 |
# Configuration
|
12 |
max_seq_length = 2048
|
13 |
dtype = None # Auto detection of dtype
|
@@ -15,40 +13,27 @@ load_in_4bit = True # Use 4-bit quantization to reduce memory usage
|
|
15 |
|
16 |
peft_model_name = "limitedonly41/website_mistral7b_v02_1200_finetuned_5_big"
|
17 |
|
18 |
-
#
|
19 |
-
|
20 |
-
|
21 |
-
# model, tokenizer = FastLanguageModel.from_pretrained(
|
22 |
-
# model_name=peft_model_name, # YOUR MODEL YOU USED FOR TRAINING
|
23 |
-
# max_seq_length=max_seq_length,
|
24 |
-
# dtype=dtype,
|
25 |
-
# load_in_4bit=load_in_4bit,
|
26 |
-
# )
|
27 |
-
# FastLanguageModel.for_inference(model) # Enable native 2x faster inference
|
28 |
-
|
29 |
-
# def return_prediction(prompt):
|
30 |
-
# inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
|
31 |
-
# outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
|
32 |
-
# ans = tokenizer.batch_decode(outputs)[0]
|
33 |
-
# ans_pred = ans.split('### Response:')[1].split('<')[0]
|
34 |
-
# return ans_pred
|
35 |
|
36 |
@spaces.GPU()
|
37 |
def classify_website(site_text):
|
|
|
38 |
|
39 |
try:
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
|
53 |
prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
|
54 |
|
@@ -73,8 +58,7 @@ def classify_website(site_text):
|
|
73 |
except Exception as e:
|
74 |
print(e)
|
75 |
logging.exception(e)
|
76 |
-
return e
|
77 |
-
# return return_prediction(prompt)
|
78 |
|
79 |
# Create a Gradio interface
|
80 |
iface = gr.Interface(
|
@@ -86,4 +70,4 @@ iface = gr.Interface(
|
|
86 |
)
|
87 |
|
88 |
# Launch the interface
|
89 |
-
iface.launch(
|
|
|
3 |
import spaces
|
4 |
import logging
|
5 |
|
|
|
6 |
# Configure logging to write messages to a file
|
7 |
logging.basicConfig(filename='app.log', level=logging.ERROR)
|
8 |
|
|
|
9 |
# Configuration
|
10 |
max_seq_length = 2048
|
11 |
dtype = None # Auto detection of dtype
|
|
|
13 |
|
14 |
peft_model_name = "limitedonly41/website_mistral7b_v02_1200_finetuned_5_big"
|
15 |
|
16 |
+
# Initialize model and tokenizer variables
|
17 |
+
model = None
|
18 |
+
tokenizer = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
@spaces.GPU()
|
21 |
def classify_website(site_text):
|
22 |
+
global model, tokenizer # Declare model and tokenizer as global variables
|
23 |
|
24 |
try:
|
25 |
+
# Load the model and tokenizer if they are not already loaded
|
26 |
+
if model is None or tokenizer is None:
|
27 |
+
from unsloth import FastLanguageModel
|
28 |
+
|
29 |
+
# Load the model and tokenizer
|
30 |
+
model, tokenizer = FastLanguageModel.from_pretrained(
|
31 |
+
model_name=peft_model_name, # YOUR MODEL YOU USED FOR TRAINING
|
32 |
+
max_seq_length=max_seq_length,
|
33 |
+
dtype=dtype,
|
34 |
+
load_in_4bit=load_in_4bit,
|
35 |
+
)
|
36 |
+
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
|
37 |
|
38 |
prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
|
39 |
|
|
|
58 |
except Exception as e:
|
59 |
print(e)
|
60 |
logging.exception(e)
|
61 |
+
return str(e)
|
|
|
62 |
|
63 |
# Create a Gradio interface
|
64 |
iface = gr.Interface(
|
|
|
70 |
)
|
71 |
|
72 |
# Launch the interface
|
73 |
+
iface.launch()
|