limitedonly41 committed
Commit 4f45e25
1 Parent(s): 98d9f50

Update app.py

Files changed (1)
  1. app.py +18 -34
app.py CHANGED
@@ -3,11 +3,9 @@ import torch
 import spaces
 import logging
 
-
 # Configure logging to write messages to a file
 logging.basicConfig(filename='app.log', level=logging.ERROR)
 
-
 # Configuration
 max_seq_length = 2048
 dtype = None # Auto detection of dtype
@@ -15,40 +13,27 @@ load_in_4bit = True # Use 4-bit quantization to reduce memory usage
 
 peft_model_name = "limitedonly41/website_mistral7b_v02_1200_finetuned_5_big"
 
-# from unsloth import FastLanguageModel
-
-# # Load the model and tokenizer
-# model, tokenizer = FastLanguageModel.from_pretrained(
-#     model_name=peft_model_name, # YOUR MODEL YOU USED FOR TRAINING
-#     max_seq_length=max_seq_length,
-#     dtype=dtype,
-#     load_in_4bit=load_in_4bit,
-# )
-# FastLanguageModel.for_inference(model) # Enable native 2x faster inference
-
-# def return_prediction(prompt):
-#     inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
-#     outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
-#     ans = tokenizer.batch_decode(outputs)[0]
-#     ans_pred = ans.split('### Response:')[1].split('<')[0]
-#     return ans_pred
+# Initialize model and tokenizer variables
+model = None
+tokenizer = None
 
 @spaces.GPU()
 def classify_website(site_text):
+    global model, tokenizer # Declare model and tokenizer as global variables
 
     try:
-
-        from unsloth import FastLanguageModel
-
-        # Load the model and tokenizer
-        model, tokenizer = FastLanguageModel.from_pretrained(
-            model_name=peft_model_name, # YOUR MODEL YOU USED FOR TRAINING
-            max_seq_length=max_seq_length,
-            dtype=dtype,
-            load_in_4bit=load_in_4bit,
-        )
-        FastLanguageModel.for_inference(model) # Enable native 2x faster inference
-
+        # Load the model and tokenizer if they are not already loaded
+        if model is None or tokenizer is None:
+            from unsloth import FastLanguageModel
+
+            # Load the model and tokenizer
+            model, tokenizer = FastLanguageModel.from_pretrained(
+                model_name=peft_model_name, # YOUR MODEL YOU USED FOR TRAINING
+                max_seq_length=max_seq_length,
+                dtype=dtype,
+                load_in_4bit=load_in_4bit,
+            )
+            FastLanguageModel.for_inference(model) # Enable native 2x faster inference
 
         prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 
@@ -73,8 +58,7 @@ def classify_website(site_text):
     except Exception as e:
         print(e)
         logging.exception(e)
-        return e
-        # return return_prediction(prompt)
+        return str(e)
 
 # Create a Gradio interface
 iface = gr.Interface(
@@ -86,4 +70,4 @@ iface = gr.Interface(
 )
 
 # Launch the interface
-iface.launch(debug=True)
+iface.launch()
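
The substance of this commit is a lazy-load-and-cache pattern: model and tokenizer now live at module scope as None and are loaded inside the @spaces.GPU() handler on the first request, then reused on every later call. A minimal sketch of that pattern, with a hypothetical _load() standing in for the expensive FastLanguageModel.from_pretrained call so it runs without unsloth or a GPU:

model = None
tokenizer = None

def _load():
    # Hypothetical stand-in for FastLanguageModel.from_pretrained;
    # in the real app this is the slow, GPU-bound model load.
    return object(), object()

def classify_website(site_text):
    global model, tokenizer
    if model is None or tokenizer is None:
        model, tokenizer = _load()  # runs only on the first call
    return f"processed {len(site_text)} characters"

classify_website("first call triggers the load")
classify_website("later calls reuse the cached objects")

Deferring the import and load into the handler keeps the Space's startup fast, and the switch from return e to return str(e) means the error path hands Gradio a plain string rather than an exception object.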