AIModels24 committed (verified)
Commit d9e9b95 · Parent(s): d4c0217

Update app.py

Files changed (1):
  1. app.py +21 -18
app.py CHANGED
@@ -1,23 +1,26 @@
- # Ensure you don't load bitsandbytes if you are on CPU
- from transformers import AutoTokenizer
- from unsloth import FastLanguageModel
  import torch
+ from unsloth import FastLanguageModel
 
+ # Load the model on the CPU
  model_name = "AIModels24/Indian_Constitution" # Replace with your actual model path
 
- @st.cache_resource
- def load_model():
-     # Load the tokenizer and model with CPU optimizations
-     tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-     # Skip the 'bitsandbytes' specific configurations for CPU use
-     model = FastLanguageModel.from_pretrained(
-         model_name=model_name,
-         max_seq_length=2048,
-         load_in_4bit=False, # Disable 4-bit quantization (you can adjust this for CPU)
-         dtype=torch.float32, # Use float32 (default for CPU)
-     )
-
-     return model, tokenizer
-
- model, tokenizer = load_model()
+ # Explicitly set the device to CPU
+ device = torch.device('cpu')
+
+ # Load model without GPU dependencies
+ model = FastLanguageModel.from_pretrained(
+     model_name=model_name,
+     max_seq_length=2048,
+     load_in_4bit=False, # Disable 4-bit quantization (required for CPU)
+     dtype=torch.float32, # Use float32 (default for CPU)
+ )
+
+ model = model.to(device) # Ensure the model is loaded to CPU
+
+ # Example inference function
+ def generate_text(prompt):
+     inputs = tokenizer(prompt, return_tensors="pt")
+     inputs = inputs.to(device) # Move inputs to CPU
+     with torch.no_grad():
+         outputs = model.generate(inputs['input_ids'], max_length=200)
+     return tokenizer.decode(outputs[0], skip_special_tokens=True)
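
Note: the updated app.py calls tokenizer inside generate_text but no longer loads one (the AutoTokenizer import and the load_model() helper were removed). A minimal sketch of how the script could run end to end on CPU, assuming unsloth's FastLanguageModel.from_pretrained returns both the model and its tokenizer (as the removed load_model helper implied); the final prompt is a hypothetical usage example:

import torch
from unsloth import FastLanguageModel

model_name = "AIModels24/Indian_Constitution"  # Replace with your actual model path
device = torch.device('cpu')

# Assumption: from_pretrained yields (model, tokenizer), which supplies the
# `tokenizer` that generate_text() relies on in the committed version
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=2048,
    load_in_4bit=False,   # skip 4-bit quantization on CPU
    dtype=torch.float32,  # float32 is the usual CPU dtype
)
model = model.to(device)  # keep the model on the CPU

def generate_text(prompt):
    # Tokenize the prompt and keep the tensors on the CPU
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(inputs['input_ids'], max_length=200)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Hypothetical usage
print(generate_text("Explain Article 21 of the Indian Constitution."))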