AIModels24 committed (verified)
Commit d9e9b95 · Parent(s): d4c0217

Update app.py

Files changed (1):
  1. app.py +21 -18
app.py CHANGED
@@ -1,23 +1,26 @@
- # Ensure you don't load bitsandbytes if you are on CPU
- from transformers import AutoTokenizer
- from unsloth import FastLanguageModel
  import torch
+ from unsloth import FastLanguageModel
 
+ # Load the model on the CPU
  model_name = "AIModels24/Indian_Constitution" # Replace with your actual model path
 
- @st.cache_resource
- def load_model():
-     # Load the tokenizer and model with CPU optimizations
-     tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-     # Skip the 'bitsandbytes' specific configurations for CPU use
-     model = FastLanguageModel.from_pretrained(
-         model_name=model_name,
-         max_seq_length=2048,
-         load_in_4bit=False, # Disable 4-bit quantization (you can adjust this for CPU)
-         dtype=torch.float32, # Use float32 (default for CPU)
-     )
-
-     return model, tokenizer
-
- model, tokenizer = load_model()
+ # Explicitly set the device to CPU
+ device = torch.device('cpu')
+
+ # Load model without GPU dependencies
+ model = FastLanguageModel.from_pretrained(
+     model_name=model_name,
+     max_seq_length=2048,
+     load_in_4bit=False, # Disable 4-bit quantization (required for CPU)
+     dtype=torch.float32, # Use float32 (default for CPU)
+ )
+
+ model = model.to(device) # Ensure the model is loaded to CPU
+
+ # Example inference function
+ def generate_text(prompt):
+     inputs = tokenizer(prompt, return_tensors="pt")
+     inputs = inputs.to(device) # Move inputs to CPU
+     with torch.no_grad():
+         outputs = model.generate(inputs['input_ids'], max_length=200)
+     return tokenizer.decode(outputs[0], skip_special_tokens=True)
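
Note: the updated app.py calls tokenizer inside generate_text but no longer loads one (the AutoTokenizer import and the load_model() helper were removed). A minimal sketch of how the script could run end to end on CPU, assuming unsloth's FastLanguageModel.from_pretrained returns both the model and its tokenizer (as the removed load_model helper implied); the final prompt is a hypothetical usage example:

import torch
from unsloth import FastLanguageModel

model_name = "AIModels24/Indian_Constitution"  # Replace with your actual model path
device = torch.device('cpu')

# Assumption: from_pretrained yields (model, tokenizer), which supplies the
# `tokenizer` that generate_text() relies on in the committed version
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=2048,
    load_in_4bit=False,   # skip 4-bit quantization on CPU
    dtype=torch.float32,  # float32 is the usual CPU dtype
)
model = model.to(device)  # keep the model on the CPU

def generate_text(prompt):
    # Tokenize the prompt and keep the tensors on the CPU
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(inputs['input_ids'], max_length=200)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Hypothetical usage
print(generate_text("Explain Article 21 of the Indian Constitution."))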