Filip committed on
Commit fe01251 · 1 Parent(s): 56d8f41
Files changed (1)
  1. app.py +19 -10
app.py CHANGED
@@ -5,33 +5,42 @@ import gc
 import os
 
 # Enable better CPU performance
-torch.set_num_threads(4) # Adjust based on available CPU cores
+torch.set_num_threads(4)
 device = "cpu"
 
 def load_model():
     model_name = "forestav/unsloth_vision_radiography_finetune"
+    base_model_name = "unsloth/Llama-3.2-11B-Vision-Instruct" # Correct base model
 
-    # Load tokenizer and processor first to free up memory
     print("Loading tokenizer and processor...")
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    processor = AutoProcessor.from_pretrained(model_name)
+    # Load tokenizer from base model
+    tokenizer = AutoTokenizer.from_pretrained(
+        base_model_name,
+        trust_remote_code=True
+    )
+
+    # Load processor from base model
+    processor = AutoProcessor.from_pretrained(
+        base_model_name,
+        trust_remote_code=True
+    )
 
     print("Loading model...")
     # Load model with CPU optimizations
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         device_map="cpu",
-        torch_dtype=torch.float32, # Use float32 for CPU
+        torch_dtype=torch.float32,
         low_cpu_mem_usage=True,
-        offload_folder="offload", # Enable disk offloading
-        offload_state_dict=True # Offload state dict to disk
+        offload_folder="offload",
+        offload_state_dict=True,
+        trust_remote_code=True
     )
 
-    # Quantize the model for CPU
     print("Quantizing model...")
     model = torch.quantization.quantize_dynamic(
         model,
-        {torch.nn.Linear}, # Quantize linear layers
+        {torch.nn.Linear},
         dtype=torch.qint8
     )
 
@@ -81,7 +90,7 @@ def analyze_image(image, instruction):
         min_p=0.1,
         use_cache=True,
         pad_token_id=tokenizer.eos_token_id,
-        num_beams=1 # Reduce beam search to save memory
+        num_beams=1
     )
 
     # Decode the response
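
For context, the torch.quantization.quantize_dynamic call kept by this commit converts the weights of the listed layer types to int8 while activations stay in float and are quantized on the fly at inference time, so no calibration data is needed. A minimal runnable sketch of that step, using a toy module in place of the 11B vision checkpoint; the layer set and dtype mirror the diff, everything else is illustrative:

    # Illustrative only: a small stand-in for the real model.
    import torch
    import torch.nn as nn

    toy = nn.Sequential(nn.Linear(128, 64), nn.ReLU(), nn.Linear(64, 10))

    # Swap the weights of the listed layer types to int8; activations
    # remain float and are quantized per forward pass.
    quantized = torch.quantization.quantize_dynamic(
        toy,
        {nn.Linear},        # same layer set as in the commit
        dtype=torch.qint8   # same dtype as in the commit
    )

    x = torch.randn(1, 128)
    print(quantized(x).shape)  # torch.Size([1, 10])

Only layer types in the set are replaced; everything else keeps its float32 weights, so for a transformer the memory savings come mostly from the large nn.Linear projection matrices.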