zbing committed on
Commit
c88b0bd
1 Parent(s): a5cb3bd

Upload folder using huggingface_hub

Files changed (1)
api.py +21 -11
api.py CHANGED
@@ -3,25 +3,34 @@ from flask import Flask, request, jsonify
 from PIL import Image
 from io import BytesIO
 import base64
+import torch
 from transformers import AutoProcessor, AutoModelForCausalLM
 import threading
-from unittest.mock import patch
-from transformers.dynamic_module_utils import get_imports
 
 app = Flask(__name__)
 
 # Parse command line arguments
 parser = argparse.ArgumentParser(description='Start the Flask server with specified model and device.')
-parser.add_argument('--model-path', type=str, default="models/Florence-2-base", help='Path to the pretrained model')
+parser.add_argument('--model-path', type=str, required=True, help='Path to the pretrained model')
 parser.add_argument('--device', type=str, choices=['cpu', 'gpu'], default='auto', help='Device to use: "cpu", "gpu", or "auto"')
 args = parser.parse_args()
 
 # Determine the device
-device = "cpu"
-# Initialize the model and processor
+if args.device == 'auto':
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
+elif args.device == 'gpu':
+    if torch.cuda.is_available():
+        device = "cuda:0"
+    else:
+        raise ValueError("GPU option specified but no GPU is available.")
+else:
+    device = "cpu"
 
-with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports): #workaround for unnecessary flash_attn requirement
-    model = AutoModelForCausalLM.from_pretrained(model_path, attn_implementation="sdpa", torch_dtype=dtype,trust_remote_code=True)
+torch_dtype = torch.float16 if device.startswith("cuda") else torch.float32
+
+from unittest.mock import patch
+from transformers.dynamic_module_utils import get_imports
+import os
 
 def fixed_get_imports(filename: str | os.PathLike) -> list[str]:
     if not str(filename).endswith("modeling_florence2.py"):
@@ -30,9 +39,10 @@ def fixed_get_imports(filename: str | os.PathLike) -> list[str]:
     imports.remove("flash_attn")
     return imports
 
-with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports): #workaround for unnecessary flash_attn requirement
-    model = AutoModelForCausalLM.from_pretrained(args.model_path, attn_implementation="sdpa", torch_dtype=dtype,trust_remote_code=True)
-    processor = AutoProcessor.from_pretrained(args.model_path, trust_remote_code=True, device_map=device)
+# Initialize the model and processor
+with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports): #workaround for unnecessary flash_attn requirement
+    model = AutoModelForCausalLM.from_pretrained(args.model_path, attn_implementation="sdpa", torch_dtype=torch_dtype,trust_remote_code=True).to(device)
+    processor = AutoProcessor.from_pretrained(args.model_path, trust_remote_code=True)
 
 lock = threading.Lock() # Use a lock to ensure thread safety when accessing the model
 
@@ -40,7 +50,7 @@ def predict_image(image, task: str = "<OD>", prompt: str = None):
     prompt = task + " " + prompt if prompt else task
     print(f"Prompt: {prompt}")
     with lock:
-        inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)
+        inputs = processor(text=prompt, images=image, return_tensors="pt").to(device, torch_dtype)
         generated_ids = model.generate(
             input_ids=inputs["input_ids"],
             pixel_values=inputs["pixel_values"],
 
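Beyond the device handling, this commit fixes an ordering bug: the previous revision entered `with patch(..., fixed_get_imports)` and referenced `model_path` and `dtype` before any of them were defined, which would raise a NameError at startup. The new revision defines `fixed_get_imports` first and loads the model afterwards. The patch itself works around Florence-2's remote code declaring `flash_attn` as a hard import even when SDPA attention is used. A minimal self-contained sketch of the pattern (the checkpoint path is a placeholder):

import os
from unittest.mock import patch

from transformers import AutoModelForCausalLM
from transformers.dynamic_module_utils import get_imports

def fixed_get_imports(filename: str | os.PathLike) -> list[str]:
    # Only touch the import list of Florence-2's modeling file.
    imports = get_imports(filename)
    if str(filename).endswith("modeling_florence2.py") and "flash_attn" in imports:
        imports.remove("flash_attn")
    return imports

# While the patch is active, transformers resolves the remote module's
# dependencies through fixed_get_imports, so flash_attn is never required.
with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports):
    model = AutoModelForCausalLM.from_pretrained(
        "models/Florence-2-base",  # placeholder checkpoint path
        attn_implementation="sdpa",
        trust_remote_code=True,
    )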
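The companion change in `predict_image` is easy to miss: with the model now in float16 on CUDA, the processor outputs must be cast with `.to(device, torch_dtype)`, otherwise float32 pixel values can trigger a dtype mismatch inside the vision encoder. Below is a sketch of the full inference path built on the objects defined above; the generation parameters and the `post_process_generation` call follow Florence-2's published usage examples rather than this diff:

from PIL import Image

image = Image.open("example.jpg").convert("RGB")  # placeholder input image
task = "<OD>"  # object detection task token

with lock:  # serialize access to the shared model, as api.py does
    # Match the model's device and dtype, exactly as the new code does.
    inputs = processor(text=task, images=image, return_tensors="pt").to(device, torch_dtype)
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,  # illustrative value, not from this diff
        num_beams=3,          # illustrative value, not from this diff
    )
text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
# Florence-2's processor converts raw text back into task-specific structures,
# e.g. bounding boxes for <OD>; image.size is (width, height).
result = processor.post_process_generation(text, task=task, image_size=image.size)
print(result)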
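Since `--model-path` is now required, the server would be started along the lines of `python api.py --model-path models/Florence-2-base --device auto`. A hypothetical client is sketched below; the route path and JSON field names are assumptions, as the Flask handler itself is outside this diff:

import base64
import requests  # third-party HTTP client, not imported by api.py

with open("example.jpg", "rb") as f:
    payload = {
        "image": base64.b64encode(f.read()).decode("ascii"),  # assumed field name
        "task": "<OD>",                                       # assumed field name
    }

# "/predict" is a guess at the route; check api.py for the actual endpoint.
resp = requests.post("http://127.0.0.1:5000/predict", json=payload, timeout=120)
print(resp.json())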