| | import subprocess |
| | import sys |
| | import torch |
| | import base64 |
| | from io import BytesIO |
| | from PIL import Image |
| | import requests |
| | from transformers import AutoModelForCausalLM, AutoProcessor |
def install(package):
    """Install *package* with pip, using the interpreter running this module.

    Args:
        package: Requirement string handed to ``pip install`` unchanged.

    Raises:
        subprocess.CalledProcessError: If the pip process exits non-zero.
    """
    # Invoke pip through ``sys.executable -m pip`` so the package lands in the
    # environment of the interpreter that is executing this code.
    pip_command = [sys.executable, "-m", "pip", "install", "--no-warn-script-location"]
    pip_command.append(package)
    subprocess.check_call(pip_command)
| |
|
class EndpointHandler:
    """Callable handler that runs image+text prompts through Florence-2.

    On construction it (best-effort) pip-installs its runtime dependencies,
    picks CUDA when available, and loads the ``microsoft/Florence-2-base``
    model and processor. Calling the instance with a request payload returns
    either ``{"generated_text": ...}`` or ``{"error": ...}``.
    """

    def __init__(self, path=""):
        """Install dependencies and load the model and processor.

        Args:
            path: Accepted but not used by this implementation; the model is
                always loaded from ``self.model_name``.
        """
        required_packages = ['timm', 'einops', 'flash-attn', 'Pillow','transformers']
        for package in required_packages:
            try:
                install(package)
                print(f"Successfully installed {package}")
            except Exception as e:
                # Deliberately best-effort: a failed install is reported and
                # start-up continues with whatever is already available.
                print(f"Failed to install {package}: {str(e)}")

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")

        self.model_name = "microsoft/Florence-2-base"
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            trust_remote_code=True,
        ).to(self.device)

        self.processor = AutoProcessor.from_pretrained(
            self.model_name,
            trust_remote_code=True,
        )

        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def process_image(self, image_data):
        """Turn a file path or base64 payload into a PIL image.

        Args:
            image_data: Either a string shorter than 256 characters naming an
                existing file, or base64-encoded image bytes.

        Returns:
            The opened PIL image, or ``None`` if the input could not be read
            or decoded (the failure is printed, not raised).
        """
        # Function-scope import: the module header does not import ``os``, and
        # without it the path check below raised NameError, which the broad
        # ``except`` turned into a silent ``None`` for every file path.
        import os

        print("[DEBUG] Attempting to process image")
        try:
            if isinstance(image_data, str) and len(image_data) < 256 and os.path.exists(image_data):
                with open(image_data, 'rb') as image_file:
                    print("[DEBUG] File opened successfully")
                    image = Image.open(image_file)
                    # Image.open is lazy; read the pixel data now, while the
                    # file handle is still open, so the image stays usable
                    # after the ``with`` block closes the file.
                    image.load()
            else:
                print("[DEBUG] Decoding base64 image data")
                image_bytes = base64.b64decode(image_data)
                image = Image.open(BytesIO(image_bytes))

            print("[DEBUG] Image opened with PIL:", image.format, image.size, image.mode)
            return image
        except Exception as e:
            print(f"[ERROR] Error processing image: {str(e)}")
            return None

    def __call__(self, data):
        """Run one inference request.

        Args:
            data: Request payload. ``data["inputs"]`` (or ``data`` itself when
                that key is absent) is either a dict with ``"image"`` and
                optional ``"text"`` entries, or a bare image value, in which
                case a default question is used as the prompt. Note that the
                ``"inputs"`` key is popped from ``data``.

        Returns:
            ``{"generated_text": str}`` on success, otherwise
            ``{"error": str}``.
        """
        try:
            inputs = data.pop("inputs", data)

            if isinstance(inputs, dict):
                image_path = inputs.get("image", None)
                text_input = inputs.get("text", "")
            else:
                image_path = inputs
                text_input = "What is in this image?"
            print("[INFO]",image_path,text_input)

            image = self.process_image(image_path) if image_path else None
            print("[INFO]",image)

            if image is None:
                # Fail early with a clear message instead of forwarding a
                # missing image to the processor and surfacing whatever error
                # it happens to raise.
                return {"error": "No valid image provided: expected a file path or base64-encoded image data"}

            model_inputs = self.processor(
                images=image,
                text=text_input,
                return_tensors="pt"
            )

            # Move tensors to the model's device; leave non-tensor entries as-is.
            model_inputs = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v
                            for k, v in model_inputs.items()}

            with torch.no_grad():
                outputs = self.model.generate(**model_inputs)

            decoded_outputs = self.processor.batch_decode(outputs, skip_special_tokens=True)
            print(f"[INFO],{decoded_outputs}")
            print(f"[INFO],{decoded_outputs[0]}")
            return {"generated_text": decoded_outputs[0]}

        except Exception as e:
            # Report the failure in the logs as well as in the response.
            print(f"[ERROR] Error during inference: {str(e)}")
            return {"error": str(e)}
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |