import subprocess
import sys

import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor


def install(package):
    """Install a package at runtime; the endpoint image may lack optional deps."""
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-warn-script-location", package])


class EndpointHandler:
    def __init__(self, path=""):
        # Florence-2's remote code needs timm/einops, and flash-attn speeds up
        # attention on supported GPUs; install them in case the image lacks them.
        required_packages = ["timm", "einops", "flash-attn", "Pillow"]
        for package in required_packages:
            try:
                install(package)
                print(f"Successfully installed {package}")
            except Exception as e:
                print(f"Failed to install {package}: {str(e)}")

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")

        self.model_name = "microsoft/Florence-2-base-ft"
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            trust_remote_code=True,
            revision="refs/pr/6",
        ).to(self.device)
        self.processor = AutoProcessor.from_pretrained(
            self.model_name,
            trust_remote_code=True,
            revision="refs/pr/6",
        )

        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def process_image(self, image_path):
        try:
            with open(image_path, "rb") as image_file:
                image = Image.open(image_file)
                # Image.open is lazy; convert() forces a full read before the
                # file handle closes and normalizes the mode for the processor.
                return image.convert("RGB")
        except Exception as e:
            print(f"Error processing image: {str(e)}")
            return None

    def __call__(self, data):
        try:
            # Extract inputs from the expected Hugging Face payload format
            inputs = data.pop("inputs", data)

            # Accept either {"image": ..., "text": ...} or a bare image path
            if isinstance(inputs, dict):
                image_path = inputs.get("image", None)
                text_input = inputs.get("text", "")
            else:
                # If inputs is not a dict, assume it's the image path.
                # Note: Florence-2 is trained on task prompts such as
                # "<CAPTION>" or "<OD>"; free-form questions may work poorly
                # with the fine-tuned checkpoint.
                image_path = inputs
                text_input = "What is in this image?"

            # Process image
            image = self.process_image(image_path) if image_path else None

            # Prepare inputs for the model
            model_inputs = self.processor(
                images=image,
                text=text_input,
                return_tensors="pt",
            )

            # Move tensor inputs to the model's device
            model_inputs = {
                k: v.to(self.device) if isinstance(v, torch.Tensor) else v
                for k, v in model_inputs.items()
            }

            # Generate output; without max_new_tokens, generation stops at the
            # model's small default limit and responses get truncated.
            with torch.no_grad():
                outputs = self.model.generate(**model_inputs, max_new_tokens=1024)

            # Decode outputs
            decoded_outputs = self.processor.batch_decode(outputs, skip_special_tokens=True)
            return {"generated_text": decoded_outputs[0]}
        except Exception as e:
            return {"error": str(e)}
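# --- Local smoke test (a minimal sketch, not part of the endpoint contract) ---
# Shows how the handler can be exercised outside the Inference Endpoints
# runtime. The image path "test.jpg" is a hypothetical placeholder; the payload
# mirrors the {"inputs": {"image": ..., "text": ...}} shape __call__ expects,
# using a Florence-2 task prompt as the text input.
if __name__ == "__main__":
    handler = EndpointHandler()
    result = handler({"inputs": {"image": "test.jpg", "text": "<CAPTION>"}})
    print(result)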