File size: 4,116 Bytes
87cf582 b78a790 87cf582 e470fae 87cf582 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import subprocess
import sys
import torch
import base64
from io import BytesIO
from PIL import Image
import requests
from transformers import AutoModelForCausalLM, AutoProcessor
from tokenizers import Tokenizer, pre_tokenizers
import os
def install(package):
    """Install *package* with pip, run through the current interpreter.

    Invoking pip as ``<python> -m pip`` targets the environment of the
    interpreter that is executing this module.

    Args:
        package: Requirement string handed to ``pip install``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", "--no-warn-script-location"]
    pip_command.append(package)
    subprocess.check_call(pip_command)
class EndpointHandler:
    """Callable request handler around the ``arjunanand13/LADP_Florence-40e`` model.

    An instance is built once (installing optional packages and loading the
    model and processor) and is then called with one request payload at a
    time. Each call returns ``{"generated_text": ...}`` on success or
    ``{"error": ...}`` on failure; exceptions are not propagated to the caller.
    """

    def __init__(self, path=""):
        """Install runtime packages, then load the model, tokenizer and processor.

        Args:
            path: Accepted for interface compatibility. It is not read here;
                the weights are always loaded by ``self.model_name``.
        """
        # Best-effort installs: a failure is reported and loading continues.
        # NOTE(review): presumably these are needed by the model's remote
        # code -- confirm, and prefer pinning them in requirements instead.
        required_packages = ['timm', 'einops', 'flash-attn', 'Pillow']
        for package in required_packages:
            try:
                install(package)
                print(f"Successfully installed {package}")
            except Exception as e:
                print(f"Failed to install {package}: {e}")

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")

        # Load the model
        self.model_name = "arjunanand13/LADP_Florence-40e"
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name, trust_remote_code=True
        ).to(self.device)

        # Manually load the tokenizer with a whitespace pre-tokenizer.
        # It may be None if loading failed; no method of this class reads it.
        self.tokenizer = self.load_tokenizer()

        # Initialize the processor
        self.processor = AutoProcessor.from_pretrained(self.model_name, trust_remote_code=True)

        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def load_tokenizer(self):
        """Load the tokenizer for ``self.model_name`` with a whitespace pre-tokenizer.

        Returns:
            The configured tokenizer, or ``None`` if loading failed (the
            failure is printed rather than raised).
        """
        try:
            tokenizer = Tokenizer.from_pretrained(self.model_name)
            tokenizer.pre_tokenizer = pre_tokenizers.Whitespace()
            print("[INFO] Whitespace pre-tokenizer added.")
            return tokenizer
        except Exception as e:
            print(f"[ERROR] Failed to load tokenizer: {e}")
            return None

    @staticmethod
    def _strip_data_uri(image_data):
        """Return the payload of a ``data:<mime>;base64,<payload>`` string.

        Anything that is not a ``data:`` URI string is returned unchanged.
        """
        if isinstance(image_data, str) and image_data.startswith("data:"):
            _, separator, payload = image_data.partition(",")
            if separator:
                return payload
        return image_data

    @staticmethod
    def _describe_payload(value, limit=256):
        """Return *value* for logging, summarising str/bytes of *limit* or more items."""
        if isinstance(value, (str, bytes)) and len(value) >= limit:
            unit = "chars" if isinstance(value, str) else "bytes"
            return f"<{type(value).__name__}, {len(value)} {unit}>"
        return value

    def process_image(self, image_data):
        """Open an image from a local file path or from base64-encoded data.

        Args:
            image_data: Either a string shorter than 256 characters naming an
                existing file, or base64-encoded image data (optionally
                wrapped in a ``data:...;base64,`` URI).

        Returns:
            The opened ``PIL.Image.Image``, or ``None`` if the input could not
            be read or decoded (the failure is printed rather than raised).
        """
        print("[DEBUG] Attempting to process image")
        try:
            # NOTE(security): a request string that names an existing local
            # file is opened as an image. If requests are untrusted, restrict
            # or remove this branch.
            if isinstance(image_data, str) and len(image_data) < 256 and os.path.exists(image_data):
                with open(image_data, 'rb') as image_file:
                    print("[DEBUG] File opened successfully")
                    image = Image.open(image_file)
                    # Image.open is lazy: read the pixel data now, while the
                    # file is still open, so the image stays usable after
                    # the ``with`` block closes the file.
                    image.load()
            else:
                print("[DEBUG] Decoding base64 image data")
                # b64decode silently drops ':' ';' ',' but keeps the letters
                # of a data-URI header, corrupting the bytes -- strip it first.
                image_bytes = base64.b64decode(self._strip_data_uri(image_data))
                image = Image.open(BytesIO(image_bytes))
            print("[DEBUG] Image opened:", image.format, image.size, image.mode)
            return image
        except Exception as e:
            print(f"[ERROR] Error processing image: {e}")
            return None

    def __call__(self, data):
        """Run the model on one request payload.

        Args:
            data: Request dict. Its ``"inputs"`` entry (removed from ``data``
                if present; otherwise ``data`` itself is used) is either a
                dict with optional ``"image"`` and ``"text"`` keys, or a bare
                image reference, in which case a default question is asked.

        Returns:
            ``{"generated_text": str}`` on success, ``{"error": str}`` if
            anything raised.
        """
        try:
            inputs = data.pop("inputs", data)
            if isinstance(inputs, dict):
                image_path = inputs.get("image", None)
                text_input = inputs.get("text", "")
            else:
                image_path = inputs
                text_input = "What is in this image?"

            # Long payloads (typically base64 images) are summarised so a
            # whole image is not written to the log on every request.
            print("[INFO] Image path:", self._describe_payload(image_path),
                  "| Text input:", text_input)

            # process_image returns None on failure; that None is passed on
            # to the processor unchanged.
            image = self.process_image(image_path) if image_path else None

            model_inputs = self.processor(
                images=image,
                text=text_input,
                return_tensors="pt"
            )
            # Move tensors to the model's device; leave other values as-is.
            model_inputs = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v
                            for k, v in model_inputs.items()}

            with torch.no_grad():
                outputs = self.model.generate(**model_inputs)

            decoded_outputs = self.processor.batch_decode(outputs, skip_special_tokens=True)
            print(f"[INFO] Generated text: {decoded_outputs[0]}")
            return {"generated_text": decoded_outputs[0]}
        except Exception as e:
            print(f"[ERROR] {e}")
            return {"error": str(e)}
|