from transformers import AutoModelForCausalLM, AutoProcessor
from PIL import Image
import requests
import torch
import io


class EndpointHandler:
    def __init__(self, model_dir):
        # Load the model and processor once at startup and keep them on the
        # best available device.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = AutoModelForCausalLM.from_pretrained(
            model_dir, trust_remote_code=True
        ).to(device)
        self.processor = AutoProcessor.from_pretrained(model_dir, trust_remote_code=True)
        self.device = device

    def __call__(self, data):
        try:
            # Expect a payload of the form {"inputs": {"url": "<image url>"}}.
            url = data.get("inputs", {}).get("url")
            if not url:
                return {"error": "Missing URL"}

            # Fetch the image. Note that verify=False disables TLS certificate
            # verification; the timeout keeps the request from hanging forever.
            headers = {
                "User-Agent": "Mozilla/5.0",
                "Accept": "image/*",
            }
            response = requests.get(url, headers=headers, verify=False, timeout=30)
            response.raise_for_status()

            # Decode the downloaded bytes into an RGB PIL image.
            image_data = io.BytesIO(response.content)
            image = Image.open(image_data).convert("RGB")

            # Build model inputs from the captioning task prompt and the image.
            inputs = self.processor(
                text="<MORE_DETAILED_CAPTION>",
                images=image,
                return_tensors="pt",
            ).to(self.device)

            # Generate a detailed caption with beam search.
            with torch.inference_mode():
                output = self.model.generate(
                    **inputs,
                    max_new_tokens=512,
                    num_beams=3,
                )
            text = self.processor.batch_decode(output, skip_special_tokens=True)[0]
            return {"caption": text}
        except Exception as e:
            # Surface any failure (download, decoding, inference) to the caller.
            return {"error": str(e)}
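

# Minimal local smoke test: a sketch of how the handler can be invoked outside
# the Inference Endpoint. The model directory "." and the image URL below are
# placeholder assumptions, not part of the deployed handler contract.
if __name__ == "__main__":
    handler = EndpointHandler(".")  # assumes model weights are in the current directory
    result = handler({"inputs": {"url": "https://example.com/sample.jpg"}})  # hypothetical URL
    print(result)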