aayushgs committed
Commit c6a4459
1 Parent(s): 32bd642

Create handler.py

Files changed (1)
  1. handler.py +42 -0
handler.py ADDED
@@ -0,0 +1,42 @@
+ import base64
+ from io import BytesIO
+ from PIL import Image
+ import torch
+ from transformers import CLIPProcessor, CLIPModel
+ from typing import Dict, Any
+
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+ class EndpointHandler():
+     def __init__(self, path=""):
+         self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
+         self.model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(device)
+         self.model.eval()
+
+     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
+         input_data = data.get("inputs", {})
+         encoded_images = input_data.get("images")
+         texts = input_data.get("texts", [])
+
+         if not encoded_images or not texts:
+             return {"error": "Both images and texts must be provided"}
+
+         try:
+             images = [Image.open(BytesIO(base64.b64decode(img))).convert("RGB") for img in encoded_images]
+             inputs = self.processor(text=texts, images=images, return_tensors="pt", padding=True)
+
+             # Move tensors to the same device as the model
+             inputs = {k: v.to(device) for k, v in inputs.items()}
+
+             with torch.no_grad():
+                 outputs = self.model(**inputs)
+                 logits_per_image = outputs.logits_per_image  # image-text similarity scores
+                 logits_per_text = outputs.logits_per_text  # text-image similarity scores
+
+             return {
+                 "logits_per_image": logits_per_image.cpu().numpy().tolist(),
+                 "logits_per_text": logits_per_text.cpu().numpy().tolist()
+             }
+         except Exception as e:
+             print(f"Error during processing: {str(e)}")
+             return {"error": str(e)}