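"""Custom inference handler for a Hugging Face Inference Endpoint.

Wraps the arjunanand13/LADP_Florence-40e model: installs runtime
dependencies, loads the model, processor, and a tokenizer with a
whitespace pre-tokenizer, and answers image/text requests.
"""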
import subprocess
import sys
import torch
import base64
from io import BytesIO
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor
from tokenizers import Tokenizer, pre_tokenizers
import os

def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-warn-script-location", package])

class EndpointHandler:
    def __init__(self, path=""):
        # Install necessary packages
        required_packages = ['timm', 'einops', 'flash-attn', 'Pillow']
        for package in required_packages:
            try:
                install(package)
                print(f"Successfully installed {package}")
            except Exception as e:
                print(f"Failed to install {package}: {str(e)}")

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")

        # Load the model
        self.model_name = "arjunanand13/LADP_Florence-40e"
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name, trust_remote_code=True
        ).to(self.device)

        # Manually load the tokenizer with a whitespace pre-tokenizer
        self.tokenizer = self.load_tokenizer()

        # Initialize the processor
        self.processor = AutoProcessor.from_pretrained(self.model_name, trust_remote_code=True)

        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def load_tokenizer(self):
        """Manually loads the tokenizer and adds a whitespace pre-tokenizer."""
        try:
            tokenizer = Tokenizer.from_pretrained(self.model_name)
            tokenizer.pre_tokenizer = pre_tokenizers.Whitespace()
            print("[INFO] Whitespace pre-tokenizer added.")
            return tokenizer
        except Exception as e:
            print(f"[ERROR] Failed to load tokenizer: {str(e)}")
            return None

    def process_image(self, image_data):
        """Processes image data from file path or base64-encoded string."""
        print("[DEBUG] Attempting to process image")
        try:
            if isinstance(image_data, str) and len(image_data) < 256 and os.path.exists(image_data):
                with open(image_data, 'rb') as image_file:
                    print("[DEBUG] File opened successfully")
                    image = Image.open(image_file)
                    # Force PIL to read the pixel data now: Image.open is lazy,
                    # and the underlying file handle closes when this block exits.
                    image.load()
            else:
                print("[DEBUG] Decoding base64 image data")
                image_bytes = base64.b64decode(image_data)
                image = Image.open(BytesIO(image_bytes))

            print("[DEBUG] Image opened:", image.format, image.size, image.mode)
            return image
        except Exception as e:
            print(f"[ERROR] Error processing image: {str(e)}")
            return None

    def __call__(self, data):
        """Processes input and generates model output."""
        try:
            inputs = data.pop("inputs", data)

            if isinstance(inputs, dict):
                image_path = inputs.get("image", None)
                text_input = inputs.get("text", "")
            else:
                image_path = inputs
                text_input = "What is in this image?"

            print("[INFO] Image path:", image_path, "| Text input:", text_input)

            image = self.process_image(image_path) if image_path else None

            # process_image already returns None on failure, so the image can
            # be passed through directly.
            model_inputs = self.processor(
                images=image,
                text=text_input,
                return_tensors="pt"
            )

            model_inputs = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v
                            for k, v in model_inputs.items()}

            with torch.no_grad():
                outputs = self.model.generate(**model_inputs)

            decoded_outputs = self.processor.batch_decode(outputs, skip_special_tokens=True)
            print(f"[INFO] Generated text: {decoded_outputs[0]}")
            return {"generated_text": decoded_outputs[0]}

        except Exception as e:
            print(f"[ERROR] {str(e)}")
            return {"error": str(e)}