arjunanand13
/

Florence-enphase2

@@ -39,47 +39,46 @@ class EndpointHandler:
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
-    def process_image(self, image_input):
-        if isinstance(image_input, str):
-            # Check if it's a URL
-            if image_input.startswith('http://') or image_input.startswith('https://'):
-                image = Image.open(requests.get(image_input, stream=True).raw)
-            # Check if it's a base64 string
-            elif image_input.startswith('data:image'):
-                image_data = base64.b64decode(image_input.split(',')[1])
-                image = Image.open(BytesIO(image_data))
-            else:
-                raise ValueError("Invalid image input")
-        elif isinstance(image_input, bytes):
-            image = Image.open(BytesIO(image_input))
-        else:
-            raise ValueError("Unsupported image input type")
-        return image
     def __call__(self, data):
         try:
-            # Handle different input formats
-            image_input = data.pop("image", None)
-            text_input = data.pop("text", "")
-            # Process image if provided
-            image = self.process_image(image_input) if image_input else None
-            # Prepare inputs
-            inputs = self.processor(
                 images=image if image else None,
                 text=text_input,
                 return_tensors="pt"
             )
             # Move inputs to device
-            inputs = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v
-                     for k, v in inputs.items()}
             # Generate output
             with torch.no_grad():
-                outputs = self.model.generate(**inputs)
             # Decode outputs
             decoded_outputs = self.processor.batch_decode(outputs, skip_special_tokens=True)
@@ -88,7 +87,6 @@ class EndpointHandler:
         except Exception as e:
             return {"error": str(e)}
 # import subprocess
 # import sys
 # import torch

         if torch.cuda.is_available():
             torch.cuda.empty_cache()
+    def process_image(self, image_path):
+        try:
+            with open(image_path, 'rb') as image_file:
+                image = Image.open(image_file)
+                return image
+        except Exception as e:
+            print(f"Error processing image: {str(e)}")
+            return None
     def __call__(self, data):
         try:
+            # Extract inputs from the expected Hugging Face format
+            inputs = data.pop("inputs", data)
+            # Check if inputs is a dict or string
+            if isinstance(inputs, dict):
+                image_path = inputs.get("image", None)
+                text_input = inputs.get("text", "")
+            else:
+                # If inputs is not a dict, assume it's the image path
+                image_path = inputs
+                text_input = "What is in this image?"
+            # Process image
+            image = self.process_image(image_path) if image_path else None
+            # Prepare inputs for the model
+            model_inputs = self.processor(
                 images=image if image else None,
                 text=text_input,
                 return_tensors="pt"
             )
             # Move inputs to device
+            model_inputs = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v
+                           for k, v in model_inputs.items()}
             # Generate output
             with torch.no_grad():
+                outputs = self.model.generate(**model_inputs)
             # Decode outputs
             decoded_outputs = self.processor.batch_decode(outputs, skip_special_tokens=True)
         except Exception as e:
             return {"error": str(e)}
 # import subprocess
 # import sys
 # import torch