Upload 3 files
- vine_config.py +18 -2
- vine_model.py +66 -20
- vine_pipeline.py +1 -1
vine_config.py
CHANGED

@@ -40,7 +40,13 @@ class VineConfig(PretrainedConfig):
         self,
         model_name: str = "openai/clip-vit-base-patch32",
         hidden_dim = 768,
-
+
+        use_hf_repo: bool = False,
+        model_repo: Optional[str] = None,
+        model_file: Optional[str] = None,
+        local_dir: Optional[str] = None,
+        local_filename: Optional[str] = None,
+
         num_top_pairs: int = 18,
         segmentation_method: str = "grounding_dino_sam2",
         box_threshold: float = 0.35,
@@ -63,7 +69,17 @@ class VineConfig(PretrainedConfig):
         **kwargs
     ):
         self.model_name = model_name
-        self.
+        self.use_hf_repo = use_hf_repo
+        if use_hf_repo:
+            self.model_repo = model_repo
+            self.model_file = model_file
+            self.local_dir = None
+            self.local_filename = None
+        else:
+            self.model_repo = None
+            self.model_file = None
+            self.local_dir = local_dir
+            self.local_filename = local_filename
         self.hidden_dim = hidden_dim
         self.num_top_pairs = num_top_pairs
         self.segmentation_method = segmentation_method
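The two hunks together make the weight source mutually exclusive: whichever mode use_hf_repo selects, the constructor nulls out the other mode's fields. A minimal usage sketch under that reading (the repo id and file names below are placeholders, not part of this commit):

    from vine_config import VineConfig

    # Hub mode: repo fields are kept, local fields are forced to None
    hub_cfg = VineConfig(use_hf_repo=True, model_repo="some-org/vine-weights")
    assert hub_cfg.local_dir is None and hub_cfg.local_filename is None

    # Local mode: local fields are kept, repo fields are forced to None
    local_cfg = VineConfig(
        use_hf_repo=False,
        local_dir="checkpoints",
        local_filename="vine_model.pkl",
    )
    assert local_cfg.model_repo is None and local_cfg.model_file is None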
vine_model.py
CHANGED

@@ -57,7 +57,6 @@ class VineModel(PreTrainedModel):
 
 
         # Initialize CLIP components
-
         self.clip_tokenizer = AutoTokenizer.from_pretrained(config.model_name)
         if self.clip_tokenizer.pad_token is None:
             self.clip_tokenizer.pad_token = (
@@ -72,13 +71,36 @@ class VineModel(PreTrainedModel):
 
 
         # Then try to load pretrained VINE weights if specified
-        if config.
-            self.
+        if config.use_hf_repo:
+            self._load_huggingface_vine_weights(config.model_repo, config.model_file)
+        else:
+            self._load_local_pretrained_vine_weights(config.local_dir, config.local_filename)
 
         # Move models to device
         self.to(self._device)
+
+    def _load_huggingface_vine_weights(self, model_repo: str, model_file: Optional[str] = None):
+        """
+        Load pretrained VINE weights from HuggingFace Hub.
+        """
+        try:
+            print(f"Loading VINE weights from HuggingFace repo: {model_repo}")
+            vine_model = AutoModel.from_pretrained(
+                model_repo,
+                trust_remote_code=True,
+                revision=model_file if model_file else "main"
+            )
+            self.clip_cate_model = vine_model.clip_cate_model
+            self.clip_unary_model = vine_model.clip_unary_model
+            self.clip_binary_model = vine_model.clip_binary_model
+            print("✓ Successfully loaded VINE weights from HuggingFace Hub")
+            return True
+        except Exception as e:
+            print(f"✗ Error loading VINE weights from HuggingFace Hub: {e}")
+            print("Using base CLIP models instead")
+            return False
 
-    def
+    def _load_local_pretrained_vine_weights(self, local_dir: str, local_filename: Optional[str] = None, epoch: int = 0):
         """
         Load pretrained VINE weights from a saved .pt file or ensemble format.
         """
@@ -86,15 +108,11 @@ class VineModel(PreTrainedModel):
 
         # x = torch.load(pretrained_path, map_location=self._device, weights_only=False)
         # print(f"Loaded VINE checkpoint type: {type(x)}")
-        if
-            self.clip_tokenizer = AutoTokenizer.from_pretrained(pretrained_path)
-            self.clip_cate_model = AutoModel.from_pretrained(pretrained_path)
-            self.clip_unary_model = AutoModel.from_pretrained(pretrained_path)
-            self.clip_binary_model = AutoModel.from_pretrained(pretrained_path)
+        full_path = os.path.join(local_dir, local_filename) if local_filename else local_dir
 
-        if
-            print(f"Loading VINE weights from: {
-            loaded_vine_model = torch.load(
+        if full_path.endswith(".pkl"):
+            print(f"Loading VINE weights from: {full_path}")
+            loaded_vine_model = torch.load(full_path, map_location=self._device, weights_only=False)
 
             print(f"Loaded state type: {type(loaded_vine_model)}")
             if not isinstance(loaded_vine_model, dict):
@@ -106,17 +124,17 @@ class VineModel(PreTrainedModel):
             self.clip_binary_model.load_state_dict(loaded_vine_model.clip_binary_model.state_dict())
             return True
 
-        elif
-            state = torch.load(
+        elif full_path.endswith(".pt") or full_path.endswith(".pth"):
+            state = torch.load(full_path, map_location=self._device, weights_only=True)
             print(f"Loaded state type: {type(state)}")
             self.load_state_dict(state)
             return True
 
         # handle directory + epoch format
-        if os.path.isdir(
-            model_files = [f for f in os.listdir(
+        if os.path.isdir(full_path):
+            model_files = [f for f in os.listdir(full_path) if f.endswith(f'.{epoch}.model')]
             if model_files:
-                model_file = os.path.join(
+                model_file = os.path.join(full_path, model_files[0])
                 print(f"Loading VINE weights from: {model_file}")
                 pretrained_model = torch.load(model_file, map_location="cpu")
 
@@ -131,7 +149,7 @@ class VineModel(PreTrainedModel):
                 print("✓ Loaded all sub-model weights from ensemble format")
                 return True
             else:
-                print(f"No model file found for epoch {epoch} in {
+                print(f"No model file found for epoch {epoch} in {full_path}")
                 return False
 
         print("Unsupported format for pretrained_vine_path")
@@ -249,10 +267,38 @@ class VineModel(PreTrainedModel):
         Returns:
            VineModel instance with loaded weights
        """
+        # Normalize the incoming model_path into the new VineConfig fields.
        if config is None:
-
+            # Heuristics: if path looks like a HF repo (contains a "/" and
+            # doesn't exist on disk) treat it as a repo. Otherwise treat as local.
+            if model_path and ("/" in model_path and not os.path.exists(model_path)):
+                config = VineConfig(use_hf_repo=True, model_repo=model_path)
+            else:
+                # Local path: could be a file or directory
+                if os.path.isdir(model_path):
+                    config = VineConfig(use_hf_repo=False, local_dir=model_path)
+                else:
+                    config = VineConfig(
+                        use_hf_repo=False,
+                        local_dir=os.path.dirname(model_path) or None,
+                        local_filename=os.path.basename(model_path) or None,
+                    )
        else:
-            config
+            # Update provided config to reflect the requested pretrained path
+            if model_path and ("/" in model_path and not os.path.exists(model_path)):
+                config.use_hf_repo = True
+                config.model_repo = model_path
+                config.model_file = None
+                config.local_dir = None
+                config.local_filename = None
+            else:
+                config.use_hf_repo = False
+                if os.path.isdir(model_path):
+                    config.local_dir = model_path
+                    config.local_filename = None
+                else:
+                    config.local_dir = os.path.dirname(model_path) or None
+                    config.local_filename = os.path.basename(model_path) or None
 
        # Create model instance (will automatically load weights)
        model = cls(config, **kwargs)
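The from_pretrained hunk repeats one heuristic in both branches: a model_path that contains "/" but does not exist on disk is treated as a Hub repo id, anything else as a local path. A self-contained sketch of just that check (looks_like_hf_repo is a hypothetical helper, not part of the diff):

    import os

    def looks_like_hf_repo(model_path: str) -> bool:
        # Same test as in from_pretrained: a "/" in a path that does not
        # resolve on disk is assumed to name a HuggingFace Hub repo.
        return bool(model_path) and "/" in model_path and not os.path.exists(model_path)

    print(looks_like_hf_repo("some-org/vine-weights"))  # True, assuming no such local path
    print(looks_like_hf_repo("checkpoints"))            # False: no "/" in the path

One caveat worth noting: _load_huggingface_vine_weights passes model_file through as the revision argument of AutoModel.from_pretrained, and transformers expects revision to be a git ref (branch, tag, or commit hash) rather than a filename.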
vine_pipeline.py
CHANGED

@@ -391,7 +391,7 @@ class VinePipeline(Pipeline):
                crop_n_layers=2,
                box_nms_thresh=0.6,
                crop_n_points_downscale_factor=2,
-                min_mask_region_area=
+                min_mask_region_area=100,
                use_m2m=True,
            )
            print("✓ SAM2 models initialized successfully")
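For context on the restored value: min_mask_region_area is a post-processing knob on SAM2's automatic mask generator that removes disconnected mask regions and holes smaller than the given pixel area. A hedged sketch of a comparable constructor call, assuming the sam2 package layout and placeholder config/checkpoint paths:

    from sam2.build_sam import build_sam2
    from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator

    sam2_model = build_sam2(
        "configs/sam2.1/sam2.1_hiera_l.yaml",  # placeholder model config
        "checkpoints/sam2.1_hiera_large.pt",   # placeholder checkpoint
    )
    mask_generator = SAM2AutomaticMaskGenerator(
        model=sam2_model,
        crop_n_layers=2,
        box_nms_thresh=0.6,
        crop_n_points_downscale_factor=2,
        min_mask_region_area=100,  # drop regions/holes under 100 pixels
        use_m2m=True,
    )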