Spaces: Running on Zero

Fix model loading

- app.py +0 -3
- sam2/configs/sam2.1_hiera_b+.yaml +2 -2
- sam2/modeling/memory_attention.py +2 -2
- unipixel/model/qwen2_5_vl.py +1 -1
app.py
CHANGED
@@ -1,6 +1,5 @@
 # Copyright (c) 2025 Ye Liu. Licensed under the BSD-3-Clause license.
 
-import os
 import re
 import uuid
 from functools import partial
@@ -20,8 +19,6 @@ from unipixel.utils.io import load_image, load_video
 from unipixel.utils.transforms import get_sam2_transform
 from unipixel.utils.visualizer import draw_mask, sample_color
 
-PATH = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
-
 MODEL = 'PolyU-ChenLab/UniPixel-3B'
 
 TITLE = 'UniPixel: Unified Object Referring and Segmentation for Pixel-Level Visual Reasoning'
sam2/configs/sam2.1_hiera_b+.yaml
CHANGED
@@ -35,7 +35,7 @@ model:
       dropout: 0.1
       pos_enc_at_attn: false
       self_attention:
-        _target_: sam2.modeling.sam.transformer.RoPEAttention
+        # _target_: sam2.modeling.sam.transformer.RoPEAttention
         rope_theta: 10000.0
         feat_sizes: [64, 64]
         embedding_dim: 256
@@ -46,7 +46,7 @@ model:
       pos_enc_at_cross_attn_keys: true
       pos_enc_at_cross_attn_queries: false
       cross_attention:
-        _target_: sam2.modeling.sam.transformer.RoPEAttention
+        # _target_: sam2.modeling.sam.transformer.RoPEAttention
         rope_theta: 10000.0
         feat_sizes: [64, 64]
         rope_k_repeat: True
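
With `_target_` commented out, these two nodes stop being Hydra instantiation specs and become plain mappings of `RoPEAttention` keyword arguments. A minimal sketch of what that means at load time, assuming the config is read with OmegaConf as in the upstream SAM 2 code (the `model.memory_attention.layer.*` path is taken from the upstream config, not from this diff):

# Minimal sketch, assuming OmegaConf loads this config as in upstream SAM 2.
# The node path below is assumed from the upstream sam2.1_hiera_b+.yaml.
from omegaconf import OmegaConf

cfg = OmegaConf.load('sam2/configs/sam2.1_hiera_b+.yaml')
attn_cfg = cfg.model.memory_attention.layer.self_attention

# With no `_target_` key, Hydra has nothing to instantiate here, so the
# consuming module receives this node as a plain mapping of kwargs.
print(OmegaConf.to_container(attn_cfg))
# -> {'rope_theta': 10000.0, 'feat_sizes': [64, 64], 'embedding_dim': 256, ...}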
sam2/modeling/memory_attention.py
CHANGED
@@ -32,8 +32,8 @@ class MemoryAttentionLayer(nn.Module):
         self.d_model = d_model
         self.dim_feedforward = dim_feedforward
         self.dropout_value = dropout
-        self.self_attn = self_attention
-        self.cross_attn_image = cross_attention
+        self.self_attn = RoPEAttention(**self_attention)
+        self.cross_attn_image = RoPEAttention(**cross_attention)
 
         # Implementation of Feedforward model
         self.linear1 = nn.Linear(d_model, dim_feedforward)
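
This is the other half of the change: because the YAML nodes above no longer carry a `_target_`, `self_attention` and `cross_attention` arrive as plain kwarg mappings, and `MemoryAttentionLayer` builds the `RoPEAttention` modules itself (upstream `memory_attention.py` already imports `RoPEAttention`, which is consistent with the +2 -2 summary). A sketch of the equivalent direct construction, with parameter values assumed from the config above:

# Sketch of the direct construction the layer now performs; the kwargs
# mirror the (assumed) values from sam2.1_hiera_b+.yaml.
from sam2.modeling.sam.transformer import RoPEAttention

self_attention = {
    'rope_theta': 10000.0,
    'feat_sizes': [64, 64],
    'embedding_dim': 256,
    'num_heads': 1,        # assumed from the upstream config, not in this diff
    'downsample_rate': 1,  # assumed likewise
    'dropout': 0.1,
}
self_attn = RoPEAttention(**self_attention)

Given the commit title, a plausible reading is that skipping `hydra.utils.instantiate` for these submodules at load time is what fixes model loading on this Space; the diff itself does not record the original error.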
unipixel/model/qwen2_5_vl.py
CHANGED
@@ -18,7 +18,7 @@ from sam2.modeling.sam.prompt_encoder import PromptEncoder
 from sam2.sam2_train import BatchedVideoDatapoint
 
 
-def cache_state_hook(module, inputs, outputs):
+def cache_state_hook(module, inputs, outputs=None):
     module.state = inputs[0] if isinstance(inputs, tuple) else inputs
 
 
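
For context on the one-line signature change: PyTorch calls a forward pre-hook as `hook(module, inputs)` but a forward hook as `hook(module, inputs, outputs)`, so defaulting `outputs=None` lets the same function be registered either way. A self-contained sketch using a hypothetical stand-in module (`nn.Linear` here is illustrative, not from the repo):

# Self-contained sketch; nn.Linear is a hypothetical stand-in module.
import torch
import torch.nn as nn


def cache_state_hook(module, inputs, outputs=None):
    # Cache the first positional input on the module for later retrieval.
    module.state = inputs[0] if isinstance(inputs, tuple) else inputs


layer = nn.Linear(4, 4)
layer.register_forward_pre_hook(cache_state_hook)  # called as hook(module, inputs)
layer.register_forward_hook(cache_state_hook)      # called as hook(module, inputs, outputs)
layer(torch.randn(2, 4))
print(layer.state.shape)  # torch.Size([2, 4]) - the cached input tensor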