yeliudev committed
Commit 0ce76cd · 1 Parent(s): 214dc2c

Fix model loading

app.py CHANGED
@@ -1,6 +1,5 @@
 # Copyright (c) 2025 Ye Liu. Licensed under the BSD-3-Clause license.
 
-import os
 import re
 import uuid
 from functools import partial
@@ -20,8 +19,6 @@ from unipixel.utils.io import load_image, load_video
 from unipixel.utils.transforms import get_sam2_transform
 from unipixel.utils.visualizer import draw_mask, sample_color
 
-PATH = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
-
 MODEL = 'PolyU-ChenLab/UniPixel-3B'
 
 TITLE = 'UniPixel: Unified Object Referring and Segmentation for Pixel-Level Visual Reasoning'
 
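Note: the removed import and PATH constant served local-path handling only; with PATH gone, the Hub repo ID in MODEL is presumably passed straight to the loader. A hypothetical sketch of Hub-ID loading, not the app's actual loading code (which is outside this hunk):

    import torch
    from transformers import AutoModel, AutoProcessor

    MODEL = 'PolyU-ChenLab/UniPixel-3B'

    # Passing the repo ID directly lets transformers resolve, download and
    # cache the checkpoint, so no local PATH constant is required.
    model = AutoModel.from_pretrained(MODEL, torch_dtype=torch.float16, trust_remote_code=True)
    processor = AutoProcessor.from_pretrained(MODEL, trust_remote_code=True)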
sam2/configs/sam2.1_hiera_b+.yaml CHANGED
@@ -35,7 +35,7 @@ model:
       dropout: 0.1
       pos_enc_at_attn: false
       self_attention:
-        _target_: sam2.modeling.sam.transformer.RoPEAttention
+        # _target_: sam2.modeling.sam.transformer.RoPEAttention
         rope_theta: 10000.0
         feat_sizes: [64, 64]
         embedding_dim: 256
@@ -46,7 +46,7 @@ model:
       pos_enc_at_cross_attn_keys: true
       pos_enc_at_cross_attn_queries: false
       cross_attention:
-        _target_: sam2.modeling.sam.transformer.RoPEAttention
+        # _target_: sam2.modeling.sam.transformer.RoPEAttention
         rope_theta: 10000.0
         feat_sizes: [64, 64]
         rope_k_repeat: True
sam2/modeling/memory_attention.py CHANGED
@@ -32,8 +32,8 @@ class MemoryAttentionLayer(nn.Module):
         self.d_model = d_model
         self.dim_feedforward = dim_feedforward
         self.dropout_value = dropout
-        self.self_attn = self_attention
-        self.cross_attn_image = cross_attention
+        self.self_attn = RoPEAttention(**self_attention)
+        self.cross_attn_image = RoPEAttention(**cross_attention)
 
         # Implementation of Feedforward model
         self.linear1 = nn.Linear(d_model, dim_feedforward)
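Note: this is the counterpart of the YAML change above. self_attention and cross_attention now arrive as plain kwargs mappings, so the layer builds its RoPEAttention modules itself and they get registered as proper submodules (this assumes RoPEAttention is imported elsewhere in memory_attention.py; the import is not visible in this hunk). A toy sketch of the pattern, with nn.MultiheadAttention standing in for RoPEAttention:

    import torch.nn as nn

    class MemoryAttentionLayerSketch(nn.Module):
        # Toy version of the edited pattern: the layer owns construction of
        # its attention blocks instead of being handed ready-made modules.
        def __init__(self, d_model, self_attention, cross_attention):
            super().__init__()
            # `self.self_attn = self_attention` only works when the config
            # layer already instantiated a module; constructing here accepts
            # plain kwargs dicts and registers the parameters correctly.
            self.self_attn = nn.MultiheadAttention(d_model, **self_attention)
            self.cross_attn_image = nn.MultiheadAttention(d_model, **cross_attention)

    layer = MemoryAttentionLayerSketch(256, {'num_heads': 8}, {'num_heads': 4})
    print(sum(p.numel() for p in layer.parameters()) > 0)  # True: params registered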
unipixel/model/qwen2_5_vl.py CHANGED
@@ -18,7 +18,7 @@ from sam2.modeling.sam.prompt_encoder import PromptEncoder
 from sam2.sam2_train import BatchedVideoDatapoint
 
 
-def cache_state_hook(module, inputs, ouputs=None):
+def cache_state_hook(module, inputs, outputs=None):
     module.state = inputs[0] if isinstance(inputs, tuple) else inputs
 
 
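Note: beyond fixing the typo, the signature follows PyTorch's forward-hook convention, hook(module, inputs, output), and the outputs=None default also lets the same function be registered as a forward pre-hook, which is called with only (module, inputs). A minimal usage sketch with a stand-in nn.Linear:

    import torch
    import torch.nn as nn

    def cache_state_hook(module, inputs, outputs=None):
        # Stash the first positional input on the module for later reuse.
        module.state = inputs[0] if isinstance(inputs, tuple) else inputs

    layer = nn.Linear(4, 4)
    layer.register_forward_hook(cache_state_hook)        # (module, inputs, output)
    # layer.register_forward_pre_hook(cache_state_hook)  # valid too: outputs=None

    x = torch.randn(2, 4)
    layer(x)
    print(torch.equal(layer.state, x))  # True: the hook cached the raw input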