Spaces: Running on Zero

Fix model loading

- app.py +0 -3
- sam2/configs/sam2.1_hiera_b+.yaml +2 -2
- sam2/modeling/memory_attention.py +2 -2
- unipixel/model/qwen2_5_vl.py +1 -1
app.py
CHANGED
@@ -1,6 +1,5 @@
 # Copyright (c) 2025 Ye Liu. Licensed under the BSD-3-Clause license.
 
-import os
 import re
 import uuid
 from functools import partial
@@ -20,8 +19,6 @@ from unipixel.utils.io import load_image, load_video
 from unipixel.utils.transforms import get_sam2_transform
 from unipixel.utils.visualizer import draw_mask, sample_color
 
-PATH = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
-
 MODEL = 'PolyU-ChenLab/UniPixel-3B'
 
 TITLE = 'UniPixel: Unified Object Referring and Segmentation for Pixel-Level Visual Reasoning'
sam2/configs/sam2.1_hiera_b+.yaml
CHANGED
@@ -35,7 +35,7 @@ model:
       dropout: 0.1
       pos_enc_at_attn: false
       self_attention:
-        _target_: sam2.modeling.sam.transformer.RoPEAttention
+        # _target_: sam2.modeling.sam.transformer.RoPEAttention
         rope_theta: 10000.0
         feat_sizes: [64, 64]
         embedding_dim: 256
@@ -46,7 +46,7 @@ model:
       pos_enc_at_cross_attn_keys: true
       pos_enc_at_cross_attn_queries: false
       cross_attention:
-        _target_: sam2.modeling.sam.transformer.RoPEAttention
+        # _target_: sam2.modeling.sam.transformer.RoPEAttention
         rope_theta: 10000.0
         feat_sizes: [64, 64]
         rope_k_repeat: True
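
With `_target_` commented out, these two nodes stop being Hydra instantiation specs and become plain mappings of `RoPEAttention` keyword arguments. A minimal sketch of what that means at load time, assuming the config is read with OmegaConf as in the upstream SAM 2 code (the `model.memory_attention.layer.*` path is taken from the upstream config, not from this diff):

# Minimal sketch, assuming OmegaConf loads this config as in upstream SAM 2.
# The node path below is assumed from the upstream sam2.1_hiera_b+.yaml.
from omegaconf import OmegaConf

cfg = OmegaConf.load('sam2/configs/sam2.1_hiera_b+.yaml')
attn_cfg = cfg.model.memory_attention.layer.self_attention

# With no `_target_` key, Hydra has nothing to instantiate here, so the
# consuming module receives this node as a plain mapping of kwargs.
print(OmegaConf.to_container(attn_cfg))
# -> {'rope_theta': 10000.0, 'feat_sizes': [64, 64], 'embedding_dim': 256, ...}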
sam2/modeling/memory_attention.py
CHANGED
@@ -32,8 +32,8 @@ class MemoryAttentionLayer(nn.Module):
         self.d_model = d_model
         self.dim_feedforward = dim_feedforward
         self.dropout_value = dropout
-        self.self_attn = self_attention
-        self.cross_attn_image = cross_attention
+        self.self_attn = RoPEAttention(**self_attention)
+        self.cross_attn_image = RoPEAttention(**cross_attention)
 
         # Implementation of Feedforward model
         self.linear1 = nn.Linear(d_model, dim_feedforward)
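
This is the other half of the change: because the YAML nodes above no longer carry a `_target_`, `self_attention` and `cross_attention` arrive as plain kwarg mappings, and `MemoryAttentionLayer` builds the `RoPEAttention` modules itself (upstream `memory_attention.py` already imports `RoPEAttention`, which is consistent with the +2 -2 summary). A sketch of the equivalent direct construction, with parameter values assumed from the config above:

# Sketch of the direct construction the layer now performs; the kwargs
# mirror the (assumed) values from sam2.1_hiera_b+.yaml.
from sam2.modeling.sam.transformer import RoPEAttention

self_attention = {
    'rope_theta': 10000.0,
    'feat_sizes': [64, 64],
    'embedding_dim': 256,
    'num_heads': 1,        # assumed from the upstream config, not in this diff
    'downsample_rate': 1,  # assumed likewise
    'dropout': 0.1,
}
self_attn = RoPEAttention(**self_attention)

Given the commit title, a plausible reading is that skipping `hydra.utils.instantiate` for these submodules at load time is what fixes model loading on this Space; the diff itself does not record the original error.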
unipixel/model/qwen2_5_vl.py
CHANGED
@@ -18,7 +18,7 @@ from sam2.modeling.sam.prompt_encoder import PromptEncoder
 from sam2.sam2_train import BatchedVideoDatapoint
 
 
-def cache_state_hook(module, inputs, outputs):
+def cache_state_hook(module, inputs, outputs=None):
     module.state = inputs[0] if isinstance(inputs, tuple) else inputs
 
 
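
For context on the one-line signature change: PyTorch calls a forward pre-hook as `hook(module, inputs)` but a forward hook as `hook(module, inputs, outputs)`, so defaulting `outputs=None` lets the same function be registered either way. A self-contained sketch using a hypothetical stand-in module (`nn.Linear` here is illustrative, not from the repo):

# Self-contained sketch; nn.Linear is a hypothetical stand-in module.
import torch
import torch.nn as nn


def cache_state_hook(module, inputs, outputs=None):
    # Cache the first positional input on the module for later retrieval.
    module.state = inputs[0] if isinstance(inputs, tuple) else inputs


layer = nn.Linear(4, 4)
layer.register_forward_pre_hook(cache_state_hook)  # called as hook(module, inputs)
layer.register_forward_hook(cache_state_hook)      # called as hook(module, inputs, outputs)
layer(torch.randn(2, 4))
print(layer.state.shape)  # torch.Size([2, 4]) - the cached input tensor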