Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,055 Bytes
947767a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
from .clip_encoder import CLIPVisionTower
def build_vision_tower(vision_tower_cfg, **kwargs):
    """Construct the vision tower selected by ``vision_tower_cfg``.

    The tower name is read from ``vision_tower_cfg.mm_vision_tower``; when
    that attribute is absent, ``vision_tower_cfg.vision_tower`` is used
    instead.

    Args:
        vision_tower_cfg: Config object carrying the tower name. It is also
            forwarded to the tower constructor as ``args``.
        **kwargs: Extra keyword arguments, forwarded only to
            ``CLIPVisionTower``.

    Returns:
        The constructed tower, or ``None`` when the name is
        ``"bbox-ground-truth"``.

    Raises:
        ValueError: If the tower name is missing, is not a string, or does
            not match any supported tower.
    """
    vision_tower = getattr(
        vision_tower_cfg,
        "mm_vision_tower",
        getattr(vision_tower_cfg, "vision_tower", None),
    )

    # A missing or non-string name would otherwise fail on `.startswith`
    # with an AttributeError; report it the same way as an unknown name.
    if not isinstance(vision_tower, str):
        raise ValueError(f"Unknown vision tower: {vision_tower}")

    if vision_tower.startswith(("openai", "laion")):
        return CLIPVisionTower(vision_tower, args=vision_tower_cfg, **kwargs)

    # NOTE(review): the PointCloud* tower classes below are not imported in
    # the visible part of this file -- confirm they are in scope, otherwise
    # these branches raise NameError.
    if vision_tower == "pointcloud-perceiver":
        return PointCloudPerceiverVisionTower(vision_tower, args=vision_tower_cfg)
    if vision_tower == "pointcloud-minkowski":
        return PointCloudMinkowskiVisionTower(vision_tower, args=vision_tower_cfg)
    if vision_tower == "pointcloud-minkowski-mask3d":
        return PointCloudMinkowskiMask3DVisionTower(vision_tower, args=vision_tower_cfg)
    if vision_tower == "bbox-ground-truth":
        # return None so that there is no vision input to the LLM, see
        # prepare_inputs_labels_for_multimodal() in llava_arch.py
        return None

    raise ValueError(f"Unknown vision tower: {vision_tower}")
|