see-2-sound

Sleeping

App Files Files Community

see-2-sound / torchhub /facebookresearch_dinov2_main /hubconf.py

rishitdagli

push a demo

46ff99b 5 months ago

raw

history blame

5.4 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	#
	# This source code is licensed under the Apache License, Version 2.0
	# found in the LICENSE file in the root directory of this source tree.

	from enum import Enum
	from typing import Union

	import torch

	_DINOV2_BASE_URL = "https://dl.fbaipublicfiles.com/dinov2"


	def _make_dinov2_model_name(
	arch_name: str, patch_size: int, num_register_tokens: int = 0
	) -> str:
	compact_arch_name = arch_name.replace("_", "")[:4]
	registers_suffix = f"_reg{num_register_tokens}" if num_register_tokens else ""
	return f"dinov2_{compact_arch_name}{patch_size}{registers_suffix}"


	class Weights(Enum):
	LVD142M = "LVD142M"


	def _make_dinov2_model(
	*,
	arch_name: str = "vit_large",
	img_size: int = 518,
	patch_size: int = 14,
	init_values: float = 1.0,
	ffn_layer: str = "mlp",
	block_chunks: int = 0,
	num_register_tokens: int = 0,
	interpolate_antialias: bool = False,
	interpolate_offset: float = 0.1,
	pretrained: bool = True,
	weights: Union[Weights, str] = Weights.LVD142M,
	**kwargs,
	):
	import vision_transformer as vits

	if isinstance(weights, str):
	try:
	weights = Weights[weights]
	except KeyError:
	raise AssertionError(f"Unsupported weights: {weights}")

	model_base_name = _make_dinov2_model_name(arch_name, patch_size)
	vit_kwargs = dict(
	img_size=img_size,
	patch_size=patch_size,
	init_values=init_values,
	ffn_layer=ffn_layer,
	block_chunks=block_chunks,
	num_register_tokens=num_register_tokens,
	interpolate_antialias=interpolate_antialias,
	interpolate_offset=interpolate_offset,
	)
	vit_kwargs.update(**kwargs)
	model = vits.__dict__[arch_name](**vit_kwargs)

	if pretrained:
	model_full_name = _make_dinov2_model_name(
	arch_name, patch_size, num_register_tokens
	)
	url = _DINOV2_BASE_URL + f"/{model_base_name}/{model_full_name}_pretrain.pth"
	state_dict = torch.hub.load_state_dict_from_url(url, map_location="cpu")
	model.load_state_dict(state_dict, strict=True)

	return model


	def dinov2_vits14(
	, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, *kwargs
	):
	"""
	DINOv2 ViT-S/14 model (optionally) pretrained on the LVD-142M dataset.
	"""
	return _make_dinov2_model(
	arch_name="vit_small", pretrained=pretrained, weights=weights, **kwargs
	)


	def dinov2_vitb14(
	, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, *kwargs
	):
	"""
	DINOv2 ViT-B/14 model (optionally) pretrained on the LVD-142M dataset.
	"""
	return _make_dinov2_model(
	arch_name="vit_base", pretrained=pretrained, weights=weights, **kwargs
	)


	def dinov2_vitl14(
	, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, *kwargs
	):
	"""
	DINOv2 ViT-L/14 model (optionally) pretrained on the LVD-142M dataset.
	"""
	return _make_dinov2_model(
	arch_name="vit_large", pretrained=pretrained, weights=weights, **kwargs
	)


	def dinov2_vitg14(
	, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, *kwargs
	):
	"""
	DINOv2 ViT-g/14 model (optionally) pretrained on the LVD-142M dataset.
	"""
	return _make_dinov2_model(
	arch_name="vit_giant2",
	ffn_layer="swiglufused",
	weights=weights,
	pretrained=pretrained,
	**kwargs,
	)


	def dinov2_vits14_reg(
	, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, *kwargs
	):
	"""
	DINOv2 ViT-S/14 model with registers (optionally) pretrained on the LVD-142M dataset.
	"""
	return _make_dinov2_model(
	arch_name="vit_small",
	pretrained=pretrained,
	weights=weights,
	num_register_tokens=4,
	interpolate_antialias=True,
	interpolate_offset=0.0,
	**kwargs,
	)


	def dinov2_vitb14_reg(
	, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, *kwargs
	):
	"""
	DINOv2 ViT-B/14 model with registers (optionally) pretrained on the LVD-142M dataset.
	"""
	return _make_dinov2_model(
	arch_name="vit_base",
	pretrained=pretrained,
	weights=weights,
	num_register_tokens=4,
	interpolate_antialias=True,
	interpolate_offset=0.0,
	**kwargs,
	)


	def dinov2_vitl14_reg(
	, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, *kwargs
	):
	"""
	DINOv2 ViT-L/14 model with registers (optionally) pretrained on the LVD-142M dataset.
	"""
	return _make_dinov2_model(
	arch_name="vit_large",
	pretrained=pretrained,
	weights=weights,
	num_register_tokens=4,
	interpolate_antialias=True,
	interpolate_offset=0.0,
	**kwargs,
	)


	def dinov2_vitg14_reg(
	, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, *kwargs
	):
	"""
	DINOv2 ViT-g/14 model with registers (optionally) pretrained on the LVD-142M dataset.
	"""
	return _make_dinov2_model(
	arch_name="vit_giant2",
	ffn_layer="swiglufused",
	weights=weights,
	pretrained=pretrained,
	num_register_tokens=4,
	interpolate_antialias=True,
	interpolate_offset=0.0,
	**kwargs,
	)