Tiny dummy models
Collection
Randomly initialized tiny models for debugging and testing purposes
•
65 items
•
Updated
•
4
This model is intended for debugging only: its weights are randomly initialized, so its outputs are not meaningful.
from transformers import CLIPProcessor, CLIPModel, CLIPConfig
from PIL import Image
import requests
import torch
# Example usage: load the tiny CLIP checkpoint from the Hub and score a single
# image/text pair. Requires a CUDA device, since the model and the inputs are
# both moved to "cuda".
model_id = "yujiepan/clip-vit-tiny-random-patch14-336"
model = CLIPModel.from_pretrained(model_id).cuda()
processor = CLIPProcessor.from_pretrained(model_id)

url = "https://assets-c4akfrf5b4d3f4b7.z01.azurefd.net/assets/2024/04/BMDataViz_661fb89f3845e.png"
# requests applies no timeout by default, so bound the wait explicitly, and
# fail on an HTTP error status instead of handing an error page to PIL.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)
text = "A description of the image"

inputs = processor(text=[text], images=image, return_tensors="pt", padding=True).to("cuda")

# Inference only: skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    outputs = model(**inputs)

logits_per_image = outputs.logits_per_image  # shape: [num_images, num_texts]
logits_per_text = outputs.logits_per_text  # shape: [num_texts, num_images]
# Normalize over the text axis: one distribution over texts per image.
probs = logits_per_image.softmax(dim=1)  # shape: [num_images, num_texts]
print(probs)
from transformers import CLIPProcessor, CLIPModel, CLIPConfig
from PIL import Image
import requests
import torch
# Creation script: build a tiny, randomly initialized CLIP model from the
# configuration of the reference checkpoint, smoke-test it on one image/text
# pair, then publish the model and processor to the Hub. Requires a CUDA
# device and Hub write credentials for the target repository.
model_name = "openai/clip-vit-large-patch14-336"
repo_id = "yujiepan/clip-vit-tiny-random-patch14-336"

# Size overrides applied identically to the text tower and the vision tower.
tiny_tower_overrides = {
    "hidden_size": 8,
    "projection_dim": 8,
    "intermediate_size": 16,
    "num_hidden_layers": 2,
    "num_attention_heads": 2,
}

# Start from the reference configuration so every setting that is not
# overridden below keeps the reference checkpoint's value.
config = CLIPConfig.from_pretrained(model_name).to_dict()
config["projection_dim"] = 8
for tower_key in ("text_config", "vision_config"):
    config[tower_key].update(tiny_tower_overrides)
config = CLIPConfig.from_dict(config)

# Constructing from a config (not from_pretrained) gives freshly initialized
# weights; they are cast to fp16 and moved to the GPU.
model = CLIPModel(config).half().cuda()
# The processor is reused unchanged from the reference checkpoint.
processor = CLIPProcessor.from_pretrained(model_name)

url = "https://assets-c4akfrf5b4d3f4b7.z01.azurefd.net/assets/2024/04/BMDataViz_661fb89f3845e.png"
# requests applies no timeout by default, so bound the wait explicitly, and
# fail on an HTTP error status instead of handing an error page to PIL.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)
text = "A description of the image"

inputs = processor(text=[text], images=image, return_tensors="pt", padding=True).to("cuda")

# Smoke test: a single forward pass without autograd bookkeeping.
with torch.no_grad():
    outputs = model(**inputs)

logits_per_image = outputs.logits_per_image  # shape: [num_images, num_texts]
logits_per_text = outputs.logits_per_text  # shape: [num_texts, num_images]
# Normalize over the text axis: one distribution over texts per image.
probs = logits_per_image.softmax(dim=1)  # shape: [num_images, num_texts]
print(probs)

# Publish only after the forward pass above has succeeded.
model.push_to_hub(repo_id)
processor.push_to_hub(repo_id)