Update app.py
app.py
CHANGED
@@ -40,6 +40,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
 clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 
+clip_model = clip_model.to(device)
 #orig_clip_model, orig_clip_processor = clip.load("ViT-B/32", device=device, jit=False)
 
 
@@ -58,7 +59,7 @@ height = 256 # height for resizing images
 
 def predict(image, labels):
     with torch.no_grad():
-        inputs = clip_processor(text=[f"a photo of {c}" for c in labels], images=image, return_tensors="pt", padding=True)
+        inputs = clip_processor(text=[f"a photo of {c}" for c in labels], images=image, return_tensors="pt", padding=True).to(device)
         outputs = clip_model(**inputs)
         logits_per_image = outputs.logits_per_image # this is the image-text similarity score
         probs = logits_per_image.softmax(dim=1).cpu().numpy() # we can take the softmax to get the label probabilities
@@ -144,7 +145,7 @@ def get_caption(img,model_name):
     model = model.eval()
     model = model.to(device)
 
-
+
 
     input = clip_processor(images=img, return_tensors="pt").to(device)
     with torch.no_grad():
@@ -181,7 +182,7 @@ def search(search_query):
     with torch.no_grad():
 
         # Encode and normalize the description using CLIP (HF CLIP)
-        inputs = clip_processor(text=search_query, images=None, return_tensors="pt", padding=True)
+        inputs = clip_processor(text=search_query, images=None, return_tensors="pt", padding=True).to(device)
         text_encoded = clip_model.get_text_features(**inputs)
 
         # # Encode and normalize the description using CLIP (original CLIP)
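Taken together, the change moves the HF CLIP model onto device once at load time and appends .to(device) to each clip_processor(...) call, so the resulting BatchEncoding tensors live on the same device as the model (presumably to avoid a CPU/GPU device-mismatch error at inference). Below is a minimal sketch of that pattern, assuming the same openai/clip-vit-base-patch32 checkpoint used above. encode_text is a hypothetical helper name used here for illustration (in the Space this code sits inside search()), and the return value of predict is illustrative only, since the rest of that function is not shown in this diff.

import torch
from transformers import CLIPModel, CLIPProcessor

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the HF CLIP checkpoint and move the model to the device once.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def predict(image, labels):
    # Zero-shot classification: the processor returns a BatchEncoding, and
    # .to(device) moves all of its tensors onto the model's device.
    with torch.no_grad():
        inputs = clip_processor(
            text=[f"a photo of {c}" for c in labels],
            images=image,
            return_tensors="pt",
            padding=True,
        ).to(device)
        outputs = clip_model(**inputs)
        probs = outputs.logits_per_image.softmax(dim=1).cpu().numpy()
    # Illustrative return value; the Space's actual return is not part of this diff.
    return dict(zip(labels, probs[0].tolist()))

def encode_text(search_query):
    # Hypothetical helper showing the same pattern for the text-only path.
    with torch.no_grad():
        inputs = clip_processor(text=search_query, images=None, return_tensors="pt", padding=True).to(device)
        text_encoded = clip_model.get_text_features(**inputs)
    return text_encoded

Calling .to(device) on the BatchEncoding moves input_ids, attention_mask, and pixel_values together, which is likely why the commit appends it to each clip_processor(...) call rather than moving the tensors one by one.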