ryaalbr committed
Commit 54beb65
1 Parent(s): 4150f63

Update app.py

Files changed (1)
app.py  +13 -17
app.py CHANGED
@@ -15,7 +15,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 
-orig_clip_model, orig_clip_processor = clip.load("ViT-B/32", device=device, jit=False)
+#orig_clip_model, orig_clip_processor = clip.load("ViT-B/32", device=device, jit=False)
 
 
 # Load the Unsplash dataset
@@ -31,15 +31,15 @@ def predict(image, labels):
     return {k: float(v) for k, v in zip(labels, probs[0])}
 
 
-def predict2(image, labels):
-    image = orig_clip_processor(image).unsqueeze(0).to(device)
-    text = clip.tokenize(labels).to(device)
-    with torch.no_grad():
-        image_features = orig_clip_model.encode_image(image)
-        text_features = orig_clip_model.encode_text(text)
-        logits_per_image, logits_per_text = orig_clip_model(image, text)
-        probs = logits_per_image.softmax(dim=-1).cpu().numpy()
-    return {k: float(v) for k, v in zip(labels, probs[0])}
+# def predict2(image, labels):
+#     image = orig_clip_processor(image).unsqueeze(0).to(device)
+#     text = clip.tokenize(labels).to(device)
+#     with torch.no_grad():
+#         image_features = orig_clip_model.encode_image(image)
+#         text_features = orig_clip_model.encode_text(text)
+#         logits_per_image, logits_per_text = orig_clip_model(image, text)
+#         probs = logits_per_image.softmax(dim=-1).cpu().numpy()
+#     return {k: float(v) for k, v in zip(labels, probs[0])}
 
 def rand_image():
     n = dataset.num_rows
@@ -64,15 +64,11 @@ with open(emb_filename, 'rb') as emb:
 
 def search(search_query):
 
-
-
-
-
     with torch.no_grad():
 
         # Encode and normalize the description using CLIP (HF CLIP)
-        inputs = processor(text=[text], images=None, return_tensors="pt", padding=True)
-        text_encoded = model.get_text_features(**inputs)
+        inputs = processor(text=search_query, images=None, return_tensors="pt", padding=True)
+        text_encoded = model.get_text_features(**inputs)
 
         # # Encode and normalize the description using CLIP (original CLIP)
         # text_encoded = orig_clip_model.encode_text(clip.tokenize(search_query))
@@ -135,7 +131,7 @@ with gr.Blocks(css=".caption-text {font-size: 40px !important;}") as demo:
                                  "day, night, dawn, dusk"], inputs=label_text)
     with gr.Row():
         with gr.Column(variant="panel"):
-            im = gr.Image(interactive=False, type="pil").style(height=height)
+            im = gr.Image(interactive=False).style(height=height)
             with gr.Row():
                 get_btn = gr.Button("Get Random Image").style(full_width=False)
                 reclass_btn = gr.Button("Re-Classify Image").style(full_width=False)
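
For context on the removed helper: predict2 ran zero-shot classification through the original OpenAI clip package (clip.tokenize, orig_clip_model), which this commit comments out along with the clip.load call, leaving only the Hugging Face CLIPModel and CLIPProcessor loaded at the top of app.py. The kept predict function is outside this diff, so the sketch below is only an illustration of the equivalent flow with the HF API; the function name classify is invented for the example and is not the app's code.

import torch
from transformers import CLIPModel, CLIPProcessor

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def classify(image, labels):
    # The processor tokenizes the label prompts and preprocesses the PIL image
    # (resize, crop, normalize) in a single call.
    inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # logits_per_image holds the image's similarity score against each label prompt.
    probs = outputs.logits_per_image.softmax(dim=-1).cpu().numpy()
    return {k: float(v) for k, v in zip(labels, probs[0])}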
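
The search hunk also fixes what looks like an undefined-name bug: the old call passed text=[text] even though the function's argument is search_query. Below is a minimal sketch of the corrected text-encoding step, reusing the model and processor loaded at the top of app.py; the helper name encode_query is illustrative, and the final normalization line is an assumption suggested by the "Encode and normalize" comment, since the actual similarity ranking code falls outside this diff.

def encode_query(search_query):
    with torch.no_grad():
        # images=None because only CLIP's text branch is needed for a query.
        inputs = processor(text=search_query, images=None, return_tensors="pt", padding=True)
        text_encoded = model.get_text_features(**inputs)
    # Normalize so a dot product with unit-norm image embeddings gives cosine similarity.
    return text_encoded / text_encoded.norm(dim=-1, keepdim=True)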