RobotJelly committed on
Commit e59258f
1 Parent(s): 333423b
Files changed (1)
  1. app.py +16 -9
app.py CHANGED
@@ -8,15 +8,16 @@ from PIL import Image
 from io import BytesIO
 import requests
 import gradio as gr
-# Load the openAI's CLIP model
-#model, preprocess = clip.load("ViT-B/32", jit=False)
-#display output photo
+from transformers import CLIPProcessor, CLIPModel, CLIPTokenizer
 
 # check if CUDA available
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Load the openAI's CLIP model
-model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
+#model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
+model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
 
 # taking photo IDs
 photo_ids = pd.read_csv("./photo_ids.csv")
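For context, a minimal standalone sketch of the new loading path, assuming only that torch and transformers are installed. Note the commit never calls model.to(device), so the model stays on the CPU even when CUDA is detected; the explicit .to(device) below is an addition for illustration, not part of the diff:

```python
import torch
from transformers import CLIPModel, CLIPProcessor, CLIPTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"

# from_pretrained() already returns the model in eval mode; the
# .to(device) call is the one step the committed code leaves out.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
```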
@@ -44,13 +45,17 @@ def show_output_image(matched_images) :
         #img = Image.open('./photos/'+photo_jpg)
         image.append(img)
     return image
+
 # Encode and normalize the search query using CLIP
 def encode_search_query(search_query, model, device):
     with torch.no_grad():
-        text_encoded = model.encode_text(clip.tokenize(search_query).to(device))
-        text_encoded /= text_encoded.norm(dim=-1, keepdim=True)
+        inputs = tokenizer([search_query], padding=True, return_tensors="pt")
+        #text_encoded = model.encode_text(clip.tokenize(search_query).to(device))
+        #text_encoded /= text_encoded.norm(dim=-1, keepdim=True)
     # Retrieve the feature vector from the GPU and convert it to a numpy array
-    return text_encoded.cpu().numpy()
+    return model.get_text_features(**inputs).cpu().numpy()
+    #return text_encoded.cpu().numpy()
+
 # Find all matched photos
 def find_matches(text_features, photo_features, photo_ids, results_count=4):
     # Compute the similarity between the search query and each photo using the Cosine similarity
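One behavioral difference worth flagging in this hunk: get_text_features() returns unnormalized embeddings, whereas the removed clip branch divided by the norm before returning, so if find_matches() ranks by dot product the dropped normalization changes the scores. A sketch that restores it (the explicit tokenizer parameter is an assumption for self-containment; the committed function reads the tokenizer from module scope):

```python
import torch

def encode_search_query(search_query, model, tokenizer, device):
    with torch.no_grad():
        # BatchEncoding.to() moves input_ids/attention_mask to the device
        inputs = tokenizer([search_query], padding=True, return_tensors="pt").to(device)
        text_encoded = model.get_text_features(**inputs)
        # Re-apply the unit-norm step the old clip.load() path had, so a
        # plain dot product downstream remains a cosine similarity
        text_encoded /= text_encoded.norm(dim=-1, keepdim=True)
    return text_encoded.cpu().numpy()
```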
@@ -84,8 +89,10 @@ def image_search(search_text, search_image, option):
     elif option == "Image-To-Image":
         # Input Image for Search
         with torch.no_grad():
-            image_feature = model.encode_image(preprocess(search_image).unsqueeze(0).to(device))
-            image_feature = (image_feature / image_feature.norm(dim=-1, keepdim=True)).cpu().numpy()
+            processed_image = processor(text=None, images=search_image, return_tensors="pt", padding=True)["pixel_values"]
+            image_feature = model.get_image_features(processed_image.to(device))
+            image_feature /= image_feature.norm(dim=-1, keepdim=True)
+            image_feature = image_feature.cpu().numpy()
         # Find the matched Images
         matched_images = find_matches(image_feature, photo_features, photo_ids, 4)
         #is_input_image = True
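And the image branch pulled out as a self-contained snippet, assuming the model, processor, and device from the loading sketch above; the dummy PIL image stands in for the Gradio upload:

```python
import torch
from PIL import Image

def encode_image(search_image, model, processor, device):
    with torch.no_grad():
        # The processor resizes and normalizes the PIL image to CLIP's input size
        pixel_values = processor(images=search_image, return_tensors="pt")["pixel_values"]
        image_feature = model.get_image_features(pixel_values.to(device))
        image_feature /= image_feature.norm(dim=-1, keepdim=True)
    return image_feature.cpu().numpy()

features = encode_image(Image.new("RGB", (224, 224)), model, processor, device)
print(features.shape)  # (1, 512) for ViT-B/32's projection head
```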