vision experiments
- experimental/images/plant-001.jpeg +0 -0
- experimental/images/plant-002.jpeg +0 -0
- experimental/vision001.py +213 -0
- experimental/vision002.py +210 -0
experimental/images/plant-001.jpeg
ADDED
experimental/images/plant-002.jpeg
ADDED
experimental/vision001.py
ADDED
@@ -0,0 +1,213 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
import json
import os
import time

import numpy as np
import requests
import torch

from clip_app_client import ClipAppClient
from clip_retrieval.clip_client import ClipClient, Modality

clip_retrieval_service_url = "https://knn.laion.ai/knn-service"
map_clip_to_clip_retreval = {
    "ViT-L/14": "laion5B-L-14",
}


def safe_url(url):
    import urllib.parse
    url = urllib.parse.quote(url, safe=':/')
    # if the url contains '.jpg' (e.g. two concatenated filenames), keep only up to the first one
    if url.count('.jpg') > 0:
        url = url.split('.jpg')[0] + '.jpg'
    return url

# test_image_path = os.path.join(os.getcwd(), "images", "plant-001.png")
test_image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "images", "plant-001.jpeg")
# test_image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "images", "plant-002.jpeg")
# test_image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "images", "car-002.jpeg")

app_client = ClipAppClient()
clip_retrieval_client = ClipClient(
    url=clip_retrieval_service_url,
    indice_name=map_clip_to_clip_retreval[app_client.clip_model],
    # use_safety_model=False,
    # use_violence_detector=False,
    # use_mclip=False,
    num_images=300,
    # modality=Modality.TEXT,
)

# embed the test image and query the LAION knn index with the raw image embedding
preprocessed_image = app_client.preprocess_image(test_image_path)
preprocessed_image_embeddings = app_client.preprocessed_image_to_embedding(preprocessed_image)
print(f"embeddings: {preprocessed_image_embeddings.shape}")

embedding_as_list = preprocessed_image_embeddings[0].tolist()
results = clip_retrieval_client.query(embedding_input=embedding_as_list)

# hints = ""
# for result in results:
#     url = safe_url(result["url"])
#     similarity = float("{:.4f}".format(result["similarity"]))
#     title = result["caption"]
#     print(f"{similarity} \"{title}\" {url}")
#     if len(hints) > 0:
#         hints += f", \"{title}\""
#     else:
#         hints += f"\"{title}\""
# print("---")
# print(hints)

# score the returned captions against the query image embedding
image_labels = [r['caption'] for r in results]
image_label_vectors = [app_client.text_to_embedding(label) for label in image_labels]
image_label_vectors = torch.cat(image_label_vectors, dim=0)
dot_product = torch.mm(image_label_vectors, preprocessed_image_embeddings.T)
similarity_image_label = [(float("{:.4f}".format(dot_product[i][0])), image_labels[i]) for i in range(len(image_labels))]
similarity_image_label.sort(reverse=True)
for similarity, image_label in similarity_image_label:
    print(f"{similarity} {image_label}")

print(f"----\n")

# now do the same for the returned images
def _safe_image_url_to_embedding(url, safe_return):
    try:
        return app_client.image_url_to_embedding(url)
    except Exception:
        # fall back to the provided placeholder (a zero vector) when an image fails to download
        return safe_return

image_urls = [safe_url(r['url']) for r in results]
image_vectors = [_safe_image_url_to_embedding(url, preprocessed_image_embeddings * 0) for url in image_urls]
image_vectors = torch.cat(image_vectors, dim=0)
dot_product = torch.mm(image_vectors, preprocessed_image_embeddings.T)
similarity_image = [(float("{:.4f}".format(dot_product[i][0])), image_labels[i]) for i in range(len(image_labels))]
similarity_image.sort(reverse=True)
for similarity, image_label in similarity_image:
    print(f"{similarity} {image_label}")

def mean_template(embeddings):
    template = torch.mean(embeddings, dim=0, keepdim=True)
    return template

def principal_component_analysis_template(embeddings):
    mean = torch.mean(embeddings, dim=0)
    embeddings_centered = embeddings - mean  # subtract the mean
    u, s, v = torch.svd(embeddings_centered)  # perform SVD
    template = u[:, 0]  # the first column of u gives the first principal component
    return template

def clustering_templates(embeddings, n_clusters=5):
    from sklearn.cluster import KMeans
    import numpy as np

    kmeans = KMeans(n_clusters=n_clusters)
    embeddings_np = embeddings.detach().cpu().numpy()  # convert to numpy
    clusters = kmeans.fit_predict(embeddings_np)

    templates = []
    for cluster in np.unique(clusters):
        cluster_mean = np.mean(embeddings_np[clusters == cluster], axis=0)
        templates.append(torch.from_numpy(cluster_mean))  # convert back to a tensor
    return templates

# create templates using clustering
print("create templates using clustering")
merged_embeddings = torch.cat([image_label_vectors, image_vectors], dim=0)
clusters = clustering_templates(merged_embeddings, n_clusters=5)
# convert from list to 2d matrix
clusters = torch.stack(clusters, dim=0)
dot_product = torch.mm(clusters, preprocessed_image_embeddings.T)
cluster_similarity = [(float("{:.4f}".format(dot_product[i][0])), i) for i in range(len(clusters))]
cluster_similarity.sort(reverse=True)
for similarity, idx in cluster_similarity:
    print(f"{similarity} {idx}")
# template = highest scoring cluster
# template = clusters[cluster_similarity[0][1]]
# build a search template: scale the query embedding and subtract the centroid of every cluster except the best-matching one
template = preprocessed_image_embeddings * (len(clusters) - 1)
for i in range(1, len(clusters)):
    template -= clusters[cluster_similarity[i][1]]
print("---")
print("searching based on template")
results = clip_retrieval_client.query(embedding_input=template[0].tolist())
hints = ""
for result in results:
    url = safe_url(result["url"])
    similarity = float("{:.4f}".format(result["similarity"]))
    title = result["caption"]
    print(f"{similarity} \"{title}\" {url}")
    if len(hints) > 0:
        hints += f", \"{title}\""
    else:
        hints += f"\"{title}\""
print(hints)


# cluster_num = 1
# for template in clusters:
#     print("---")
#     print(f"cluster {cluster_num} of {len(clusters)}")
#     results = clip_retrieval_client.query(embedding_input=template.tolist())
#     hints = ""
#     for result in results:
#         url = safe_url(result["url"])
#         similarity = float("{:.4f}".format(result["similarity"]))
#         title = result["caption"]
#         print(f"{similarity} \"{title}\" {url}")
#         if len(hints) > 0:
#             hints += f", \"{title}\""
#         else:
#             hints += f"\"{title}\""
#     print(hints)
#     cluster_num += 1


# create a template
# mean
# image_label_template = mean_template(image_label_vectors)
# image_template = mean_template(image_vectors)
# pca
# image_label_template = principal_component_analysis_template(image_label_vectors)
# image_template = principal_component_analysis_template(image_vectors)
# clustering
# image_label_template = clustering_templates(image_label_vectors)
# image_template = clustering_templates(image_vectors)

# take the embedding and subtract the template
# image_label_template = preprocessed_image_embeddings - image_label_template
# image_template = preprocessed_image_embeddings - image_template
# image_label_template = image_label_template - preprocessed_image_embeddings
# image_template = image_template - preprocessed_image_embeddings
# normalize
# image_label_template = image_label_template / image_label_template.norm()
# image_template = image_template / image_template.norm()

# results = clip_retrieval_client.query(embedding_input=image_label_template[0].tolist())
# hints = ""
# print("---")
# print("average of image labels")
# for result in results:
#     url = safe_url(result["url"])
#     similarity = float("{:.4f}".format(result["similarity"]))
#     title = result["caption"]
#     print(f"{similarity} \"{title}\" {url}")
#     if len(hints) > 0:
#         hints += f", \"{title}\""
#     else:
#         hints += f"\"{title}\""
# print(hints)

# print("---")
# print("average of images")
# results = clip_retrieval_client.query(embedding_input=image_template[0].tolist())
# hints = ""
# for result in results:
#     url = safe_url(result["url"])
#     similarity = float("{:.4f}".format(result["similarity"]))
#     title = result["caption"]
#     print(f"{similarity} \"{title}\" {url}")
#     if len(hints) > 0:
#         hints += f", \"{title}\""
#     else:
#         hints += f"\"{title}\""
# print(hints)
experimental/vision002.py
ADDED
@@ -0,0 +1,210 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
import json
import os
import time

import numpy as np
import requests
import torch

from clip_app_client import ClipAppClient
from clip_retrieval.clip_client import ClipClient, Modality

clip_retrieval_service_url = "https://knn.laion.ai/knn-service"
map_clip_to_clip_retreval = {
    "ViT-L/14": "laion5B-L-14",
}


def safe_url(url):
    import urllib.parse
    url = urllib.parse.quote(url, safe=':/')
    # if the url contains '.jpg' (e.g. two concatenated filenames), keep only up to the first one
    if url.count('.jpg') > 0:
        url = url.split('.jpg')[0] + '.jpg'
    return url

def _safe_image_url_to_embedding(url, safe_return):
    try:
        return app_client.image_url_to_embedding(url)
    except Exception:
        # fall back to the provided placeholder (a zero vector) when an image fails to download
        return safe_return

def mean_template(embeddings):
    template = torch.mean(embeddings, dim=0, keepdim=True)
    return template

def principal_component_analysis_template(embeddings):
    mean = torch.mean(embeddings, dim=0)
    embeddings_centered = embeddings - mean  # subtract the mean
    u, s, v = torch.svd(embeddings_centered)  # perform SVD
    template = u[:, 0]  # the first column of u gives the first principal component
    return template

def clustering_templates(embeddings, n_clusters=5):
    from sklearn.cluster import KMeans
    import numpy as np

    kmeans = KMeans(n_clusters=n_clusters)
    embeddings_np = embeddings.detach().cpu().numpy()  # convert to numpy
    clusters = kmeans.fit_predict(embeddings_np)

    templates = []
    for cluster in np.unique(clusters):
        cluster_mean = np.mean(embeddings_np[clusters == cluster], axis=0)
        templates.append(torch.from_numpy(cluster_mean))  # convert back to a tensor
    return templates

# test_image_path = os.path.join(os.getcwd(), "images", "plant-001.png")
test_image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "images", "plant-001.jpeg")
# test_image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "images", "plant-002.jpeg")
# test_image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "images", "car-002.jpeg")

app_client = ClipAppClient()
clip_retrieval_client = ClipClient(
    url=clip_retrieval_service_url,
    indice_name=map_clip_to_clip_retreval[app_client.clip_model],
    # use_safety_model=False,
    # use_violence_detector=False,
    # use_mclip=False,
    # num_images=300,
    # modality=Modality.TEXT,
)
preprocessed_image = app_client.preprocess_image(test_image_path)
preprocessed_image_embeddings = app_client.preprocessed_image_to_embedding(preprocessed_image)

print(f"embeddings: {preprocessed_image_embeddings.shape}")


# iteratively refine the search template: query, score the results, cluster them,
# and rebuild the template from the query embedding and the non-matching clusters
template = preprocessed_image_embeddings
for step_num in range(3):
    print(f"\n\n---- Step {step_num} ----")

    embedding_as_list = template[0].tolist()
    results = clip_retrieval_client.query(embedding_input=embedding_as_list)

    # get best matching labels
    image_labels = [r['caption'] for r in results]
    image_label_vectors = [app_client.text_to_embedding(label) for label in image_labels]
    image_label_vectors = torch.cat(image_label_vectors, dim=0)
    dot_product = torch.mm(image_label_vectors, preprocessed_image_embeddings.T)
    similarity_image_label = [(float("{:.4f}".format(dot_product[i][0])), image_labels[i]) for i in range(len(image_labels))]
    similarity_image_label.sort(reverse=True)
    for similarity, image_label in similarity_image_label:
        print(f"{similarity} {image_label}")

    # now do the same for the returned images
    image_urls = [safe_url(r['url']) for r in results]
    image_vectors = [_safe_image_url_to_embedding(url, preprocessed_image_embeddings * 0) for url in image_urls]
    image_vectors = torch.cat(image_vectors, dim=0)
    dot_product = torch.mm(image_vectors, preprocessed_image_embeddings.T)
    similarity_image = [(float("{:.4f}".format(dot_product[i][0])), image_labels[i]) for i in range(len(image_labels))]
    # remove images with low similarity as these will be images that did not load
    # (filter before sorting so the indices still line up with image_vectors)
    image_vectors = torch.stack([image_vectors[i] for i in range(len(image_vectors)) if similarity_image[i][0] > 0.001], dim=0)
    similarity_image.sort(reverse=True)
    for similarity, image_label in similarity_image:
        print(f"{similarity} {image_label}")

    # create templates using clustering
    print("create templates using clustering")
    merged_embeddings = torch.cat([image_label_vectors, image_vectors], dim=0)
    # merged_embeddings = image_label_vectors  # only use labels
    # merged_embeddings = image_vectors  # only use images
    clusters = clustering_templates(merged_embeddings, n_clusters=5)
    # convert from list to 2d matrix
    clusters = torch.stack(clusters, dim=0)
    dot_product = torch.mm(clusters, preprocessed_image_embeddings.T)
    cluster_similarity = [(float("{:.4f}".format(dot_product[i][0])), i) for i in range(len(clusters))]
    cluster_similarity.sort(reverse=True)
    for similarity, idx in cluster_similarity:
        print(f"{similarity} {idx}")
    # template = highest scoring cluster
    # template = clusters[cluster_similarity[0][1]]
    # rebuild the template: scale the query embedding and subtract the centroid of every cluster except the best-matching one
    template = preprocessed_image_embeddings * (len(clusters) - 1)
    for i in range(1, len(clusters)):
        template -= clusters[cluster_similarity[i][1]]
    print("---")
    print("searching based on template")
    results = clip_retrieval_client.query(embedding_input=template[0].tolist())
    hints = ""
    for result in results:
        url = safe_url(result["url"])
        similarity = float("{:.4f}".format(result["similarity"]))
        title = result["caption"]
        print(f"{similarity} \"{title}\" {url}")
        if len(hints) > 0:
            hints += f", \"{title}\""
        else:
            hints += f"\"{title}\""
    print(hints)


# cluster_num = 1
# for template in clusters:
#     print("---")
#     print(f"cluster {cluster_num} of {len(clusters)}")
#     results = clip_retrieval_client.query(embedding_input=template.tolist())
#     hints = ""
#     for result in results:
#         url = safe_url(result["url"])
#         similarity = float("{:.4f}".format(result["similarity"]))
#         title = result["caption"]
#         print(f"{similarity} \"{title}\" {url}")
#         if len(hints) > 0:
#             hints += f", \"{title}\""
#         else:
#             hints += f"\"{title}\""
#     print(hints)
#     cluster_num += 1


# create a template
# mean
# image_label_template = mean_template(image_label_vectors)
# image_template = mean_template(image_vectors)
# pca
# image_label_template = principal_component_analysis_template(image_label_vectors)
# image_template = principal_component_analysis_template(image_vectors)
# clustering
# image_label_template = clustering_templates(image_label_vectors)
# image_template = clustering_templates(image_vectors)

# take the embedding and subtract the template
# image_label_template = preprocessed_image_embeddings - image_label_template
# image_template = preprocessed_image_embeddings - image_template
# image_label_template = image_label_template - preprocessed_image_embeddings
# image_template = image_template - preprocessed_image_embeddings
# normalize
# image_label_template = image_label_template / image_label_template.norm()
# image_template = image_template / image_template.norm()

# results = clip_retrieval_client.query(embedding_input=image_label_template[0].tolist())
# hints = ""
# print("---")
# print("average of image labels")
# for result in results:
#     url = safe_url(result["url"])
#     similarity = float("{:.4f}".format(result["similarity"]))
#     title = result["caption"]
#     print(f"{similarity} \"{title}\" {url}")
#     if len(hints) > 0:
#         hints += f", \"{title}\""
#     else:
#         hints += f"\"{title}\""
# print(hints)

# print("---")
# print("average of images")
# results = clip_retrieval_client.query(embedding_input=image_template[0].tolist())
# hints = ""
# for result in results:
#     url = safe_url(result["url"])
#     similarity = float("{:.4f}".format(result["similarity"]))
#     title = result["caption"]
#     print(f"{similarity} \"{title}\" {url}")
#     if len(hints) > 0:
#         hints += f", \"{title}\""
#     else:
#         hints += f"\"{title}\""
# print(hints)
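
Note: both scripts leave L2 normalization of the template commented out ("# normalize ... .norm()"). A minimal, hypothetical sketch of what that follow-up could look like, reusing the template, clip_retrieval_client, and safe_url defined above; this is an illustration, not part of the committed files:

    # hypothetical follow-up: L2-normalize the combined template before querying,
    # which keeps the reported similarities on the usual CLIP cosine scale
    normalized_template = template / template.norm(dim=-1, keepdim=True)
    results = clip_retrieval_client.query(embedding_input=normalized_template[0].tolist())
    for result in results:
        print(f"{result['similarity']:.4f} \"{result['caption']}\" {safe_url(result['url'])}")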