import matplotlib
# Select the Agg backend before pyplot gets imported further down.
matplotlib.use('Agg')
import gradio as gr
import tensorflow as tf
from huggingface_hub import from_pretrained_keras
import numpy as np
from collections import defaultdict
import matplotlib.pyplot as plt
import plotly.express as px
from plotly import subplots
import pandas as pd
import random

# Load CIFAR-10 and merge its train and test splits into one image array and
# one label array.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
x_data = np.concatenate((x_train, x_test))
y_data = np.concatenate((y_train, y_test))

# Class names, positioned so that the integer label of an image indexes its name.
classes = [
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
]
num_classes = len(classes)

# Pretrained clustering model fetched through huggingface_hub.
clustering_model = from_pretrained_keras("johko/semantic-image-clustering")

# Get the cluster probability distribution of the input images.
clustering_probs = clustering_model.predict(x_data, batch_size=500, verbose=1)

# Get the cluster of the highest probability.
cluster_assignments = tf.math.argmax(clustering_probs, axis=-1).numpy()

# Store the clustering confidence.
# Images with the highest clustering confidence are considered the 'prototypes'
# of the clusters.
cluster_confidence = tf.math.reduce_max(clustering_probs, axis=-1).numpy() clusters = defaultdict(list) for idx, c in enumerate(cluster_assignments): clusters[c].append((idx, cluster_confidence[idx])) def get_cluster_size(cluster_number: int): cluster_size = len(clusters[cluster_number-1]) return f"Cluster #{cluster_number} consists of {cluster_size} objects" def get_images_from_cluster(cluster_number: int, num_images: int, image_mode: str): position = 1 if image_mode == "Random Images from Cluster": cluster_instances = clusters[cluster_number-1] random.shuffle(cluster_instances) else : cluster_instances = sorted(clusters[cluster_number-1], key=lambda kv: kv[1], reverse=True) fig = plt.figure() for j in range(num_images): image_idx = cluster_instances[j][0] plt.subplot(1, num_images, position) plt.imshow(x_data[image_idx].astype("uint8")) plt.title(classes[y_data[image_idx][0]]) plt.axis("off") position += 1 fig.tight_layout() return fig # labels = [] # images = [] # for j in range(num_images): # image_idx = cluster_instances[j][0] # images.append(x_data[image_idx].astype("uint8")) # labels.append(classes[y_data[image_idx][0]]) # fig = subplots.make_subplots(rows=int(num_images/4)+1, cols=4, subplot_titles=labels) # for j in range(num_images): # fig.add_trace(px.imshow(images[j]).data[0], row=int(j/4)+1, col=j%4+1) # fig.update_xaxes(visible=False) # fig.update_yaxes(visible=False) # return fig def get_cluster_details(cluster_number: int): cluster_label_counts = list() cluster_label_counts = [0] * num_classes instances = clusters[cluster_number-1] for i, _ in instances: cluster_label_counts[y_data[i][0]] += 1 class_count = zip(classes, cluster_label_counts) class_count_dict = dict(class_count) count_df = pd.Series(class_count_dict).to_frame() fig_pie = px.pie(count_df, values=0, names=count_df.index, title='Number of class objects in cluster') return fig_pie def get_cluster_info(cluster_number: int, num_images: int, image_mode: str): cluster_size = 
get_cluster_size(cluster_number) img_fig = get_images_from_cluster(cluster_number, num_images, image_mode) detail_fig = get_cluster_details(cluster_number) return [cluster_size, img_fig, detail_fig] article = """