volumetrader's picture
Add video url in output
54a82f6 verified
raw
history blame
3.64 kB
import gradio as gr
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
import cv2
from keras.models import load_model
from keras.models import Model
from datasets import load_dataset
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
autoencoder = load_model("autoencoder_model.keras")
encoded_images = np.load("X_encoded_compressed.npy")
dataset = load_dataset("eybro/images-split")
num_clusters = 10 # Choose the number of clusters
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
kmeans.fit(encoded_images)
def create_url_from_title(title: str, timestamp: int):
video_urls = load_dataset("eybro/video_urls")
df = video_urls['train'].to_pandas()
filtered = df[df['title'] == title]
base_url = df["url"][0]
return base_url + f"?t={timestamp}s"
def find_nearest_neighbors(encoded_images, input_image, top_n=5):
"""
Find the closest neighbors to the input image in the encoded image space.
Args:
encoded_images (np.ndarray): Array of encoded images (shape: (n_samples, n_features)).
input_image (np.ndarray): The encoded input image (shape: (1, n_features)).
top_n (int): The number of nearest neighbors to return.
Returns:
List of tuples: (index, distance) of the top_n nearest neighbors.
"""
# Compute pairwise distances
distances = euclidean_distances(encoded_images, input_image.reshape(1, -1)).flatten()
# Sort by distance
nearest_neighbors = np.argsort(distances)[:top_n]
return [(index, distances[index]) for index in nearest_neighbors]
def get_image(index):
split = len(dataset["train"])
if index < split:
return dataset["train"][index]
else:
return dataset["test"][index-split]
def process_image(image):
img = np.array(image)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (64, 64))
img = img.astype('float32')
img /= 255.0
img = np.expand_dims(img, axis=0)
layer_model = Model(inputs=autoencoder.input, outputs=autoencoder.layers[4].output)
encoded_array = layer_model.predict(img)
pooled_array = encoded_array.max(axis=-1)
return pooled_array # Shape: (1, n_features)
def inference(image):
""""""
input_image = process_image(image)
# input_image = encoded_images[2000]
nearest_neighbors = find_nearest_neighbors(encoded_images, input_image, top_n=5)
# Print the results
print("Nearest neighbors (index, distance):")
for neighbor in nearest_neighbors:
print(neighbor)
top4 = [int(i[0]) for i in nearest_neighbors[:4]]
print(f"top 4: {top4}")
for i in top4:
im = get_image(i)
print(im["label"], im["timestamp"])
result_image = get_image(top4[0])
result = f"{result_image['label']} {result_image['timestamp']} \n{create_url_from_title(result_image['label'], result_image['timestamp'])}"
n=2
plt.figure(figsize=(8, 8))
for i, (image1, image2) in enumerate(zip(top4[:2], top4[2:])):
ax = plt.subplot(2, n, i + 1)
image1 = get_image(image1)["image"]
image2 = get_image(image2)["image"]
plt.imshow(image1)
plt.gray()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
ax = plt.subplot(2, n, i + 1 + n)
plt.imshow(image2)
plt.gray()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
return result
demo = gr.Interface(fn=inference,
inputs=gr.Image(label='Upload image'),
outputs="text")
demo.launch()