Spaces:

eybro
/

image_video_timestamp

Sleeping

File size: 6,097 Bytes

f9a8213
de4f74d
815d67b
 
b6bd42c
0f89a2a
232bcf4
2fe3715
90b017a
bd24987
c38fdbf
815d67b
bd24987
ffbab7b
 
d4c93a0
bd24987
 
 
 
f7c2be1
8a9b2cb
 
 
f9a8213
8a9f973
 
f46f75a
91f849b
8a9f973
 
3874ecc
 
 
97be3b4
3874ecc
97be3b4
 
 
3874ecc
56cb512
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5f4db1
a1f97f0
 
 
 
56cb512
de4f74d
815d67b
 
 
 
b6bd42c
 
de4f74d
40b6e7e
381fd53
 
 
 
 
815d67b
248b867
5cde69f
c38fdbf
 
 
5cde69f
 
90e53e5
 
5cde69f
 
 
 
de4f74d
 
 
 
 
 
 
 
 
 
 
 
 
 
815d67b
eb1decc
97be3b4
 
 
b369fe5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de4f74d
eb1decc
8a9f973
 
5cde69f
 
 
8a9f973
2157496
97be3b4
2157496
8a9f973
 
d8e56db
2157496
 
8a9f973
 
49bca64
 
2157496
49bca64
2157496
09f23d8
2157496
 
 
 
8a9f973
1f43bd0
2157496
 
 
 
 
1f43bd0
 
 
 
d8e56db
1f43bd0
31fcb50
1f43bd0
 
db25dac
1063e47
db25dac
 
 
49bca64
 
2157496
97be3b4

import gradio as gr
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
import cv2
from keras.models import load_model
from keras.models import Model
from datasets import load_dataset
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from huggingface_hub import hf_hub_download
from PIL import Image

# Download and load model and encoded images
# hf_hub_download fetches each file from the Hugging Face Hub and returns the
# local path of the downloaded copy.
model_path = hf_hub_download(repo_id="eybro/autoencoder", filename="autoencoder_model.keras", repo_type='model')
data_path = hf_hub_download(repo_id="eybro/encoded_images", filename="X_encoded_compressed.npy", repo_type='dataset')

# `autoencoder` is used by process_image to encode query images;
# `encoded_images` is the array searched by find_nearest_neighbors.
autoencoder = load_model(model_path)
encoded_images = np.load(data_path)

# Load and split dataset
# get_image() maps a row index of `encoded_images` onto the train rows first
# and the test rows after them.
# NOTE(review): this presumes the encodings were produced in that same
# train-then-test order from the same seed-42 split -- confirm against the
# script that generated X_encoded_compressed.npy.
dataset = load_dataset("eybro/images")
split_dataset = dataset['train'].train_test_split(test_size=0.2, seed=42)  # 80% train, 20% test
dataset['train'] = split_dataset['train']
dataset['test'] = split_dataset['test']

# Example images
# Maps the label shown in the UI to the image file opened by load_example().
# Paths are relative, so they resolve against the current working directory.
# NOTE(review): "Example 1" points at example_2.png in the root directory while
# "Example 2" points at examples/example_1.png -- confirm both paths are intended.
example_images = {
    "Example 1": "example_2.png",
    "Example 2": "examples/example_1.png"
}

def create_url_from_title(title: str, timestamp: int):
    """
    Build a link to the video called `title`, starting at `timestamp` seconds.

    The URL is looked up in the 'train' split of the "eybro/video_urls"
    dataset, which is expected to provide a 'title' and a 'url' column.

    Args:
    title (str): Exact title to match against the 'title' column.
    timestamp (int): Start offset, appended to the URL as `t=<timestamp>s`.
    Returns:
    str: The URL of the first matching row with the time parameter appended.
    Raises:
    IndexError: If no row has the given title.
    """
    video_urls = load_dataset("eybro/video_urls")
    df = video_urls['train'].to_pandas()

    filtered = df[df['title'] == title]
    if len(filtered) == 0:
        # Same exception type that the positional lookup would raise, but with
        # a message that names the missing title.
        raise IndexError(f"No video URL found for title {title!r}")

    base_url = filtered.iloc[0, :]["url"]
    # Only use '&' when the URL already carries a query string; a URL without
    # one needs '?' to start it.
    separator = "&" if "?" in base_url else "?"
    return f"{base_url}{separator}t={timestamp}s"

def find_nearest_neighbors(encoded_images, input_image, top_n=5):
    """
    Rank the stored encodings by Euclidean distance to a query encoding.

    Args:
    encoded_images (np.ndarray): Stored encodings, one row per image.
    input_image (np.ndarray): Query encoding; it is flattened to a single row
        before the comparison.
    top_n (int): Maximum number of neighbors to return.
    Returns:
    List of (index, distance) tuples for the closest rows, nearest first.
    """
    query_row = input_image.reshape(1, -1)

    # One distance per stored encoding, as a flat vector.
    dist_to_query = euclidean_distances(encoded_images, query_row).flatten()

    # Ascending order of distance, truncated to the requested count.
    closest = np.argsort(dist_to_query)[:top_n]
    return list(zip(closest, dist_to_query[closest]))

def get_image(index):
    """
    Fetch a dataset row by its position in the train-then-test ordering.

    Indices below the size of the train split address the train split; the
    remaining indices address the test split, counted from its start.
    """
    train_rows = dataset["train"]
    test_offset = index - len(train_rows)
    if test_offset >= 0:
        return dataset["test"][test_offset]
    return train_rows[index]

# Holds the encoder sub-model once it has been built, so that it is created a
# single time instead of on every request.
_ENCODER_CACHE = {}


def _get_encoder():
    """Return the sub-model ending at autoencoder.layers[4], building it on first use."""
    encoder = _ENCODER_CACHE.get("encoder")
    if encoder is None:
        encoder = Model(inputs=autoencoder.input, outputs=autoencoder.layers[4].output)
        _ENCODER_CACHE["encoder"] = encoder
    return encoder


def process_image(image):
    """
    Encode an image with the autoencoder for nearest-neighbor search.

    The image is converted to an array, passed through the channel
    conversion, resized to 64x64, scaled to [0, 1] as float32 and given a
    leading batch axis. It is then run through the autoencoder up to
    layers[4], and the result is reduced with a max over its last axis.

    Args:
    image: A PIL image or an array-like image. PIL images in a mode other than
        "RGB" (grayscale, palette, RGBA, ...) are converted to "RGB" first.
    Returns:
    np.ndarray: The max-pooled encoder output with a leading batch axis of
    size 1. find_nearest_neighbors flattens it to a single row.
    """
    # Grayscale and palette images decode to 2-D arrays, which cvtColor rejects
    # for a 3-channel conversion; normalising the mode up front avoids that.
    if isinstance(image, Image.Image) and image.mode != "RGB":
        image = image.convert("RGB")

    img = np.array(image)
    # NOTE(review): arrays coming from PIL are already in RGB order, so this
    # call actually reverses the channels. It is kept unchanged because the
    # stored encodings presumably went through the same step -- confirm.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (64, 64))
    img = img.astype('float32')
    img /= 255.0
    img = np.expand_dims(img, axis=0)

    encoded_array = _get_encoder().predict(img)

    # Collapse the last axis of the encoder output by taking its maximum.
    pooled_array = encoded_array.max(axis=-1)
    return pooled_array
    
def inference(user_image=None, selected_example=None):
    """
    Find the stored frame closest to the given image and describe it.

    Exactly one of the two inputs must be supplied.

    Args:
    user_image: Image uploaded by the user, or None.
    selected_example: Key of an entry in `example_images`, or None.
    Returns:
    str: "<label> <timestamp> \\n<url>" for the best match, or a message
    explaining what is wrong with the inputs.
    """
    if user_image is not None and selected_example is not None:
        return "Please provide either an uploaded image or an example image, not both."
    if user_image is None and selected_example is None:
        return "Please upload an image or select an example image."

    if user_image is not None:
        source_image = user_image
    else:
        source_image = load_example(selected_example)
        if source_image is None:
            # load_example returns None for a name it has no path for.
            return f"Unknown example image: {selected_example}"

    input_image = process_image(source_image)
    nearest_neighbors = find_nearest_neighbors(encoded_images, input_image, top_n=5)

    # Print the results
    print("Nearest neighbors (index, distance):")
    for neighbor in nearest_neighbors:
        print(neighbor)

    top4 = [int(i[0]) for i in nearest_neighbors[:4]]
    print(f"top 4: {top4}")

    # Fetch each candidate row once; the first one is the reported match.
    candidates = [get_image(i) for i in top4]
    for im in candidates:
        print(im["label"], im["timestamp"])

    result_image = candidates[0]
    url = create_url_from_title(result_image['label'], result_image['timestamp'])

    # No matplotlib figure is built here: the only output of this function is
    # the text below, and a figure created per request without being closed
    # would accumulate in pyplot's global state.
    return f"{result_image['label']} {result_image['timestamp']} \n{url}"

def load_example(example_name):
    """
    Open the example image registered under `example_name`.

    Returns the opened PIL image, or None when `example_images` has no
    (non-empty) path for that name.
    """
    image_path = example_images.get(example_name)
    return Image.open(image_path) if image_path else None
           
# Gradio UI: an upload widget and an example picker on the left, the search
# result (Markdown text) on the right.
with gr.Blocks() as demo:
    gr.Markdown("""
        # Image to Video App
        Find your favorite Gordon Ramsay scene by uploading an image from the scene, the app will thereafter find a corresponding youtube video for that scene. 
        Or try one of our examples (unseen images for the model).
        """)

    with gr.Row():
        with gr.Column():
            inp_image = gr.Image(label="Upload Image", type="pil")
            example_selection = gr.Radio(
                choices=list(example_images),
                label="Select Example Image",
                type="value"  # Ensure single string return value
            )
            example_display = gr.Image(label="Selected Example Image", type="pil")

        with gr.Column():
            output = gr.Markdown()

    # Preview the chosen example as soon as the selection changes.
    example_selection.change(
        load_example,
        inputs=[example_selection],
        outputs=[example_display]
    )

    clear_button = gr.Button("Clear Example")

    # Reset both the radio selection and the preview; inference() refuses to
    # run while an upload and an example are both set.
    clear_button.click(
        lambda: (None, None),
        inputs=[],
        outputs=[example_selection, example_display]
    )

    submit_button = gr.Button("Submit")

    # The inputs are passed positionally as (user_image, selected_example).
    submit_button.click(
        inference,
        inputs=[inp_image, example_selection],
        outputs=output
    )

if __name__ == "__main__":
    demo.launch()