import os

import gradio as gr
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

checkpoint = "vincentclaes/emoji-predictor"

# Load every emoji image; files are named 0.png, 1.png, ... in ./emojis.
_, _, files = next(os.walk("./emojis"))
no_of_emojis = range(len(files))
emojis_as_images = [Image.open(f"emojis/{i}.png") for i in no_of_emojis]

# Number of emoji suggestions to return per sentence.
K = 4

processor = CLIPProcessor.from_pretrained(checkpoint)
model = CLIPModel.from_pretrained(checkpoint)


def concat_images(*images):
    """Generate a 2x2 composite of all supplied images.

    https://stackoverflow.com/a/71315656/1771155
    """
    # Use the widest width and the tallest height as the cell size of the grid.
    width = max(image.width for image in images)
    height = max(image.height for image in images)
    # Set the correct width and height of the composite.
    composite = Image.new("RGB", (2 * width, 2 * height))
    assert K == 4, "We expect 4 suggestions, other numbers won't work."
    for i, image in enumerate(images):
        if i == 0:
            composite.paste(image, (0, 0))
        elif i == 1:
            composite.paste(image, (width, 0))
        elif i == 2:
            composite.paste(image, (0, height))
        elif i == 3:
            composite.paste(image, (width, height))
    return composite


def get_emoji(text, model=model, processor=processor, emojis=emojis_as_images, K=4):
    inputs = processor(text=text, images=emojis, return_tensors="pt",
                       padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Similarity of the sentence to every emoji image.
    logits_per_text = outputs.logits_per_text
    # We take the softmax to get the label probabilities.
    probs = logits_per_text.softmax(dim=1)
    # Indices of the top K most likely emojis for the (single) input text.
    suggestions = torch.topk(probs[0], K).indices.tolist()
    images = [Image.open(f"emojis/{i}.png") for i in suggestions]
    return concat_images(*images)
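
# A minimal sketch (my addition, not part of the original app) of how the
# "Suggestion" precision reported in the article below could be measured: a
# sample counts as correct when the true label is among the top K suggestions.
# `eval_pairs`, a list of (sentence, label_index) tuples, is a hypothetical
# input; the original evaluation code is not included in this Space.
def suggestion_precision(eval_pairs, k=K):
    hits = 0
    for sentence, label in eval_pairs:
        inputs = processor(text=sentence, images=emojis_as_images,
                           return_tensors="pt", padding=True, truncation=True)
        with torch.no_grad():
            probs = model(**inputs).logits_per_text.softmax(dim=1)
        top_k = torch.topk(probs[0], k).indices.tolist()
        hits += int(label in top_k)
    return hits / len(eval_pairs)
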
text = gr.Textbox(placeholder="Enter a text and we will try to predict an emoji...")

title = "Predicting an Emoji"

description = """You provide a sentence and our few-shot fine-tuned CLIP model will suggest 4 of the following emojis: \n❀️ 😍 πŸ˜‚ πŸ’• πŸ”₯ 😊 😎 ✨ πŸ’™ 😘 πŸ“· πŸ‡ΊπŸ‡Έ β˜€ πŸ’œ πŸ˜‰ πŸ’― 😁 πŸŽ„ πŸ“Έ 😜 ☹️ 😭 πŸ˜” 😑 πŸ’’ 😀 😳 πŸ™ƒ 😩 😠 πŸ™ˆ πŸ™„\n
"""

article = """ \n
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ \n
#### Let's connect on LinkedIn: https://www.linkedin.com/in/vincent-claes-0b346337/ \n

# Context
I fine-tuned OpenAI's CLIP model on both text (tweets) and emoji images!\n
The current model you can play with is fine-tuned on 15 samples per emoji.

- model: https://huggingface.co/vincentclaes/emoji-predictor \n
- dataset: https://huggingface.co/datasets/vincentclaes/emoji-predictor \n
- profile: https://huggingface.co/vincentclaes \n

# Precision
Below you can find a table with the precision of predictions and suggestions for a range of samples per emoji that CLIP was fine-tuned on.

### Prediction vs. Suggestion

- The column "Prediction" indicates the precision for predicting exactly the right emoji.
- Since there can be some confusion about the right emoji for a tweet, the app also presents 4 suggestions. If one of the 4 suggestions matches the label, I consider it a valid prediction; see the column "Suggestion".
- Randomly predicting an emoji would have a precision of 1/32, or about 0.03.
- Randomly suggesting 4 emojis would have a precision of 4/32, or 0.125.

| Samples | Prediction | Suggestion |
|---------|------------|------------|
| 0       | 0.13       | 0.33       |
| 1       | 0.11       | 0.30       |
| 5       | 0.14       | 0.38       |
| 10      | 0.20       | 0.45       |
| 15      | 0.22       | 0.51       |
| 20      | 0.19       | 0.49       |
| 25      | 0.24       | 0.54       |
| 50      | 0.23       | 0.53       |
| 100     | 0.25       | 0.57       |
| 250     | 0.29       | 0.62       |
| 500     | 0.29       | 0.63       |
"""

examples = [
    "I'm so happy for you!",
    "I'm not feeling great today.",
    "This makes me angry!",
    "Can I follow you?",
    "I'm so bored right now ...",
]

gr.Interface(
    fn=get_emoji,
    inputs=text,
    outputs=gr.Image(shape=(72, 72)),
    examples=examples,
    title=title,
    description=description,
    article=article,
).launch()
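
# To try the app locally (standard Gradio usage): run `python app.py` and open
# the local URL that launch() prints. On Hugging Face Spaces, app.py is picked
# up and run automatically when the Space starts.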