File size: 4,660 Bytes
351bc1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import gradio as gr
import torch
import os

from PIL import Image
from pathlib import Path
from more_itertools import chunked

from transformers import CLIPProcessor, CLIPModel

# Hugging Face checkpoint: CLIP fine-tuned on tweet/emoji pairs.
checkpoint = "vincentclaes/emoji-predictor"
# os.walk yields (dirpath, dirnames, filenames); we only need the file names
# to count how many emoji images exist in ./emojis.
x_, _, files = next(os.walk("./emojis"))
# Emoji images are assumed to be named 0.png .. N-1.png, so the file count
# defines the range of valid emoji ids.
no_of_emojis = range(len(files))
emojis_as_images = [Image.open(f"emojis/{i}.png") for i in no_of_emojis]
# Number of emoji suggestions shown per input text (concat_images expects 4).
K = 4

# Load processor (tokenizer + image preprocessing) and model once at startup;
# from_pretrained may download the checkpoint on first run.
processor = CLIPProcessor.from_pretrained(checkpoint)
model = CLIPModel.from_pretrained(checkpoint)


def concat_images(*images):
    """Paste up to four images into a single 2x2 composite grid.

    Images fill the grid row by row: index 0 top-left, 1 top-right,
    2 bottom-left, 3 bottom-right. Layout idea from
    https://stackoverflow.com/a/71315656/1771155

    :param images: PIL images (at most 4) to combine.
    :return: a new ``PIL.Image`` in RGB mode sized (2*max_width, 2*max_height).
    """
    # Check the actual argument count (the old assert only tested the module
    # constant K, which is always 4 and therefore never failed).
    assert len(images) <= 4, "We expect at most 4 suggestions, other numbers won't work."
    # Each grid cell is as large as the widest/tallest image so any image fits.
    width = max(image.width for image in images)
    height = max(image.height for image in images)
    composite = Image.new('RGB', (2 * width, 2 * height))
    for i, image in enumerate(images):
        # i % 2 selects the column, i // 2 selects the row.
        composite.paste(image, ((i % 2) * width, (i // 2) * height))
    return composite


def get_emoji(text, model=model, processor=processor, emojis=emojis_as_images, K=4):
    """Suggest the top-K emojis for *text* and return them as one composite image.

    :param text: input sentence to classify.
    :param model: CLIP model scoring the text against each emoji image.
    :param processor: CLIP processor that tokenizes the text and encodes the images.
    :param emojis: candidate emoji images; index i corresponds to emojis/{i}.png.
    :param K: number of suggestions; concat_images supports at most 4.
    :return: a PIL image with the K suggested emojis arranged in a 2x2 grid.
    """
    inputs = processor(text=text, images=emojis, return_tensors="pt", padding=True, truncation=True)
    # Inference only — no gradients needed, saves memory and time.
    with torch.no_grad():
        outputs = model(**inputs)

    logits_per_text = outputs.logits_per_text
    # Softmax over the emoji axis turns logits into per-emoji probabilities.
    probs = logits_per_text.softmax(dim=1)
    # There is a single input text, so only row 0 matters; take its top-K indices.
    top_k_indices = torch.topk(probs[0], K).indices.tolist()

    # The emoji index doubles as the image file name, e.g. emojis/7.png.
    images = [Image.open(f"emojis/{i}.png") for i in top_k_indices]
    return concat_images(*images)


# --- Gradio UI wiring -------------------------------------------------------
# Use the modern top-level component API consistently: the file already uses
# gr.Image, so gr.inputs.Textbox (deprecated, removed in newer Gradio) is
# replaced by gr.Textbox.
text = gr.Textbox(placeholder="Enter a text and we will try to predict an emoji...")
title = "Predicting an Emoji"
description = """You provide a sentence and our few-shot fine tuned CLIP model will suggest 4 from the following emoji's:
\n❀️ 😍 πŸ˜‚ πŸ’• πŸ”₯ 😊 😎 ✨ πŸ’™ 😘 πŸ“· πŸ‡ΊπŸ‡Έ β˜€ πŸ’œ πŸ˜‰ πŸ’― 😁 πŸŽ„ πŸ“Έ 😜 ☹️ 😭 πŸ˜” 😑 πŸ’’ 😀 😳 πŸ™ƒ 😩 😠 πŸ™ˆ πŸ™„\n
"""
article = """
\n
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
\n
#### Let's connect on Linkedin: https://www.linkedin.com/in/vincent-claes-0b346337/
\n
# Context
I fine tuned Open Ai's CLIP model on both text (tweets) and images of emoji's!\n
The current model you can play with is fine-tuned on 15 samples per emoji.

- model: https://huggingface.co/vincentclaes/emoji-predictor \n
- dataset: https://huggingface.co/datasets/vincentclaes/emoji-predictor \n
- profile: https://huggingface.co/vincentclaes \n

# Precision

Below you can find a table with the precision for predictions and suggestions 
for a range of samples per emoji we fine-tuned CLIP on.

### Prediction vs. Suggestion
- The column "Prediction" indicates the precision for predicting the right emoji.

- Since there can be some confusion about the right emoji for a tweet,
I also tried to present 4 suggestions. If 1 of the 4 suggestions is the same as the label,
I consider it a valid prediction. See the column "Suggestion".

- Randomly predicting an emoji would have a precision of 1/32 or 0.03125.
- Randomly suggesting an emoji would have a precision of 4/32 or 0.12.


           | Samples  | Prediction  | Suggestion  |
           |--------- |------------ |------------ |
           |    0     |    0.13     |    0.33     |
           |    1     |    0.11     |    0.30     |
           |    5     |    0.14     |    0.38     |
           |    10    |    0.20     |    0.45     |
           |    15    |    0.22     |    0.51     |
           |    20    |    0.19     |    0.49     |
           |    25    |    0.24     |    0.54     |
           |    50    |    0.23     |    0.53     |
           |   100    |    0.25     |    0.57     |
           |   250    |    0.29     |    0.62     |
           |   500    |    0.29     |    0.63     |
           
           


"""
# Pre-filled example inputs shown below the interface.
examples = [
    "I'm so happy for you!",
    "I'm not feeling great today.",
    "This makes me angry!",
    "Can I follow you?",
    "I'm so bored right now ...",
]
# Launch the app: one text input, one 72x72-scaled image output.
gr.Interface(fn=get_emoji, inputs=text, outputs=gr.Image(shape=(72,72)), 
             examples=examples, title=title, description=description,
             article=article).launch()