"""Text-to-emoji demo.

Embeds every emoji's short names and official name with a sentence-transformer
model, caches the embeddings on disk, and serves a Gradio UI that returns the
emoji whose description embedding is nearest (Euclidean distance) to the
embedding of the input text.
"""
import pickle

import emoji_data_python
import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

model = SentenceTransformer('all-mpnet-base-v2')

# Load previously computed (unified_code, embedding) pairs, if any.
# NOTE: pickle can execute arbitrary code on load; this file is assumed to be
# a cache written by this script itself, never an untrusted download.
try:
    with open('embeddings_list.pkl', 'rb') as f:
        embeddings_list = pickle.load(f)
except (OSError, EOFError, pickle.UnpicklingError,
        AttributeError, ImportError, IndexError):
    # Missing, truncated or incompatible cache: rebuild from scratch.
    # (Deliberately not a bare ``except`` so Ctrl-C / SystemExit propagate.)
    embeddings_list = []

# Build the set of already-embedded emoji once instead of re-scanning the
# cache list for every emoji (which was quadratic).
already_embedded = {entry[0] for entry in embeddings_list}
emojis_to_compute = [
    e for e in emoji_data_python.emoji_data
    if e.unified not in already_embedded
]

if emojis_to_compute:
    for e in tqdm(emojis_to_compute, desc='Computing embeddings'):
        strings = [n.replace('_', ' ').strip() for n in e.short_names]
        # Assumes some dataset entries may carry no official name (None);
        # skip it rather than crash on ``None.lower()``.
        if e.name:
            strings.append(e.name.lower())
        # Drop duplicate descriptions (order-preserving) so the same string
        # is not encoded and stored twice for one emoji.
        for s in dict.fromkeys(strings):
            embedding = model.encode(s)
            embeddings_list.append((e.unified, embedding))
    # Persist only when something new was computed.
    with open('embeddings_list.pkl', 'wb') as f:
        pickle.dump(embeddings_list, f)


def closest_emoji(text: str) -> str:
    """Return the emoji character whose description is closest to *text*.

    The text is embedded with the module-level ``model`` and compared, by
    Euclidean distance, against every cached emoji description embedding in
    ``embeddings_list``. The first entry with the smallest distance wins.

    Args:
        text: Free-form input text.

    Returns:
        The emoji character for the nearest description, or an empty string
        if no emoji embeddings are available.
    """
    text_embedding = model.encode(text)
    best_unified = None
    best_distance = np.inf
    for unified, emoji_embedding in embeddings_list:
        distance = np.linalg.norm(text_embedding - emoji_embedding)
        if distance < best_distance:
            best_distance = distance
            best_unified = unified
    if best_unified is None:
        # Nothing to compare against (empty cache / dataset).
        return ''
    return emoji_data_python.unified_to_char(best_unified)


# NOTE(review): ``gr.inputs`` / ``gr.outputs`` are the legacy Gradio component
# namespaces; newer Gradio releases expose ``gr.Textbox`` directly. Left
# unchanged because the targeted Gradio version is not visible here - confirm
# before upgrading.
emoji_input = gr.inputs.Textbox(label='text in')
emoji_output = gr.outputs.Textbox(label='emoji out')
iface = gr.Interface(
    fn=closest_emoji,
    inputs=emoji_input,
    outputs=emoji_output,
    title='text to emoji',
)
iface.launch()