Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Import
|
2 |
+
## ----------------
|
3 |
+
import pandas as pd
|
4 |
+
import streamlit as st
|
5 |
+
from sentence_transformers import SentenceTransformer, util
|
6 |
+
|
7 |
+
## Init
|
8 |
+
## ----------------
|
9 |
+
# set config
|
10 |
+
# Use the full-width layout and set the browser tab title.
st.set_page_config(
    page_title="emoji_suggestion ๐ต",
    layout="wide",
)
|
11 |
+
|
12 |
+
# load the sbert model (cache for faster loading)
|
13 |
+
# NOTE(review): st.cache is reported as deprecated in newer Streamlit releases
# (st.cache_resource is the suggested replacement) - confirm against the
# Streamlit version this app is pinned to before changing it.
@st.cache(allow_output_mutation=True)
def load_similarity_model(model_name='all-MiniLM-L6-v2'):
    """Build the SentenceTransformer identified by ``model_name``.

    The call is wrapped in Streamlit's cache, so repeated calls with the same
    name reuse the already-constructed model.

    Args:
        model_name: Name passed straight to ``SentenceTransformer``.

    Returns:
        The constructed ``SentenceTransformer`` instance.
    """
    return SentenceTransformer(model_name)
|
17 |
+
|
18 |
+
# list of supported models
|
19 |
+
# Model names offered in the sidebar; the first entry is the default choice
# of load_similarity_model as well.
supported_models = [
    'all-MiniLM-L6-v2',
    'paraphrase-albert-small-v2',
    'paraphrase-MiniLM-L3-v2',
    'all-distilroberta-v1',
    'all-mpnet-base-v2',
]
|
20 |
+
|
21 |
+
# read the emoji df and extract the relevant columns
|
22 |
+
# Load the emoji chart, then keep only the two columns the app uses.
emoji_df = pd.read_csv('EmojiCharts_unicodeorg.csv')
emoji_df = emoji_df[['name', 'codepoints']]
|
23 |
+
|
24 |
+
# convert a "U+XXXX" code point string into the character it represents
|
25 |
+
def encode_emoji(emoji):
    """Convert one ``U+XXXX`` code point label into its character.

    Args:
        emoji: Hexadecimal code point, optionally prefixed with ``U+``
            (for example ``"U+1F600"`` or ``"2764"``).

    Returns:
        The one-character string for that code point, or ``""`` when nothing
        remains after removing the prefix and surrounding whitespace.

    Raises:
        ValueError: If the remaining text is not hexadecimal or is outside
            the Unicode range.
    """
    hex_digits = emoji.replace("U+", "").strip()
    if not hex_digits:
        return ""
    # chr() accepts a code point of any width, so short values (U+A9) and
    # six-digit values (U+10FFFF) are handled as well as the usual 4/5 digits.
    return chr(int(hex_digits, 16))
|
33 |
+
|
34 |
+
# find the top similar sentences
|
35 |
+
def find_similar_sentences(query, target_sentences, n=5):
    """Return the positions of the ``n`` targets most similar to ``query``.

    The query and the targets are embedded with the module-level ``model``
    and compared using ``util.pytorch_cos_sim``.

    Args:
        query: Text to search for.
        target_sentences: Collection of candidate texts to rank.
        n: Maximum number of positions to return.

    Returns:
        A list of integer positions into ``target_sentences``, ordered from
        the highest similarity score to the lowest.
    """
    query_vec = model.encode([query], convert_to_tensor=True)
    target_vecs = model.encode(target_sentences, convert_to_tensor=True)
    # The query is a single sentence, so only the first row of scores is used.
    similarities = util.pytorch_cos_sim(query_vec, target_vecs).tolist()[0]
    ranked = sorted(enumerate(similarities), key=lambda pair: pair[1], reverse=True)
    return [position for position, _ in ranked[:n]]
|
44 |
+
|
45 |
+
# settings
|
46 |
+
# Sidebar controls: which similarity model to use and how many emojis to show.
selected_model_name = st.sidebar.selectbox(
    'Similarity model',
    options=supported_models,
)
emoji_count = st.sidebar.slider(
    'Emoji output count',
    min_value=1,
    max_value=10,
    value=5,
    step=1,
)

# Page heading and introduction.
st.title("emoji_suggestion ๐ต")
st.markdown(
    """**Semantic Search** for Emojis, allowing you to find emojis for any situation, like when youโre hungry and need an emoji-inspired meal plan for the month. just emojis hungry ๐ฒ ๐ฅซ ๐ฝ๏ธ ๐ ๐ฅฃ ๐ซ ๐ซ ๐ฏ ๐ ๐งโ๐ผ ๐ฅฎ ๐ญ"""
)

# Query input and the button that triggers the search.
query_text = st.text_area("Enter your text here: ", "I am hungry")
find_button = st.button("Submit")

# Model used by find_similar_sentences, chosen from the sidebar selection.
model = load_similarity_model(selected_model_name)
|
57 |
+
|
58 |
+
# callback
|
59 |
+
with st.spinner("EmojiFinder is looking for clues to find the best emoji...."):
    if find_button:
        # Find the emoji names that best match the query text.
        best_matches = find_similar_sentences(query_text, emoji_df['name'], emoji_count)
        for match_idx in best_matches:
            row = emoji_df.iloc[match_idx]
            # The codepoints field is split on spaces; convert each part
            # separately and join the results with spaces.
            glyphs = ' '.join(encode_emoji(cp) for cp in row['codepoints'].split(' '))
            st.write(f'{row["name"]} - {glyphs}')
|