# AmitT's picture
# Update app.py
# 0fc79bc
## Import
## ----------------
import pandas as pd
import streamlit as st
from sentence_transformers import SentenceTransformer, util
## Init
## ----------------
# Page setup: wide layout and the browser tab title.
st.set_page_config(layout="wide", page_title="emoji_suggestion 🕵")
# Build the SBERT model; the st.cache wrapper avoids rebuilding it on every call.
@st.cache(allow_output_mutation=True)
def load_similarity_model(model_name='all-MiniLM-L6-v2'):
    """Return a ``SentenceTransformer`` for ``model_name``.

    Args:
        model_name: Name handed to ``SentenceTransformer``; defaults to
            ``'all-MiniLM-L6-v2'``.

    Returns:
        The constructed ``SentenceTransformer`` instance.
    """
    return SentenceTransformer(model_name)
# Selectable model label -> sentence-transformers model name.
supported_models = {
    'English': 'all-mpnet-base-v2',
    'Multilingual': 'paraphrase-multilingual-MiniLM-L12-v2',
}

# Emoji chart; only the name and code point columns are kept.
emoji_df = pd.read_csv('EmojiCharts_unicodeorg.csv').loc[:, ['name', 'codepoints']]
# Convert a "U+XXXX" code point label into the character it names.
def encode_emoji(emoji):
    """Return the character for a code point label such as ``"U+1F600"``.

    Args:
        emoji: Hexadecimal code point, optionally prefixed with ``U+``
            (for example ``"U+1F600"``, ``"U+2764"`` or ``"U+A9"``).

    Returns:
        The single character with that code point, or an empty string when
        the label is blank.

    Raises:
        ValueError: If the label is not valid hexadecimal or lies outside
            the Unicode range.
    """
    hex_digits = emoji.replace("U+", "").strip()
    if not hex_digits:
        return ""
    # chr() accepts a code point of any width, so short values such as
    # "A9" or "23" are converted instead of being silently dropped.
    return chr(int(hex_digits, 16))
# Rank the target sentences by cosine similarity to the query.
def find_similar_sentences(query, target_sentences, n=2):
    """Return the positions of the ``n`` targets most similar to ``query``.

    The query and the targets are embedded with the module-level ``model``
    and compared with ``util.pytorch_cos_sim``.

    Args:
        query: Text to search for.
        target_sentences: Iterable of candidate sentences (for example a
            list or a pandas Series of strings).
        n: Maximum number of positions to return.

    Returns:
        0-based positions into ``target_sentences`` (in iteration order),
        sorted from most to least similar, with at most ``n`` entries.
        Empty when there are no targets or ``n`` is not positive.
    """
    # Work on a plain list: encode() is documented for str / list-of-str
    # input, and a list keeps positions independent of any pandas index.
    sentences = list(target_sentences)
    if not sentences or n <= 0:
        return []

    # compute embeddings
    embeddings_query = model.encode([query], convert_to_tensor=True)
    embeddings_target = model.encode(sentences, convert_to_tensor=True)

    # Only one query was encoded, so the first row holds every target score.
    score_list = util.pytorch_cos_sim(embeddings_query, embeddings_target).tolist()[0]

    # Positions ordered by descending score, truncated to the requested count.
    ranked = sorted(range(len(score_list)), key=score_list.__getitem__, reverse=True)
    return ranked[:n]
# settings: model choice and number of emoji rows to show
selected_model_name = supported_models.get(
    st.sidebar.selectbox('Similarity model', options=list(supported_models))
)
emoji_count = st.sidebar.slider('Emoji output count', min_value=1, max_value=10, value=5, step=1)

# title and headers
# NOTE(review): the emoji below were restored from mis-encoded text; 🍟 and 🫐
# had a byte missing and are best guesses - confirm against the original copy.
st.title("emoji_suggestion 🕵")
st.markdown("""**Semantic Search** for Emojis, allowing you to find emojis for any situation, like when you’re hungry and need an emoji-inspired meal plan for the month. just emojis hungry 🍲 🥫 🍽️ 🍟 🥣 🫘 🫐 🌯 😋 🧑‍🍼 🥮 🌭""")
query_text = st.text_area("Enter your text here: ", "I am hungry")
find_button = st.button("Submit")

# load the model
model = load_similarity_model(selected_model_name)

# callback: runs only on the rerun triggered by the Submit button
if find_button:
    if not query_text.strip():
        # A blank query has nothing meaningful to match against.
        st.warning("Please enter some text first.")
    else:
        with st.spinner("Searching..."):
            # find the top N similar emoji names
            emoji_names = emoji_df['name'].tolist()
            top_indices = find_similar_sentences(query_text, emoji_names, emoji_count)
            # print one line per match: the query followed by the emoji
            for i in top_indices:
                emoji = emoji_df.iloc[i]
                # an emoji can span several code points; decode each one
                glyphs = ' '.join(encode_emoji(x) for x in emoji['codepoints'].split())
                st.write(f'{query_text} {glyphs}')