AmitT committed on
Commit
93a162e
•
1 Parent(s): 6238b34

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Import
2
+ ## ----------------
3
+ import pandas as pd
4
+ import streamlit as st
5
+ from sentence_transformers import SentenceTransformer, util
6
+
7
+ ## Init
8
+ ## ----------------
9
# Page configuration: wide layout plus the page title.
# The title's trailing glyph is the detective emoji (U+1F575), restored from
# a mis-decoded UTF-8 byte sequence.
st.set_page_config(layout="wide", page_title="emoji_suggestion 🕵")
11
+
12
@st.cache(allow_output_mutation=True)
def load_similarity_model(model_name='all-MiniLM-L6-v2'):
    """Construct the SentenceTransformer identified by ``model_name``.

    The function is wrapped in ``st.cache`` so that the model does not have
    to be rebuilt every time it is requested with the same name.

    Args:
        model_name: Name of the sentence-transformers model to load.

    Returns:
        The ``SentenceTransformer`` instance for ``model_name``.
    """
    return SentenceTransformer(model_name)
17
+
18
# Model names offered as options in the sidebar "Similarity model" selector.
supported_models = [
    'all-MiniLM-L6-v2',
    'paraphrase-albert-small-v2',
    'paraphrase-MiniLM-L3-v2',
    'all-distilroberta-v1',
    'all-mpnet-base-v2',
]
20
+
21
# Load the emoji table from disk, then keep only the two columns the app
# reads: the emoji name and its codepoint string.
emoji_df = pd.read_csv('EmojiCharts_unicodeorg.csv')
emoji_df = emoji_df[['name', 'codepoints']]
23
+
24
def encode_emoji(emoji):
    """Convert a single ``U+XXXX`` codepoint label into its character.

    Args:
        emoji: One hexadecimal codepoint as text, with or without the
            ``U+`` prefix (for example ``"U+1F600"`` or ``"2764"``).

    Returns:
        The one-character string for that codepoint, or ``""`` when the
        input is empty or does not name a valid codepoint.
    """
    hex_digits = emoji.replace("U+", "").strip()
    if not hex_digits:
        return ""
    try:
        # chr() accepts every codepoint width, so short ("A9") and six-digit
        # ("10FFFF") values are rendered too, not only 4- and 5-digit ones.
        return chr(int(hex_digits, 16))
    except (ValueError, OverflowError):
        # Not hexadecimal, or outside the Unicode range: render nothing
        # rather than abort the whole result list.
        return ""
33
+
34
def find_similar_sentences(query, target_sentences, n=5):
    """Rank ``target_sentences`` by cosine similarity to ``query``.

    Both the query and the targets are embedded with the module-level
    ``model`` and compared with ``util.pytorch_cos_sim``.

    Args:
        query: The text to search with.
        target_sentences: The candidate sentences to rank.
        n: How many of the best matches to return.

    Returns:
        A list with the positions (into ``target_sentences``) of the ``n``
        highest-scoring candidates, best match first.
    """
    query_embedding = model.encode([query], convert_to_tensor=True)
    target_embeddings = model.encode(target_sentences, convert_to_tensor=True)
    similarity = util.pytorch_cos_sim(query_embedding, target_embeddings)
    # Only one query was encoded, so row 0 holds its score per target.
    scores = similarity.tolist()[0]
    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)
    return [position for position, _ in ranked[:n]]
44
+
45
# Sidebar controls: the model used for similarity and the number of emojis
# to show (1 to 10, defaulting to 5).
selected_model_name = st.sidebar.selectbox(
    'Similarity model',
    options=supported_models,
)
emoji_count = st.sidebar.slider(
    'Emoji output count',
    min_value=1,
    max_value=10,
    value=5,
    step=1,
)
48
+
49
# Title, intro blurb, query box and submit button.
# The non-ASCII characters below were restored from mis-decoded UTF-8.
# NOTE(review): the seventh glyph in the emoji run (shown as blueberries) had
# lost its final byte and is a best guess — confirm against the upstream file.
st.title("emoji_suggestion 🕵")
st.markdown(
    """**Semantic Search** for Emojis, allowing you to find emojis for any situation, like when you’re hungry and need an emoji-inspired meal plan for the month. just emojis hungry 🍲 🥫 🍽️ 🍟 🥣 🫘 🫐 🌯 😋 🧑‍🍼 🥮 🌭"""
)
query_text = st.text_area("Enter your text here: ", "I am hungry")
find_button = st.button("Submit")
54
+
55
# Fetch the model matching the sidebar selection.
model = load_similarity_model(selected_model_name)

# When the submit button was pressed, find the emoji names closest to the
# query and write one "<name> - <glyphs>" line per match.
with st.spinner("EmojiFinder is looking for clues to find the best emoji...."):
    if find_button:
        top_indices = find_similar_sentences(query_text, emoji_df['name'], emoji_count)
        for row_position in top_indices:
            emoji = emoji_df.iloc[row_position]
            # The codepoints field is split on spaces and each piece is
            # converted on its own, then the glyphs are re-joined.
            glyphs = ' '.join(encode_emoji(cp) for cp in emoji['codepoints'].split(' '))
            st.write(f'{emoji["name"]} - {glyphs}')