Spaces:

EdwardXu
/

class_practice_1

Sleeping

App Files Files Community

EdwardXu committed on Feb 2

Commit

8ed8a0a

•

1 Parent(s): 5dfb73d

Update app.py

Browse files

Files changed (1) hide show

app.py +202 -0

app.py CHANGED Viewed

@@ -276,3 +276,205 @@ Create a simple streamlit or equivalent webapp like the link in 5.
 This is also part of your Mini-Project 1!
 """

 This is also part of your Mini-Project 1!
 """
+def plot_piechart(sorted_cosine_scores_items):
+    sorted_cosine_scores = np.array([
+            sorted_cosine_scores_items[index][1]
+            for index in range(len(sorted_cosine_scores_items))
+        ]
+    )
+    categories = st.session_state.categories.split(" ")
+    categories_sorted = [
+        categories[sorted_cosine_scores_items[index][0]]
+        for index in range(len(sorted_cosine_scores_items))
+    ]
+    fig, ax = plt.subplots()
+    ax.pie(sorted_cosine_scores, labels=categories_sorted, autopct="%1.1f%%")
+    st.pyplot(fig)  # Figure
+def plot_piechart_helper(sorted_cosine_scores_items):
+    sorted_cosine_scores = np.array(
+        [
+            sorted_cosine_scores_items[index][1]
+            for index in range(len(sorted_cosine_scores_items))
+        ]
+    )
+    categories = st.session_state.categories.split(" ")
+    categories_sorted = [
+        categories[sorted_cosine_scores_items[index][0]]
+        for index in range(len(sorted_cosine_scores_items))
+    ]
+    fig, ax = plt.subplots(figsize=(3, 3))
+    my_explode = np.zeros(len(categories_sorted))
+    my_explode[0] = 0.2
+    if len(categories_sorted) == 3:
+        my_explode[1] = 0.1  # explode this by 0.2
+    elif len(categories_sorted) > 3:
+        my_explode[2] = 0.05
+    ax.pie(
+        sorted_cosine_scores,
+        labels=categories_sorted,
+        autopct="%1.1f%%",
+        explode=my_explode,
+    )
+    return fig
+def plot_piecharts(sorted_cosine_scores_models):
+    scores_list = []
+    categories = st.session_state.categories.split(" ")
+    index = 0
+    for model in sorted_cosine_scores_models:
+        scores_list.append(sorted_cosine_scores_models[model])
+        # scores_list[index] = np.array([scores_list[index][ind2][1] for ind2 in range(len(scores_list[index]))])
+        index += 1
+    if len(sorted_cosine_scores_models) == 2:
+        fig, (ax1, ax2) = plt.subplots(2)
+        categories_sorted = [
+            categories[scores_list[0][index][0]] for index in range(len(scores_list[0]))
+        ]
+        sorted_scores = np.array(
+            [scores_list[0][index][1] for index in range(len(scores_list[0]))]
+        )
+        ax1.pie(sorted_scores, labels=categories_sorted, autopct="%1.1f%%")
+        categories_sorted = [
+            categories[scores_list[1][index][0]] for index in range(len(scores_list[1]))
+        ]
+        sorted_scores = np.array(
+            [scores_list[1][index][1] for index in range(len(scores_list[1]))]
+        )
+        ax2.pie(sorted_scores, labels=categories_sorted, autopct="%1.1f%%")
+    st.pyplot(fig)
+def plot_alatirchart(sorted_cosine_scores_models):
+    models = list(sorted_cosine_scores_models.keys())
+    tabs = st.tabs(models)
+    figs = {}
+    for model in models:
+        figs[model] = plot_piechart_helper(sorted_cosine_scores_models[model])
+    for index in range(len(tabs)):
+        with tabs[index]:
+            st.pyplot(figs[models[index]])
+### Text Search ###
+st.sidebar.title("GloVe Twitter")
+st.sidebar.markdown(
+    """
+GloVe is an unsupervised learning algorithm for obtaining vector representations for words. Pretrained on
+2 billion tweets with vocabulary size of 1.2 million. Download from [Stanford NLP](http://nlp.stanford.edu/data/glove.twitter.27B.zip).
+Jeffrey Pennington, Richard Socher, and Christopher D. Manning. 2014. *GloVe: Global Vectors for Word Representation*.
+"""
+)
+# initialize Session State variable
+if 'categories' not in st.session_state:
+    st.session_state['categories'] = "Flowers Colors Cars Weather Food"
+if 'text_search' not in st.session_state:
+    st.session_state['text_search'] = "Roses are red, trucks are blue, and Seattle is grey right now"
+model_type = st.sidebar.selectbox("Choose the model", ("25d", "50d"), index=1)
+st.title("In Class practice 1 demo")
+st.subheader(
+    "Pass in space separated categories you want this search demo to be about."
+)
+# st.selectbox(label="Pick the categories you want this search demo to be about...",
+# options=("Flowers Colors Cars Weather Food", "Chocolate Milk", "Anger Joy Sad Frustration Worry Happiness", "Positive Negative"),
+# key="categories"
+# )
+# categories of user input
+categories = st.text_input(
+    label="Categories", value=st.session_state.categories
+)
+st.session_state.categories = categories.split(" ")
+print(st.session_state.get("categories"))
+print(type(st.session_state.get("categories")))
+# print("Categories = ", categories)
+# st.session_state.categories = categories
+st.subheader("Pass in an input word or even a sentence")
+text_search = st.text_input(
+    label="Input your sentence",
+    st.session_state.text_search,
+)
+st.session_state.text_search = text_search
+# Download glove embeddings if it doesn't exist
+embeddings_path = "embeddings_" + str(model_type) + "_temp.npy"
+word_index_dict_path = "word_index_dict_" + str(model_type) + "_temp.pkl"
+if not os.path.isfile(embeddings_path) or not os.path.isfile(word_index_dict_path):
+    print("Model type = ", model_type)
+    glove_path = "Data/glove_" + str(model_type) + ".pkl"
+    print("glove_path = ", glove_path)
+    # Download embeddings from google drive
+    with st.spinner("Downloading glove embeddings..."):
+        download_glove_embeddings_gdrive(model_type)
+# Load glove embeddings
+word_index_dict, embeddings = embeddings_model.load_glove_embeddings(model_type)
+category_embeddings = {category: embeddings_model.get_sentence_transformer_embedding(category) for category in
+                       st.session_state.categories}
+search_using_cos = Search(embeddings_model)
+# Find closest word to an input word
+if st.session_state.get("text_search"):
+    # sentence transformer  Embedding
+    print("sentence transformer  Embedding")
+    embeddings_metadata = {
+        "word_index_dict": word_index_dict,
+        "embeddings": embeddings,
+        "model_type": model_type,
+        "text_search": st.session_state.text_search
+    }
+    with st.spinner("Obtaining Cosine similarity ..."):
+        sorted_cosine_sim_transformer = search_using_cos.get_topK_similar_categories(
+            st.session_state.text_search, category_embeddings
+        )
+    # Results and Plot Pie Chart for Glove
+    print("Categories are: ", st.session_state.categories)
+    st.subheader(
+        "Closest word I have between: "
+        + st.session_state.categories
+        + " as per different Embeddings"
+    )
+    # print(sorted_cosine_sim_glove)
+    print(sorted_cosine_sim_transformer)
+    print(list(sorted_cosine_sim_transformer.keys())[0])
+    st.write(
+        f"Closest category using sentence transformer embeddings : {list(sorted_cosine_sim_transformer.keys())[0]}")
+    plot_alatirchart(
+        {
+            "sentence_transformer_384": sorted_cosine_sim_transformer,
+        }
+    )
+    st.write("")
+    st.write(
+        "Demo developed by Edward Xu"
+    )