Spaces:

jfataphd
/

OncoDigger

Runtime error

File size: 3,336 Bytes

import streamlit as st
import time
import json
from gensim.models import Word2Vec
import pandas as pd
import matplotlib.pyplot as plt
import squarify
import numpy as np

# Page-wide dark background, injected into the Streamlit page as raw CSS.
# NOTE(review): the "# color: ..." lines inside the stylesheet are not CSS
# comment syntax (CSS comments are /* ... */). They are reproduced unchanged
# here; confirm whether white text was meant to be enabled or disabled.
_PAGE_STYLE = """
    <style>
    body {
        background-color: #000000;
        # color: #ffffff;
    }
    .stApp {
        background-color: #000000;
        # color: #ffffff;
    }
    </style>
    """
st.markdown(_PAGE_STYLE, unsafe_allow_html=True)

# Page title and the single free-text search box.
st.header(":white[Word2Vec App for Clotting Pubmed Database.]")

text_input_value = st.text_input(":white[Enter some text]")
# Normalise the keyword before it is looked up in the model: surrounding
# whitespace is stripped so that a blank or padded entry (e.g. " thrombin ")
# does not count as a distinct, unknown token, and the text is lower-cased
# as before.
query = text_input_value.strip().lower()
# query = input ("Enter your keyword(s):")

def _render_treemap(labels, weights, colormap, fontsize):
    """Draw one squarify treemap on its own figure and show it in Streamlit.

    Args:
        labels: Text shown on each tile, in the same order as ``weights``.
        weights: Relative tile sizes (larger value -> larger tile).
        colormap: A matplotlib colormap callable (e.g. ``plt.cm.Greens``);
            tiles are shaded from 0.05 to 0.5 along it.
        fontsize: Font size for the tile labels.
    """
    # A dedicated Figure/Axes is used instead of the global pyplot state so
    # that artists from a previous plot (or a previous script run) can never
    # leak into this one.
    fig, ax = plt.subplots()
    shades = colormap(np.linspace(0.05, 0.5, len(weights)))
    squarify.plot(
        sizes=weights,
        label=labels,
        color=list(shades),
        pad=.005,
        text_kwargs={'fontsize': fontsize},
        ax=ax,
    )
    ax.axis('off')
    st.pyplot(fig)
    # Release the figure so figures do not accumulate across reruns.
    plt.close(fig)


if query:
    # The model is loaded inside this branch, i.e. only once a keyword has
    # been entered.
    model = Word2Vec.load("pubmed_model_clotting")

    if query not in model.wv.key_to_index:
        # Looking up an unknown key in the model raises KeyError; report it
        # to the user instead of crashing the app.
        st.warning(f"'{query}' is not in the model vocabulary. Try a different keyword.")
    else:
        # Rank up to 10,000 vocabulary entries by multiplicative cosine
        # similarity to the query. The default 0-based row index is the rank.
        table = pd.DataFrame(
            model.wv.most_similar_cosmul(query, topn=10000),
            columns=['Word', 'SIMILARITY'],
        )
        table.index.name = 'Rank'

        # Console log plus CSV export of the top hits.
        print()
        print("Similarity to " + str(query))
        pd.set_option('display.max_rows', None)
        print(table.head(50))
        table.head(10).to_csv("clotting_sim1.csv", index=True)

        st.subheader(f":white[Similar Words to {query}]")

        # Tile size is the reciprocal of the 1-based rank, so the best match
        # gets the largest tile.
        top_words = table.head(20)
        word_weights = (1.0 / (top_words.index.to_numpy() + 1)).tolist()
        _render_treemap(top_words['Word'].tolist(), word_weights, plt.cm.Greens, 6)

        # Keep only the similar words that appear in the 'symbol' column of
        # Human_Genes.csv. Work on a copy so the edits below do not touch
        # (or warn about touching) a slice of ``table``.
        print()
        print("Human genes similar to " + str(query))
        gene_symbols = pd.read_csv('Human_Genes.csv')
        genes = table[table['Word'].isin(gene_symbols['symbol'])].copy()
        genes = genes.rename(columns={'Word': 'Human Gene'})
        genes['Human Gene'] = genes['Human Gene'].str.upper()
        print(genes.head(50))
        print()
        genes.head(50).to_csv("clotting_sim2.csv", index=True, header=False)

        st.subheader(f":white[Similar Genes to {query}]")

        top_genes = genes.head(20)
        if top_genes.empty:
            st.info("No human gene symbols were found among the similar words.")
        else:
            # The index still holds each gene's 0-based rank in the full
            # similarity table. Shift it to 1-based before taking the
            # reciprocal: otherwise a gene at rank 0 yields 1/0 = inf and
            # corrupts the treemap layout.
            gene_weights = (1.0 / (top_genes.index.to_numpy() + 1)).tolist()
            _render_treemap(top_genes['Human Gene'].tolist(), gene_weights, plt.cm.Blues, 8)



# findRelationships(query, df)







# model = gensim.models.KeyedVectors.load_word2vec_format('pubmed_model_clotting', binary=True)
# similar_words = model.most_similar(word)
# output = json.dumps({"word": word, "similar_words": similar_words})
# st.write(output)