Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -3,8 +3,9 @@ import time
|
|
3 |
import json
|
4 |
from gensim.models import Word2Vec
|
5 |
import pandas as pd
|
6 |
-
|
7 |
-
|
|
|
8 |
|
9 |
# Define the HTML and CSS styles
|
10 |
html_temp = """
|
@@ -20,8 +21,8 @@ st.markdown(html_temp, unsafe_allow_html=True)
|
|
20 |
st.write("This is my Streamlit app with HTML and CSS formatting.")
|
21 |
|
22 |
query = st.text_input("Enter a word")
|
23 |
-
# query = input ("Enter your keyword(s):")
|
24 |
query = query.lower()
|
|
|
25 |
|
26 |
if query:
|
27 |
model = Word2Vec.load("pubmed_model_clotting") # you can continue training with the loaded model!
|
@@ -39,28 +40,31 @@ if query:
|
|
39 |
print()
|
40 |
print("Similarity to " + str(query))
|
41 |
pd.set_option('display.max_rows', None)
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
data=csv,
|
47 |
-
file_name='clotting_sim1.csv',
|
48 |
-
mime='text/csv'
|
49 |
-
)
|
50 |
-
|
51 |
-
json = table.head(50).to_json(index=True).encode('utf-8')
|
52 |
-
st.download_button(
|
53 |
-
label=f"Download words similar to {query} in .js format",
|
54 |
-
data=json,
|
55 |
-
file_name='clotting_sim1.js',
|
56 |
-
mime='json'
|
57 |
-
)
|
58 |
-
|
59 |
-
print(table.head(10))
|
60 |
-
table.head(50).to_csv("clotting_sim1.csv", index=True)
|
61 |
-
table.head(50).to_json("clotting_sim1.js", index=True)
|
62 |
st.header(f"Similar Words to {query}")
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
#
|
65 |
|
66 |
print()
|
@@ -70,36 +74,44 @@ if query:
|
|
70 |
m = df1.Word.isin(df2.symbol)
|
71 |
df1 = df1[m]
|
72 |
df1.rename(columns={'Word': 'Human Gene'}, inplace=True)
|
73 |
-
|
74 |
-
|
75 |
-
st.download_button(
|
76 |
-
label=f"Download genes similar to {query} in .csv format",
|
77 |
-
data=csv2,
|
78 |
-
file_name='clotting_sim2.csv',
|
79 |
-
mime='text/csv'
|
80 |
-
)
|
81 |
-
|
82 |
-
json2 = df1.head(50).to_json(index=True).encode('utf-8')
|
83 |
-
st.download_button(
|
84 |
-
label=f"Download words similar to {query} in .js format",
|
85 |
-
data=json2,
|
86 |
-
file_name='clotting_sim1.js',
|
87 |
-
mime='json'
|
88 |
-
)
|
89 |
-
print(df1.head(10))
|
90 |
-
df1.head(50).to_csv("clotting_sim2.csv", index=True)
|
91 |
-
df1.head(50).to_json("clotting_sim2.js", index=True)
|
92 |
print()
|
|
|
|
|
93 |
st.header(f"Similar Genes to {query}")
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
|
97 |
-
# arrow_dataset = Dataset.from_pandas(df1.head(50))
|
98 |
-
# arrow_dataset.save_to_disk("https://huggingface.co/datasets/jfataphd/word2vec_dataset/sim2")
|
99 |
|
100 |
-
# arrow_dataset_reloaded = load_from_disk('sim2.js')
|
101 |
-
# arrow_dataset_reloaded
|
102 |
-
|
103 |
-
|
104 |
|
|
|
|
|
|
|
|
|
105 |
|
|
|
3 |
import json
|
4 |
from gensim.models import Word2Vec
|
5 |
import pandas as pd
|
6 |
+
import matplotlib.pyplot as plt
|
7 |
+
import squarify
|
8 |
+
import numpy as np
|
9 |
|
10 |
# Define the HTML and CSS styles
|
11 |
html_temp = """
|
|
|
21 |
st.write("This is my Streamlit app with HTML and CSS formatting.")
|
22 |
|
23 |
query = st.text_input("Enter a word")
|
|
|
24 |
query = query.lower()
|
25 |
+
# query = input ("Enter your keyword(s):")
|
26 |
|
27 |
if query:
|
28 |
model = Word2Vec.load("pubmed_model_clotting") # you can continue training with the loaded model!
|
|
|
40 |
print()
|
41 |
print("Similarity to " + str(query))
|
42 |
pd.set_option('display.max_rows', None)
|
43 |
+
print(table.head(50))
|
44 |
+
table.head(10).to_csv("clotting_sim1.csv", index=True)
|
45 |
+
# short_table = table.head(50)
|
46 |
+
# print(table)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
st.header(f"Similar Words to {query}")
|
48 |
+
|
49 |
+
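# calculate the sizes of the squares in the treemap: take the first 20 rows and use 1 / (row index + 1) as each size, so earlier rows get larger squares (assumes the default 0-based index)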
|
50 |
+
short_table = table.head(20)
|
51 |
+
short_table.index += 1
|
52 |
+
short_table.index = 1 / short_table.index
|
53 |
+
sizes = short_table.index.tolist()
|
54 |
+
|
55 |
+
cmap = plt.cm.Greens(np.linspace(0.05, .5, len(sizes)))
|
56 |
+
color = [cmap[i] for i in range(len(sizes))]
|
57 |
+
|
58 |
+
short_table.set_index('Word', inplace=True)
|
59 |
+
squarify.plot(sizes=sizes, label=short_table.index.tolist(), color=color, pad=.005, text_kwargs={'fontsize': 6})
|
60 |
+
# finish the treemap with matplotlib: hide the axes and grab the current figure
|
61 |
+
plt.axis('off')
|
62 |
+
fig = plt.gcf()
|
63 |
+
# display the treemap in Streamlit
|
64 |
+
st.pyplot(fig)
|
65 |
+
plt.clf()
|
66 |
+
|
67 |
+
# st.write(short_table)
|
68 |
#
|
69 |
|
70 |
print()
|
|
|
74 |
m = df1.Word.isin(df2.symbol)
|
75 |
df1 = df1[m]
|
76 |
df1.rename(columns={'Word': 'Human Gene'}, inplace=True)
|
77 |
+
df1["Human Gene"] = df1["Human Gene"].str.upper()
|
78 |
+
print(df1.head(50))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
print()
|
80 |
+
df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
|
81 |
+
# time.sleep(2)
|
82 |
st.header(f"Similar Genes to {query}")
|
83 |
+
|
84 |
+
df1 = df1.head(20)
|
85 |
+
df1.index = 1/df1.index
|
86 |
+
sizes = df1.index.tolist()
|
87 |
+
|
88 |
+
cmap2 = plt.cm.Blues(np.linspace(0.05, .5, len(sizes)))
|
89 |
+
color2 = [cmap2[i] for i in range(len(sizes))]
|
90 |
+
|
91 |
+
df1.set_index('Human Gene', inplace=True)
|
92 |
+
squarify.plot(sizes=sizes, label=df1.index.tolist(), color=color2, pad=.005, text_kwargs={'fontsize': 8})
|
93 |
+
#
|
94 |
+
# finish the treemap with matplotlib: hide the axes and grab the current figure
|
95 |
+
|
96 |
+
plt.axis('off')
|
97 |
+
fig2 = plt.gcf()
|
98 |
+
# plt.show()
|
99 |
+
#
|
100 |
+
# display the treemap in Streamlit
|
101 |
+
st.pyplot(fig2)
|
102 |
+
|
103 |
+
|
104 |
+
|
105 |
+
# findRelationships(query, df)
|
106 |
+
|
107 |
+
|
108 |
+
|
109 |
|
110 |
|
|
|
|
|
111 |
|
|
|
|
|
|
|
|
|
112 |
|
113 |
+
# model = gensim.models.KeyedVectors.load_word2vec_format('pubmed_model_clotting', binary=True)
|
114 |
+
# similar_words = model.most_similar(word)
|
115 |
+
# output = json.dumps({"word": word, "similar_words": similar_words})
|
116 |
+
# st.write(output)
|
117 |
|