# Spaces:
# Runtime error
# Runtime error
import json
import time

import pandas as pd
import streamlit as st
from datasets import Dataset
from datasets import load_dataset
from gensim.models import Word2Vec

# Files read by the app and the limits applied to the similarity results.
MODEL_PATH = "pubmed_model_clotting"
GENES_CSV = "Human_Genes.csv"
MAX_NEIGHBOURS = 10000
PREVIEW_ROWS = 50

# Define the HTML and CSS styles
html_temp = """
<div style="background-color:black;padding:10px">
<h1 style="color:white;text-align:center;">My Streamlit App with HTML and CSS</h1>
</div>
"""


def _publish_table(
    frame: pd.DataFrame, query: str, noun: str, heading: str, stem: str
) -> None:
    """Offer downloads for the top rows of *frame*, save them, and render them.

    Args:
        frame: Similarity table to publish; only the first ``PREVIEW_ROWS``
            rows are used.
        query: The search word, interpolated into labels and the heading.
        noun: What the rows are ("words" or "genes"), used in button labels.
        heading: Section title shown above the rendered table.
        stem: Base file name (without extension) for downloads and the
            copies written to the working directory.
    """
    top = frame.head(PREVIEW_ROWS)

    # Explicit keys keep the widgets unique even if two buttons ever end up
    # with identical labels and file names.
    st.download_button(
        label=f"Download {noun} similar to {query} in .csv format",
        data=top.to_csv(index=False).encode("utf-8"),
        file_name=f"{stem}.csv",
        mime="text/csv",
        key=f"{stem}_csv",
    )
    st.download_button(
        label=f"Download {noun} similar to {query} in .js format",
        data=top.to_json(index=True).encode("utf-8"),
        file_name=f"{stem}.js",
        mime="application/json",
        key=f"{stem}_json",
    )

    # Console preview plus on-disk copies, as the script has always produced.
    print(top.head(10))
    top.to_csv(f"{stem}.csv", index=True)
    top.to_json(f"{stem}.js", index=True)

    st.header(heading)
    st.write(top)


def _show_similar(query: str) -> None:
    """Look up *query* in the Word2Vec model and publish similar words and genes.

    Shows a warning and returns early when *query* is not in the model's
    vocabulary, instead of letting the lookup raise ``KeyError``.
    """
    model = Word2Vec.load(MODEL_PATH)

    if query not in model.wv.key_to_index:
        st.warning(f"'{query}' is not in the model vocabulary. Try another word.")
        return

    similar = pd.DataFrame(
        model.wv.most_similar_cosmul(query, topn=MAX_NEIGHBOURS),
        columns=["Word", "SIMILARITY"],
    )
    similar.index.name = "Rank"

    print()
    print("Similarity to " + str(query))
    pd.set_option("display.max_rows", None)
    _publish_table(
        similar, query, "words", f"Similar Words to {query}", "clotting_sim1"
    )

    print()
    print("Human genes similar to " + str(query))
    human_genes = pd.read_csv(GENES_CSV)
    # Filter and rename in one expression so the rename never operates on a
    # slice of `similar` (which would trigger SettingWithCopyWarning).
    gene_hits = similar[similar["Word"].isin(human_genes["symbol"])].rename(
        columns={"Word": "Human Gene"}
    )
    _publish_table(
        gene_hits, query, "genes", f"Similar Genes to {query}", "clotting_sim2"
    )
    print()


# Display the HTML and CSS styles
st.markdown(html_temp, unsafe_allow_html=True)
# Add some text to the app
st.write("This is my Streamlit app with HTML and CSS formatting.")

# Normalise the input: the lookup is done on the lower-cased word, and
# surrounding whitespace can never match a vocabulary entry.
query = st.text_input("Enter a word").strip().lower()
if query:
    _show_similar(query)

# Earlier draft (disabled) for exporting the gene table as a dataset:
# arrow_dataset = Dataset.from_pandas(df1.head(50))
# arrow_dataset.save_to_disk("https://huggingface.co/datasets/jfataphd/word2vec_dataset/sim2")
# arrow_dataset_reloaded = load_from_disk('sim2.js')
# arrow_dataset_reloaded