aaditkapoorbionlp's picture
Update app.py
d88ecb8
raw
history blame
1.57 kB
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel
import os
import pandas as pd
import numpy as np
from transformers import pipeline
from sklearn.metrics.pairwise import cosine_similarity, manhattan_distances, euclidean_distances
@st.cache(allow_output_mutation=True)
def load_model():
    """Load the PubMedGPT tokenizer and base model.

    The result is wrapped in Streamlit's cache decorator, so the loading
    code is not re-run on every script rerun.

    Returns:
        A ``(tokenizer, model)`` tuple, both loaded from the same checkpoint.
    """
    checkpoint = "stanford-crfm/pubmedgpt"
    # The tuple is built left to right: tokenizer first, then the model.
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModel.from_pretrained(checkpoint),
    )
# Load the tokenizer/model pair through the cached loader.
tokenizer, model = load_model()

# Feature-extraction pipeline used by get_embedding to turn text into
# per-token feature vectors.
pipe = pipeline(
    task='feature-extraction',
    tokenizer=tokenizer,
    model=model,
)
def get_embedding(desc):
    """Embed a description as the mean of its token feature vectors.

    Args:
        desc: Text handed to the module-level feature-extraction ``pipe``.

    Returns:
        A 1-D NumPy array whose length equals the size of the last axis of
        the pipeline output.
    """
    features = np.asarray(pipe(desc))
    # Collapse the leading axes explicitly instead of using np.squeeze:
    # squeeze also removes a length-1 token axis, which made single-token
    # inputs average down to a scalar rather than a vector.
    # assumes the last axis of the pipeline output is the feature axis -- TODO confirm
    token_vectors = features.reshape(-1, features.shape[-1])
    return token_vectors.mean(axis=0)
# NOTE(review): Streamlit documents set_page_config as having to be the first
# Streamlit command in the script; the cached load_model() call runs before
# this point -- confirm that ordering is accepted by the pinned version.
st.set_page_config(
    page_title="Clinical Trials Best Match [Eye Diseases]",
    # Technologist emoji (person + zero-width joiner + laptop). Written as
    # escapes because the literal had been corrupted into mis-decoded bytes.
    page_icon="\U0001F9D1\u200D\U0001F4BB",
    layout="wide",
)

# Constants
embs = []

# Heading
st.title('Clinical Trials Search')

# Stored records that get_sim iterates over; each record is indexed by
# 'data' and 'ids' there.
# Original note: "Gene File, 128 dim embeddings".
# NOTE(review): that description may be stale for this app -- confirm what
# data.npy actually contains and its embedding size.
data = np.load("data.npy")
@st.cache(allow_output_mutation=True)
def get_sim(emb_desc, data):
    """Rank the stored records by cosine similarity to a query embedding.

    Args:
        emb_desc: Embedding of the query description. It is flattened to a
            single row before comparison.
        data: Iterable of records, each indexable by ``'data'`` (the stored
            embedding) and ``'ids'`` (the value placed in the ``url`` column).

    Returns:
        A DataFrame with columns ``url`` and ``scores`` (plain floats),
        ordered so the most similar record comes first.
    """
    # cosine_similarity only accepts 2-D input, so present the query as a
    # one-row matrix instead of a bare vector.
    query = np.asarray(emb_desc).reshape(1, -1)

    ids = []
    scores = []
    for record in data:
        # assumes each record holds one embedding of the same length as the
        # query -- TODO confirm against how data.npy was produced
        candidate = np.asarray(record['data']).reshape(1, -1)
        # The result is a 1x1 matrix; unwrap it to a float so the scores
        # column is numeric and sortable.
        scores.append(float(cosine_similarity(query, candidate)[0, 0]))
        ids.append(record['ids'])

    df = pd.DataFrame(data={"url": ids, "scores": scores})
    # Higher cosine similarity means a closer match, so sort descending to
    # put the best matches at the top of the table.
    return df.sort_values(by='scores', ascending=False)
# Query input. The leading emoji (wired keyboard) is written as an escape
# because the literal had been corrupted into mis-decoded bytes.
st.subheader("\U0001F5AE Enter your clinical trial study description")
text = st.text_area('Example')

# Search trigger. The leading emoji is the laptop / personal computer.
st.subheader("\U0001F4BB Hit Search")
if st.button("Compute"):
    if not text.strip():
        # The text area starts out empty; there is nothing to embed, so ask
        # for input instead of running the model on an empty string.
        st.warning("Please enter a study description before searching.")
    else:
        with st.spinner('Searching...'):
            # Embed only when a search is requested, rather than on every
            # script rerun.
            emb = get_embedding(text)
            df = get_sim(emb, data=data)
        st.dataframe(df)