Spaces:
Runtime error
Runtime error
File size: 1,573 Bytes
90c54f6 d88ecb8 90c54f6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel
import os
import pandas as pd
import numpy as np
from transformers import pipeline
from sklearn.metrics.pairwise import cosine_similarity, manhattan_distances, euclidean_distances
@st.cache(allow_output_mutation=True)
def load_model():
    """Load the PubMedGPT tokenizer and base model.

    The function is wrapped in ``st.cache`` so Streamlit can reuse the
    returned objects across script reruns instead of loading them again.

    NOTE(review): newer Streamlit releases deprecate ``st.cache`` in favour
    of ``st.cache_resource`` — confirm the pinned version before switching.

    Returns:
        A ``(tokenizer, model)`` tuple, both loaded from the
        ``stanford-crfm/pubmedgpt`` checkpoint.
    """
    checkpoint = "stanford-crfm/pubmedgpt"
    # Tuple elements evaluate left to right: tokenizer first, then the model.
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModel.from_pretrained(checkpoint),
    )
# Load (or fetch from Streamlit's cache) the tokenizer/model pair once per run.
tokenizer, model = load_model()

# Feature-extraction pipeline built on the loaded model; used by
# get_embedding to obtain per-token features for a piece of text.
pipe = pipeline(
    task="feature-extraction",
    tokenizer=tokenizer,
    model=model,
)
def get_embedding(desc):
    """Return a single embedding for *desc*.

    The text is run through the module-level feature-extraction ``pipe``,
    singleton dimensions are squeezed out of the result, and the remaining
    array is averaged along its first axis.

    Args:
        desc: Text to embed.

    Returns:
        The mean of the squeezed pipeline output along axis 0.
    """
    features = np.squeeze(pipe(desc))
    # presumably axis 0 is the token axis after squeezing — TODO confirm
    return features.mean(axis=0)
# Page-level Streamlit settings: browser-tab title, icon, and wide layout.
# NOTE(review): Streamlit documents that set_page_config should be the first
# Streamlit command in a script, yet the cached model load runs earlier in
# this file — confirm this ordering works on the deployed Streamlit version.
st.set_page_config(
page_title="Clinical Trials Best Match [Eye Diseases]",
page_icon="🧑💻",
layout="wide",
)
# Constants
# `embs` starts out empty; nothing in the code visible here reads or fills it.
embs = []
# Heading
st.title('Clinical Trials Search')
# Records that get_sim iterates over; each record is indexed with the keys
# 'ids' and 'data'. The original note here read "Gene File, 128 dim
# embeddings" — TODO confirm that still describes data.npy.
data = np.load("data.npy")
@st.cache(allow_output_mutation=True)
def get_sim(emb_desc, data):
    """Rank stored records by cosine similarity to a query embedding.

    Args:
        emb_desc: Query embedding, either a 1-D vector or a single-row
            2-D array.
        data: Iterable of records; each record must expose an ``'ids'``
            entry and a ``'data'`` entry holding that record's embedding
            (1-D, or 2-D with one row — presumably the same width as
            ``emb_desc``; verify against how data.npy was produced).

    Returns:
        A DataFrame with a ``url`` column (filled from each record's
        ``'ids'``) and a float ``scores`` column, sorted so the most
        similar record comes first.

    Raises:
        ValueError: Propagated from ``cosine_similarity`` when the query
            and a stored embedding have different widths.
    """
    # cosine_similarity only accepts 2-D inputs, while the query produced by
    # get_embedding is 1-D; promote it once, outside the loop.
    query = np.atleast_2d(emb_desc)

    ids = []
    scores = []
    for record in data:
        candidate = np.atleast_2d(record['data'])
        similarity = cosine_similarity(query, candidate)
        ids.append(record['ids'])
        # Keep a plain float rather than a nested array so the column is
        # numeric and sorts predictably.
        scores.append(float(similarity[0, 0]))

    df = pd.DataFrame(data={"url": ids, "scores": scores})
    # Higher cosine similarity means a closer match, so list best matches first.
    return df.sort_values(by='scores', ascending=False)
# --- Search UI -------------------------------------------------------------
st.subheader("🖮 Enter your clinical trial study description")
text = st.text_area('Example')

st.subheader("💻 Hit Search")
if st.button("Compute"):
    # Embed only on an explicit search. Streamlit reruns the whole script on
    # every interaction, and on first load the text area is empty; embedding
    # at module level therefore ran the model on every rerun and on an empty
    # string, which the model most likely cannot process.
    if not text.strip():
        st.warning("Please enter a study description before searching.")
    else:
        with st.spinner('Searching...'):
            emb = get_embedding(text)
            df = get_sim(emb, data=data)
        st.dataframe(df)