atrytone's picture
Update app.py
b97f7c0
raw
history blame
5.41 kB
import ast

import gradio as gr
import torch
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
def create_miread_embed(sents, bundle):
    """Embed text with a (tokenizer, model) bundle, entirely on the CPU.

    Args:
        sents: Text input handed straight to the tokenizer.
        bundle: Indexable pair; item 0 is the tokenizer and item 1 is the
            model. The model must expose a ``bert`` sub-module.

    Returns:
        A CPU tensor containing, for every input, the last-hidden-state
        vector at sequence position 0 (presumably the [CLS] token --
        confirm against the tokenizer in use).
    """
    tokenizer, model = bundle[0], bundle[1]
    cpu = torch.device('cpu')

    # Model and inputs must live on the same device before the forward pass.
    model.cpu()
    encoded = tokenizer(
        sents,
        max_length=512,
        padding=True,
        truncation=True,
        return_tensors="pt",
    ).to(cpu)

    # Pure inference: no gradients are needed.
    with torch.no_grad():
        hidden = model.bert(**encoded).last_hidden_state

    return hidden[:, 0, :].cpu()
def get_matches(query, k=60):
    """Return the nearest documents to ``query`` from the FAISS index.

    Args:
        query: Free-text query that is searched for in the module-level
            ``vecdb`` vector store.
        k: Maximum number of matches to request. Defaults to 60, the value
            that used to be hard-coded, so existing callers are unaffected.

    Returns:
        The result of ``vecdb.similarity_search_with_score``; ``inference``
        consumes it as a sequence of ``(document, score)`` pairs.
    """
    return vecdb.similarity_search_with_score(query, k=k)
def inference(query):
    """Build the abstract, journal and author recommendation tables.

    The query is matched against the vector store via ``get_matches``; every
    match contributes one row to the abstracts table, at most two rows per
    first author to the authors table, and its normalised score to the
    running total of its journal.

    Args:
        query: Text typed into the "Abstract" box.

    Returns:
        A list ``[a_output, j_output, n_output]`` of ``gr.Dataframe.update``
        objects, in the order abstracts, journals, authors:

        * abstracts rows: ``[rank, title, author, submitter, journal, date,
          link, score]``
        * journals rows: ``[position, journal, summed score]`` sorted by
          summed score, descending; matches without a journal are left out
        * authors rows: ``[rank, score, author, title, link, date]``

    Raises:
        KeyError: If a matched document has no ``title`` or ``authors``
            metadata.
        ValueError, SyntaxError: If the ``authors`` metadata is not a Python
            literal.
    """
    matches = get_matches(query)
    if not matches:
        # Nothing retrieved: show empty tables instead of crashing on
        # min()/max() of an empty sequence.
        return [gr.Dataframe.update(value=[], visible=True) for _ in range(3)]

    # float() accepts plain Python floats as well as numpy/torch scalars.
    raw_scores = [round(float(match[1]), 3) for match in matches]
    min_score = min(raw_scores)
    max_score = max(raw_scores)

    def normaliser(x):
        # Maps the smallest raw score to 1.0 and larger raw scores to smaller
        # values (raw scores are presumably distances, lower = closer --
        # confirm against the index metric).
        if max_score == 0:
            # Avoid dividing by zero; with non-negative scores this means
            # every match has the same raw score of 0.
            return 1.0
        return round(1 - (x - min_score) / max_score, 3)

    max_rows_per_author = 2
    auth_counts = {}
    j_bucket = {}
    n_table = []
    a_table = []

    for rank, (match, raw) in enumerate(zip(matches, raw_scores), start=1):
        meta = match[0].metadata
        score = normaliser(raw)
        title = meta['title']
        # 'authors' is stored as the text of a Python list literal.
        # literal_eval parses it without executing arbitrary expressions,
        # which the previous eval() call would have done.
        author = ast.literal_eval(meta['authors'])[0].title()
        date = meta.get('date', 'None')
        link = meta.get('link', 'None')
        submitter = meta.get('submitter', 'None')
        # Missing, None and blank journals all collapse to the 'None'
        # placeholder shown in the abstracts table.
        journal = (meta.get('journal') or '').strip() or 'None'

        # Journals: accumulate scores, never creating a bucket for the
        # placeholder (deleting it afterwards by truthiness missed a bucket
        # whose total was 0.0).
        if journal != 'None':
            j_bucket[journal] = j_bucket.get(journal, 0) + score

        # Authors: keep only the first few matches of each first author.
        seen = auth_counts.get(author, 0)
        if seen < max_rows_per_author:
            n_table.append([rank, score, author, title, link, date])
            auth_counts[author] = seen + 1

        # Abstracts: one row per match.
        a_table.append(
            [rank, title, author, submitter, journal, date, link, score]
        )

    ranked_journals = sorted(
        ((name, round(total, 3)) for name, total in j_bucket.items()),
        key=lambda item: item[1],
        reverse=True,
    )
    j_table = [
        [position, name, total]
        for position, (name, total) in enumerate(ranked_journals, start=1)
    ]

    j_output = gr.Dataframe.update(value=j_table, visible=True)
    n_output = gr.Dataframe.update(value=n_table, visible=True)
    a_output = gr.Dataframe.update(value=a_table, visible=True)
    return [a_output, j_output, n_output]
# Embedding model used to encode queries for the FAISS lookup. It runs on the
# CPU and its embeddings are left un-normalised.
model_name = "biodatlab/MIReAD-Neuro-Contrastive"
model_kwargs = dict(device='cpu')
encode_kwargs = dict(normalize_embeddings=False)
faiss_embedder = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Vector store loaded from the local "nbdt_contr" folder; searched by
# get_matches().
vecdb = FAISS.load_local("nbdt_contr", faiss_embedder)
# Gradio front end: one abstract text box, a button, and three result tabs
# that stay hidden until the first query has been answered.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# NBDT Recommendation Engine for Editors")
    # Built from adjacent string literals with explicit separating spaces.
    # Backslash continuations inside a single literal made the rendered text
    # depend on source indentation and ran sentences together.
    gr.Markdown(
        "NBDT Recommendation Engine for Editors is a tool for neuroscience "
        "authors/abstracts/journals recommendation built for NBDT journal "
        "editors. "
        "It aims to help an editor to find similar reviewers, abstracts, "
        "and journals to a given submitted abstract. "
        "To find a recommendation, paste a `title[SEP]abstract` or "
        "`abstract` in the text box below and click \"Find Matches\". "
        "Then, you can hover to authors/abstracts/journals tab to find a "
        "suggested list. "
        "The data in our current demo includes authors associated with the "
        "NBDT Journal. We will update the data monthly for an up-to-date "
        "publications."
    )
    abst = gr.Textbox(label="Abstract", lines=10)
    action_btn = gr.Button(value="Find Matches")

    with gr.Tab("Authors"):
        n_output = gr.Dataframe(
            headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
            datatype=['number', 'number', 'str', 'str', 'str', 'str'],
            col_count=(6, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Abstracts"):
        a_output = gr.Dataframe(
            headers=['No.', 'Title', 'Author', 'Corresponding Author',
                     'Journal', 'Date', 'Link', 'Score'],
            datatype=['number', 'str', 'str', 'str',
                      'str', 'str', 'str', 'number'],
            col_count=(8, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Journals"):
        j_output = gr.Dataframe(
            headers=['No.', 'Name', 'Score'],
            datatype=['number', 'str', 'number'],
            col_count=(3, "fixed"),
            wrap=True,
            visible=False
        )

    # The outputs order must match the list returned by inference():
    # abstracts, journals, authors.
    action_btn.click(fn=inference,
                     inputs=[
                         abst,
                     ],
                     outputs=[a_output, j_output, n_output],
                     api_name="neurojane")

demo.launch(debug=True)