Spaces:

biodatlab
/

MEDLINE-Reviewer-RecSys

Runtime error

App Files Files Community

MEDLINE-Reviewer-RecSys / app.py

atrytone

Update app.py

4bc6ef5 over 1 year ago

raw

history blame

5.41 kB

	import gradio as gr
	from langchain.vectorstores import FAISS
	from langchain.embeddings import HuggingFaceEmbeddings
	import torch


	def create_miread_embed(sents, bundle):
	    """Embed ``sents`` with a MIReAD-style model on the CPU.

	    Args:
	        sents: Text (or list of texts) handed to the tokenizer as-is.
	        bundle: Indexable pair where ``bundle[0]`` is the tokenizer and
	            ``bundle[1]`` is the model. The model must expose a ``bert``
	            sub-module whose output has ``last_hidden_state``.

	    Returns:
	        The ``last_hidden_state`` slice at sequence position 0 for every
	        input (one row per input), as a CPU tensor. Presumably this is the
	        ``[CLS]`` representation for BERT-style tokenizers - confirm
	        against the model card.
	    """
	    tokenizer, model = bundle[0], bundle[1]
	    device = torch.device('cpu')

	    # The model is moved in place; inference in this app is CPU-only.
	    model.cpu()
	    tokens = tokenizer(
	        sents,
	        max_length=512,
	        padding=True,
	        truncation=True,
	        return_tensors="pt",
	    )
	    tokens = tokens.to(device)

	    # No gradients are needed for a forward-only embedding pass.
	    with torch.no_grad():
	        out = model.bert(**tokens)
	        feature = out.last_hidden_state[:, 0, :]
	    return feature.cpu()


	def get_matches(query, k=60):
	    """Return the ``k`` index entries most similar to ``query``.

	    Args:
	        query: Text forwarded unchanged to the module-level ``vecdb`` store.
	        k: Number of neighbours to request. Defaults to 60, the value that
	            was previously hard-coded.

	    Returns:
	        Whatever ``vecdb.similarity_search_with_score`` returns; ``inference``
	        consumes each element as a ``(document, score)`` pair.
	    """
	    return vecdb.similarity_search_with_score(query, k=k)


	def inference(query):
	    """Build the abstract, journal and author tables for a submitted query.

	    The query is matched against the index via ``get_matches``; every match
	    gets a normalised score and is then folded into three tables:

	    * abstracts - one row per match:
	      ``[rank, title, author, submitter, journal, date, link, score]``.
	    * journals - normalised scores summed per journal, matches without a
	      journal dropped, sorted by total (descending):
	      ``[rank, journal, total]``.
	    * authors - ``[rank, score, author, title, link, date]``, keeping at
	      most the first two matches of any given first author.

	    Args:
	        query: Text entered in the UI.

	    Returns:
	        A list of three Gradio update payloads in the order
	        ``[abstracts, journals, authors]``, each making its table visible.
	    """
	    matches = get_matches(query)
	    if not matches:
	        # Nothing retrieved: show empty tables rather than failing in
	        # min()/max() below.
	        return [gr.update(value=[], visible=True) for _ in range(3)]

	    # float() accepts NumPy scalars as well as plain Python floats.
	    scores = [round(float(match[1]), 3) for match in matches]
	    min_score = min(scores)
	    max_score = max(scores)

	    def normaliser(x):
	        # The smallest raw score maps to 1.0; larger raw scores map to
	        # lower values, scaled by the largest raw score.
	        if max_score == 0:
	            # Avoid dividing by zero when the largest raw score is 0.
	            return 1.0
	        return round(1 - (x - min_score) / max_score, 3)

	    auth_counts = {}
	    j_bucket = {}
	    n_table = []
	    a_table = []
	    for rank, (match, raw_score) in enumerate(zip(matches, scores), start=1):
	        meta = match[0].metadata
	        score = normaliser(raw_score)
	        title = meta['title']
	        # NOTE(review): eval() executes whatever string is stored in the
	        # index metadata. It is kept because the stored format is not
	        # visible here; if the field is always a list literal,
	        # ast.literal_eval is the safe replacement - confirm against the
	        # script that builds the index.
	        author = eval(meta['authors'])[0].title()
	        date = meta.get('date', 'None')
	        link = meta.get('link', 'None')
	        submitter = meta.get('submitter', 'None')
	        # Missing, None and blank journals all collapse to the 'None'
	        # placeholder, which is removed from the journal table below.
	        journal = (meta.get('journal') or '').strip() or 'None'

	        # For journals: accumulate the normalised score per journal.
	        j_bucket[journal] = j_bucket.get(journal, 0) + score

	        # For authors: list at most two matches per first author.
	        seen = auth_counts.get(author, 0)
	        if seen < 2:
	            n_table.append([rank, score, author, title, link, date])
	            auth_counts[author] = seen + 1

	        # For abstracts: every match is listed.
	        a_table.append(
	            [rank, title, author, submitter, journal, date, link, score]
	        )

	    # pop() removes the placeholder even when its summed score is 0.0; the
	    # previous truthiness check left it in the table in that case.
	    j_bucket.pop('None', None)
	    ranked = sorted(
	        ([journal, round(total, 3)] for journal, total in j_bucket.items()),
	        key=lambda row: row[1],
	        reverse=True,
	    )
	    j_table = [
	        [position, name, total]
	        for position, (name, total) in enumerate(ranked, start=1)
	    ]

	    # gr.update is the component-agnostic update helper; the per-component
	    # Dataframe.update classmethod is not available on newer Gradio releases.
	    return [
	        gr.update(value=a_table, visible=True),
	        gr.update(value=j_table, visible=True),
	        gr.update(value=n_table, visible=True),
	    ]


	# Sentence-embedding model used to encode queries; it runs on the CPU and
	# its embeddings are left un-normalised.
	model_name = "biodatlab/MIReAD-Neuro-Large"
	model_kwargs = dict(device='cpu')
	encode_kwargs = dict(normalize_embeddings=False)
	faiss_embedder = HuggingFaceEmbeddings(
	    model_name=model_name,
	    model_kwargs=model_kwargs,
	    encode_kwargs=encode_kwargs,
	)

	# Vector store loaded from the local "medline_index" path; get_matches()
	# queries it through this module-level name.
	vecdb = FAISS.load_local("medline_index", faiss_embedder)


	# UI layout: a description, the abstract input, a trigger button, and one
	# initially hidden result table per tab.
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	    gr.Markdown("# NBDT Recommendation Engine for Editors")
	    # Adjacent string literals are used instead of backslash continuations
	    # so that source indentation does not leak into the rendered text.
	    gr.Markdown(
	        "NBDT Recommendation Engine for Editors is a tool for neuroscience "
	        "authors/abstracts/journals recommendation built for NBDT journal "
	        "editors. "
	        "It aims to help an editor to find similar reviewers, abstracts, "
	        "and journals to a given submitted abstract. "
	        "To find a recommendation, paste a `title[SEP]abstract` or "
	        "`abstract` in the text box below and click \"Find Matches\". "
	        "Then, you can hover to authors/abstracts/journals tab to find a "
	        "suggested list. "
	        "The data in our current demo includes authors associated with the "
	        "NBDT Journal. We will update the data monthly for up-to-date "
	        "publications."
	    )

	    abst = gr.Textbox(label="Abstract", lines=10)

	    action_btn = gr.Button(value="Find Matches")

	    with gr.Tab("Authors"):
	        n_output = gr.Dataframe(
	            headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
	            datatype=['number', 'number', 'str', 'str', 'str', 'str'],
	            col_count=(6, "fixed"),
	            wrap=True,
	            visible=False,
	        )
	    with gr.Tab("Abstracts"):
	        a_output = gr.Dataframe(
	            headers=['No.', 'Title', 'Author', 'Corresponding Author',
	                     'Journal', 'Date', 'Link', 'Score'],
	            datatype=['number', 'str', 'str', 'str',
	                      'str', 'str', 'str', 'number'],
	            col_count=(8, "fixed"),
	            wrap=True,
	            visible=False,
	        )
	    with gr.Tab("Journals"):
	        j_output = gr.Dataframe(
	            headers=['No.', 'Name', 'Score'],
	            datatype=['number', 'str', 'number'],
	            col_count=(3, "fixed"),
	            wrap=True,
	            visible=False,
	        )

	    # The output order must match the list returned by inference():
	    # abstracts, journals, authors.
	    action_btn.click(
	        fn=inference,
	        inputs=[abst],
	        outputs=[a_output, j_output, n_output],
	        api_name="neurojane",
	    )

	demo.launch(debug=True)