import gradio as gr
from transformers import pipeline
import nltk
from nltk.tokenize import sent_tokenize
import openai
import json
import torch
import time

nltk.download('punkt')
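# Note: newer NLTK releases may also need nltk.download('punkt_tab') for sent_tokenize.
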
# Zero-shot classification pipelines built on BART-MNLI. classifier_pipeline
# scores each label independently (multi_label=True) and is used per sentence;
# include_pipeline scores the full title + abstract against the criteria.
classifier_pipeline = pipeline("zero-shot-classification", model="facebook/bart-large-mnli", framework="pt", multi_label=True)
include_pipeline = pipeline("zero-shot-classification", model="facebook/bart-large-mnli", framework="pt")

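# A zero-shot call returns a dict shaped roughly like
#   {"sequence": "...", "labels": ["label b", "label a"], "scores": [0.91, 0.07]}
# with labels sorted by descending score; result_to_dict below relies on this shape.
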
def get_nlp_score_old(title, abstract, inclusion_criteria, exclusion_criteria, style, verbose=True):
    '''
    Takes the title and abstract of a study along with the inclusion and
    exclusion criteria and returns a score for whether the study should be
    included in or excluded from the review, together with the reasoning
    as a dictionary of criterion:sentences pairs.

    Inputs: title of study (str), abstract of study (str),
            inclusion_criteria of the review ("|"-separated str),
            exclusion_criteria of the review ("|"-separated str),
            style controls how criteria are simplified ("none", "keyword", or "keyphrase"),
            verbose controls whether the reasoning dictionary is populated

    Outputs: NLP score (int), reasoning dictionary (populated only if verbose=True)
    '''

    def result_to_dict(result, threshold=0):
        '''
        Converts a zero-shot classification result into a dictionary.

        Inputs: zero-shot classification result, threshold below which scores are dropped (0-1)

        Output: label:score dict
        '''
        result_dict = {label: score for label, score in zip(result["labels"], result["scores"])
                       if score > threshold}
        return result_dict

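    # For example, with the sample output sketched above and threshold=.75,
    # result_to_dict would return {"label b": 0.91} (hypothetical scores).
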
    def simplify_crit_openai(criteria, style):
        '''
        Simplifies the incoming criterion using the OpenAI completion API.

        Inputs: criterion (str), style ("none", "keyword", or "keyphrase")

        Output: simplified version of the criterion
        '''
        if style == "none":
            return criteria
        if len(criteria) > 30:
            if style == "keyphrase":
                prompt = "Turn this criteria into a simple label: \n\n" + criteria
            elif style == "keyword":
                prompt = "Summarize this criteria using keywords \n\n" + criteria
            else:
                return criteria

            response = openai.Completion.create(
                model="text-davinci-002",
                prompt=prompt,
                temperature=0,
                max_tokens=120,
                n=1  # only one completion is needed since temperature=0
            )
            print(response["choices"][0]["text"].replace("\n", ""))
            return response["choices"][0]["text"].replace("\n", "")
        else:
            return criteria

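    # NOTE: the call above uses the legacy Completion API (openai<1.0) and assumes
    # openai.api_key is already configured, e.g. via the OPENAI_API_KEY environment variable.
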
    # Alternative criterion simplifier using RAKE keyphrase extraction
    # (currently unused; needs the NLTK 'stopwords' corpus).
    def simplify_crit_spacy(criteria):
        from rake_nltk import Rake  # local import: only needed for this helper
        rake_nltk_var = Rake()
        rake_nltk_var.extract_keywords_from_text(criteria)
        keyword_extracted = rake_nltk_var.get_ranked_phrases()
        return keyword_extracted

    inclusion_criteria = inclusion_criteria.split("|")
    exclusion_criteria = exclusion_criteria.split("|")
    criteria = inclusion_criteria + exclusion_criteria

    # Simplify each criterion and keep a mapping from the simplified label
    # back to the original wording
    simpl_crit = [simplify_crit_openai(crit.strip().strip("\n"), style) for crit in criteria]
    crit_dict = dict(zip(simpl_crit, criteria))

    combined = title.strip("\n") + ". " + abstract.strip("\n")

    if verbose:
        text = sent_tokenize(combined)
        sent_cat = {crit: [] for crit in criteria}

        # Classify each sentence against the simplified criteria and record
        # which sentences support which original criterion
        for sentence in text:
            result = classifier_pipeline(sentence, simpl_crit)
            rel_label = result_to_dict(result, threshold=.75)
            for crit in rel_label.keys():
                og_crit = crit_dict[crit]
                sent_cat[og_crit].append(sentence)
    else:
        sent_cat = []

    # Score the combined title + abstract against all simplified criteria
    incl_results = include_pipeline(combined, simpl_crit)

    nlp_score = 0
    # The pipeline returns labels sorted by descending score, so map each label
    # back to its original criterion before checking whether it is an inclusion criterion
    for label, score in zip(incl_results["labels"], incl_results["scores"]):
        if crit_dict[label] in inclusion_criteria:
            nlp_score += score

    result_dict = {"nlp_score": nlp_score, "reasoning": sent_cat}

    return round(result_dict['nlp_score'] * 10), result_dict['reasoning']

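# Example (hypothetical inputs) of calling the scorer directly, assuming the pipelines
# above are loaded and, for style != "none", an OpenAI key is configured:
#   score, reasoning = get_nlp_score_old(
#       "Landslide susceptibility mapping with machine learning",
#       "We train a model on landslide inventories in China ...",
#       "Machine Learning | Landslide", "Review articles", "none")
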
with gr.Blocks() as demo:
    gr.Markdown(
        '''
        # SLR Test Environment

        This dashboard is for testing the NLP algorithm for the SLR tool.

        Separate criteria with | e.g. (China | Machine Learning | Landslide)
        '''
    )
    with gr.Row():
        title = gr.Textbox(label="Title")
        abstr = gr.Textbox(label="Abstract")
        incl = gr.Textbox(label="Inclusion Criteria")
        excl = gr.Textbox(label="Exclusion Criteria")
        style = gr.Radio(["none", "keyword", "keyphrase"], label="Criteria Preprocessing",
                         info="How the criteria are preprocessed before going into the NLP algorithm")
    with gr.Row():
        with gr.Column():
            score = gr.Textbox(label="NLP Score")
            criteria = gr.Textbox(label="Reasoning")

    btn = gr.Button("Submit")
    btn.click(fn=get_nlp_score_old, inputs=[title, abstr, incl, excl, style], outputs=[score, criteria])

demo.launch()