# NOTE(review): removed non-Python extraction artifacts that preceded this file
# (a "File size" header, a column of git blame hashes, and a dump of line numbers).
# Third-party dependencies: Gradio UI, HuggingFace transformers pipelines,
# NLTK sentence tokenization, and the OpenAI completion API.
import gradio as gr
from transformers import pipeline
import nltk
from nltk.tokenize import sent_tokenize
import openai
import json
import torch
import time
# Fetch the Punkt sentence-tokenizer model used by sent_tokenize below.
# NOTE(review): newer NLTK releases require the 'punkt_tab' resource instead
# of 'punkt' for sent_tokenize — confirm against the pinned nltk version.
nltk.download('punkt')
# Zero-shot-classification models
# this model is for classifying the sentences since sentences can belong to multiple criteria
classifier_pipeline = pipeline("zero-shot-classification", model = "facebook/bart-large-mnli", framework="pt", multi_label=True)
# this model is for classifying the whole text as include or exclude
include_pipeline = pipeline("zero-shot-classification", model = "facebook/bart-large-mnli", framework="pt")
def get_nlp_score_old(title, abstract, inclusion_criteria, exclusion_criteria, style, verbose=True):
    '''
    Score a study (title + abstract) against inclusion/exclusion criteria
    using zero-shot classification.

    Inputs:
        title (str): title of the study
        abstract (str): abstract of the study
        inclusion_criteria (str): "|"-separated inclusion criteria
        exclusion_criteria (str): "|"-separated exclusion criteria
        style (str): criteria preprocessing mode — "none", "keyword", or "keyphrase"
        verbose (bool): if True, also build the criterion -> matching-sentences map
    Outputs:
        (int, dict | list): score rounded to a 0-10 scale, and the reasoning
        mapping of original criterion -> list of supporting sentences
        (an empty list when verbose is False)
    '''
    def result_to_dict(result, threshold=0):
        '''
        Convert zero-shot classification output into a label:score dict,
        keeping only labels whose score exceeds `threshold` (0-1).
        '''
        return {label: score
                for label, score in zip(result["labels"], result["scores"])
                if score > threshold}

    def simplify_crit_openai(criteria, style):
        '''
        Simplify one criterion string via the OpenAI completion API.
        Criteria of 30 characters or fewer, style == "none", and unknown
        styles all pass through unchanged.
        '''
        if style == "none" or len(criteria) <= 30:
            return criteria
        if style == "keyphrase":
            prompt = "Turn this criteria into a simple label: \n\n" + criteria
        elif style == "keyword":
            prompt = "Summarize this criteria using keywords \n\n" + criteria
        else:
            # Unknown style: keep the original criterion rather than guess.
            return criteria
        # n=1: only the first completion was ever read (previously n=10,
        # which generated 10 completions and discarded 9).
        response = openai.Completion.create(
            model="text-davinci-002",
            prompt=prompt,
            temperature=0,
            max_tokens=120,
            n=1,
        )
        simplified = response["choices"][0]["text"].replace("\n", "")
        print(simplified)
        return simplified

    # Removed dead helper simplify_crit_spacy: it referenced undefined names
    # (rake_nltk_var, doc) and was never called.
    inclusion_criteria = inclusion_criteria.split("|")
    exclusion_criteria = exclusion_criteria.split("|")
    # Combine inclusion and exclusion criteria so classification runs only once.
    criteria = inclusion_criteria + exclusion_criteria
    simpl_crit = [simplify_crit_openai(crit.strip(), style) for crit in criteria]
    # Map each simplified criterion (the label the pipeline returns) back to
    # its original wording.
    crit_dict = dict(zip(simpl_crit, criteria))
    # Title + abstract as one text for sentence extraction and classification.
    combined = title.strip("\n") + ". " + abstract.strip("\n")
    if verbose:
        sent_cat = {crit: [] for crit in criteria}
        # Categorize each sentence to show which sentences support which criteria.
        for sentence in sent_tokenize(combined):
            result = classifier_pipeline(sentence, simpl_crit)
            # Keep only clearly relevant criteria (score above .75).
            rel_label = result_to_dict(result, threshold=.75)
            for crit in rel_label:
                sent_cat[crit_dict[crit]].append(sentence)
    else:
        sent_cat = []
    # Zero-shot classification across the combined title + abstract.
    incl_results = include_pipeline(combined, simpl_crit)
    nlp_score = 0
    # BUG FIX: pipeline results are sorted by descending score, NOT input
    # order, so the old `criteria[i]` indexing paired each score with the
    # wrong criterion. Map every returned label back to its original
    # criterion via crit_dict instead.
    for label, score in zip(incl_results["labels"], incl_results["scores"]):
        if crit_dict.get(label) in inclusion_criteria:
            nlp_score += score
    result_dict = {"nlp_score": nlp_score, "reasoning": sent_cat}
    return round(result_dict['nlp_score'] * 10), result_dict['reasoning']
# Gradio test dashboard wiring the screening function to a simple form.
with gr.Blocks() as demo:
    gr.Markdown(
    '''
    # SLR Test Environment
    This dashboard is for testing the nlp algorithm for the SLR tool
    Separate Criteria by | e.g. (China | Machine Learning | Landslide)
    '''
    )
    # Input widgets: study text, the two criteria lists, and the
    # preprocessing mode for the criteria.
    with gr.Row():
        title_box = gr.Textbox(label="Title")
        abstract_box = gr.Textbox(label="Abstract")
        inclusion_box = gr.Textbox(label="Inclusion Criteria")
        exclusion_box = gr.Textbox(label="Exclusion Criteria")
        style_radio = gr.Radio(
            ["none", "keyword", "keyphrase"],
            label="Criteria Preprocessing",
            info="How the criteria are preproccessed before going into the NLP algorithm",
        )
    # Output widgets: the 0-10 score and the per-criterion reasoning.
    with gr.Row():
        with gr.Column():
            score_box = gr.Textbox(label="NLP Score")
            reasoning_box = gr.Textbox(label="Reasoning")
    submit_btn = gr.Button("Submit")
    submit_btn.click(
        fn=get_nlp_score_old,
        inputs=[title_box, abstract_box, inclusion_box, exclusion_box, style_radio],
        outputs=[score_box, reasoning_box],
    )
demo.launch()
# NOTE(review): removed a stray "|" extraction artifact that trailed the file.