import json
import time

import gradio as gr
import nltk
import openai
import torch
from nltk.tokenize import sent_tokenize
from transformers import pipeline

nltk.download('punkt')

# Zero-shot-classification models
# This pipeline classifies individual sentences; multi_label=True because a
# sentence can match several criteria at once.
classifier_pipeline = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    framework="pt",
    multi_label=True,
)
# This pipeline classifies the whole text (title + abstract) against all
# criteria at once to decide include vs. exclude.
include_pipeline = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    framework="pt",
)

# A sentence is reported as evidence for a criterion only above this score.
_SENTENCE_MATCH_THRESHOLD = .75
# Criteria at or below this many characters are never sent for simplification.
_MIN_LENGTH_TO_SIMPLIFY = 30
# Prompt prefix per preprocessing style; any style not listed here
# (including "none" and None) means "use the criterion as written".
_SIMPLIFY_PROMPTS = {
    "keyphrase": "Turn this criteria into a simple label: \n\n",
    "keyword": "Summarize this criteria using keywords \n\n",
}


def _split_criteria(raw):
    '''Split a "|"-separated criteria string into stripped, non-empty items.'''
    return [part.strip() for part in (raw or "").split("|") if part.strip()]


def _result_to_dict(result, threshold=0):
    '''
    Changes the results from the zero-shot-classification into a dictionary

    Inputs: zero-shot classification result (dict with "labels" and "scores"),
            threshold below or at which scores are dropped (0-1)
    Output: label:score dict
    '''
    return {
        label: score
        for label, score in zip(result["labels"], result["scores"])
        if score > threshold
    }


def _simplify_criterion(criterion, style):
    '''
    Simplifies one criterion with the OpenAI completions endpoint.

    The criterion is returned unchanged when the style has no prompt
    ("none", None, or anything unknown) or when the criterion is short.

    Inputs: criterion (str), style ("none", "keyword" or "keyphrase")
    Output: simplified criterion (str)
    '''
    prompt_prefix = _SIMPLIFY_PROMPTS.get(style)
    if prompt_prefix is None or len(criterion) <= _MIN_LENGTH_TO_SIMPLIFY:
        return criterion

    # NOTE(review): openai.Completion is the legacy (openai<1.0) interface and
    # text-davinci-002 is an old model -- confirm both are still available in
    # the deployment environment.
    response = openai.Completion.create(
        model="text-davinci-002",
        prompt=prompt_prefix + criterion,
        temperature=0,
        max_tokens=120,
        n=1,  # only the first choice is ever read
    )
    simplified = response["choices"][0]["text"].replace("\n", "").strip()
    print(simplified)
    # An empty completion would make a useless candidate label; fall back.
    return simplified or criterion


def get_nlp_score_old(title, abstract, inclusion_criteria, exclusion_criteria, style, verbose=True):
    '''
    Scores whether a study should be included in a review.

    The title and abstract are classified against all (optionally simplified)
    criteria; the score is the summed probability assigned to the inclusion
    criteria, multiplied by 10 and rounded.

    Inputs: title of study (str), abstract of study (str),
            inclusion_criteria and exclusion_criteria of the review (str,
            items separated by "|"; surrounding whitespace and empty items
            are ignored), style of criteria preprocessing ("none", "keyword"
            or "keyphrase"), verbose controls whether the reasoning
            dictionary is built
    Outputs: (nlp score (int), reasoning) where reasoning is a dict mapping
             each criterion to the sentences that matched it when
             verbose is true, otherwise an empty list. With no criteria at
             all the score is 0 and the reasoning is empty.
    '''
    inclusion = _split_criteria(inclusion_criteria)
    exclusion = _split_criteria(exclusion_criteria)
    # Inclusion and exclusion criteria are classified together so the models
    # only have to run once per text.
    criteria = inclusion + exclusion
    if not criteria:
        return 0, ({} if verbose else [])

    labels = [_simplify_criterion(crit, style) for crit in criteria]
    inclusion_labels = set(labels[:len(inclusion)])
    # Different criteria may simplify to the same label: send each label to
    # the model once, but remember every criterion it stands for.
    candidate_labels = list(dict.fromkeys(labels))
    label_to_criteria = {}
    for label, crit in zip(labels, criteria):
        matched = label_to_criteria.setdefault(label, [])
        if crit not in matched:
            matched.append(crit)

    # combines the title and the abstract into one str
    combined = title.strip("\n") + ". " + abstract.strip("\n")

    if verbose:
        sent_cat = {crit: [] for crit in criteria}
        # categorizes the sentences to return which sentences match which criteria
        for sentence in sent_tokenize(combined):
            result = classifier_pipeline(sentence, candidate_labels)
            relevant = _result_to_dict(result, threshold=_SENTENCE_MATCH_THRESHOLD)
            for label in relevant:
                for crit in label_to_criteria[label]:
                    sent_cat[crit].append(sentence)
    else:
        sent_cat = []

    # runs the zero-shot classification across both the abstract and title
    incl_results = include_pipeline(combined, candidate_labels)
    # The pipeline returns labels ordered by score, not in input order, so
    # membership is decided from the returned label, never from its position.
    nlp_score = sum(
        score
        for label, score in zip(incl_results["labels"], incl_results["scores"])
        if label in inclusion_labels
    )

    return round(nlp_score * 10), sent_cat


with gr.Blocks() as demo:
    gr.Markdown(
        '''
        # SLR Test Environment
        This dashboard is for testing the nlp algorithm for the SLR tool

        Separate Criteria by | e.g. (China | Machine Learning | Landslide)
        '''
    )
    with gr.Row():
        title = gr.Textbox(label="Title")
        abstr = gr.Textbox(label="Abstract")
        incl = gr.Textbox(label="Inclusion Criteria")
        excl = gr.Textbox(label="Exclusion Criteria")
        # Default to "none" so the callback never receives an unset style.
        style = gr.Radio(
            ["none", "keyword", 'keyphrase'],
            value="none",
            label="Criteria Preprocessing",
            info="How the criteria are preproccessed before going into the NLP algorithm",
        )
    with gr.Row():
        with gr.Column():
            score = gr.Textbox(label="NLP Score")
            criteria = gr.Textbox(label="Reasoning")
    btn = gr.Button("Submit")
    btn.click(fn=get_nlp_score_old, inputs=[title, abstr, incl, excl, style], outputs=[score, criteria])

demo.launch()