|
import torch |
|
import nltk |
|
import re |
|
import numpy as np |
|
import gradio as gr |
|
from typing import Dict |
|
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler |
|
from transformers import BertForSequenceClassification,BertTokenizer |
|
|
|
|
|
# Hugging Face Hub repository that hosts both the fine-tuned classifier
# weights and the matching vocabulary.
_PICO_MODEL_ID = "owaiskha9654/PICO_Evidence_Classification_Model"

# Four output classes: one per PICO label handled by PICO_Tagger.
model = BertForSequenceClassification.from_pretrained(_PICO_MODEL_ID, num_labels=4)
# The app only runs inference; from_pretrained already returns the model in
# evaluation mode, this just keeps that requirement explicit.
model.eval()

tokenizer = BertTokenizer.from_pretrained(_PICO_MODEL_ID, do_lower_case=True)

# Sentence-splitter data used by nltk.sent_tokenize. Older NLTK releases load
# the "punkt" resource while NLTK 3.9+ loads "punkt_tab", so fetch both; an
# unused resource is simply ignored by the installed version.
for _punkt_resource in ("punkt", "punkt_tab"):
    nltk.download(_punkt_resource)
|
|
|
# Sample passages offered as one-click inputs in the demo UI.
examples = [
    "In addition to their essential catalytic role in protein biosynthesis, aminoacyl tRNA synthetases participate into numerous other functions, including regulation of gene expression and amino acid biosynthesis via transamidation pathways. labor 48 labor induced amniotomy oxytocin. Herein, we describe a class of aminoacyl tRNA synthetase like HisZ proteins based on the catalytic core of the contemporary class II histidyl tRNA synthetase whose members lack aminoacylation activity but are instead essential components of the first enzyme in histidine biosynthesis ATP phosphoribosyltransferase (HisG).",
    "total 164 female breast cancer patients treated anthracyclines received anthracycline-based chemotherapy dexrazoxane maximum cycles. participants received amlodipine 5 daily lisinopril 5 daily secondary outcomes death noncardiac repeat revascularization procedures pci cabg",
    " Prediction of the function of HisZ in Lactococcus lactis was assisted by comparative genomics, a technique that revealed a link between the presence or the absence of HisZ and a systematic variation in the length of the HisG polypeptide.",
    "HisZ is required for histidine prototrophy, and three other lines of evidence support the direct involvement of HisZ in the transferase function. 1) Genetic experiments demonstrate that complementation of an in-frame deletion of HisG from Escherichia coli (which does not possess HisZ) requires both HisG and HisZ from L. lactis. 2) Coelution of HisG and HisZ during affinity chromatography provides evidence of direct physical interaction. 3) Both HisG and HisZ are required for catalysis of the ATP phosphoribosyltransferase reaction. This observation of a common protein domain linking amino acid biosynthesis and protein synthesis implies an early connection between the biosynthesis of amino acids and proteins.",
]
|
|
|
|
|
# Human-readable PICO labels; the tuple index is the class id picked by argmax
# over the classifier's logits.
_PICO_LABELS = (
    'Not Relevant to the Evidence (Label N)',
    'Population/Problem (element P)',
    'Intervention and Comparison (element I and C)',
    'Outcome (element O)',
)

# Removes @mentions, every character other than letters, digits, '.', space
# and tab, URLs, a leading "rt", and stray "http" fragments. The mention branch
# previously had an escaped bracket (`@\[`), which made it unable to match.
_CLEANUP_PATTERN = re.compile(
    r"(@[A-Za-z0-9]+)|([^.0-9A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?"
)


def _sentence_spans(text, sentences):
    """Return the (start, end) character offsets of each sentence within *text*."""
    spans = []
    cursor = 0
    for sentence in sentences:
        start = text.find(sentence, cursor)
        if start < 0:
            # Not expected when the sentences are slices of `text`; fall back
            # to the running cursor so the offsets stay monotonic.
            start = cursor
        end = min(start + len(sentence), len(text))
        spans.append((start, end))
        cursor = end
    return spans


def PICO_Tagger(text):
    """Label every sentence of *text* with its predicted PICO element.

    The text is cleaned with ``_CLEANUP_PATTERN``, split into sentences with
    NLTK, and each sentence is classified by the module-level BERT ``model``.

    Args:
        text: Raw passage to analyse. ``None``, empty, or blank input (also
            input that is empty after cleaning) yields no entities.

    Returns:
        ``{"text": cleaned_text, "entities": [...]}`` where each entity is a
        dict with ``start``/``end`` character offsets into ``cleaned_text``
        and ``entity`` set to the predicted label for that sentence.
    """
    cleaned = _CLEANUP_PATTERN.sub("", text or "")
    if not cleaned.strip():
        # Nothing left to classify; skip tokenization and inference.
        return {"text": cleaned, "entities": []}

    sentences = nltk.sent_tokenize(cleaned)
    if not sentences:
        return {"text": cleaned, "entities": []}

    # Truncate to the model's position limit so one very long sentence cannot
    # make the forward pass fail.
    encoded = tokenizer(
        sentences,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=model.config.max_position_embeddings,
    ).to(model.device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        logits = model(**encoded).logits

    # argmax over raw logits selects the same class as argmax over their
    # sigmoid, because sigmoid is monotonic.
    class_ids = torch.argmax(logits, dim=1).tolist()

    # Locate each sentence in the cleaned text rather than assuming exactly
    # one separator character between sentences; the old running sum drifted
    # on leading whitespace or repeated spaces and overran the final span.
    spans = _sentence_spans(cleaned, sentences)
    entities = [
        {"start": start, "end": end, "entity": _PICO_LABELS[class_id]}
        for (start, end), class_id in zip(spans, class_ids)
    ]
    return {"text": cleaned, "entities": entities}
|
|
|
|
|
# Heading shown at the top of the demo page.
title = 'Utilizing BioBERT for PICO Evidence Summarization'

# Introductory blurb rendered under the title; built from adjacent literals so
# each sentence sits on its own source line.
description = (
    "The traditional machine learning models give a lot of pain when we do not have sufficient labeled data for the specific task or domain we care about to train a reliable model. "
    "Transfer learning allows us to deal with these scenarios by leveraging the already existing labeled data of some related task or domain. "
    "We try to store this knowledge gained in solving the source task in the source domain and apply it to our problem of interest. "
    "In this work, I have utilized the dataset from the paper published in IEEE Journal of Biomedical and Health Informatics "
    '<a href="https://ieeexplore.ieee.org/document/9056501">Aceso: PICO-Guided Evidence Summarization on Medical Literature</a>'
)
|
|
|
# Footer HTML shown below the demo: author credit plus links to the training
# notebook, the Kaggle profile and the deployed model repository.
# The copyright sign had been mis-decoded into a Greek capital Beta followed
# by the sign; it is restored to a plain "©" here.
text1 = (
    "<center> Author: Owais Ahmad ©2022 Data Scientist at <b> Thoucentric </b> <a href=\"https://www.linkedin.com/in/owaiskhan9654/\">Visit Profile</a> <br></center>"
    "<center> Model Trained Kaggle Kernel <a href=\"https://www.kaggle.com/code/owaiskhan9654/utilizing-biobert-for-pico-evidence-summarization\">Link</a> <br></center>"
    "<center> Kaggle Profile <a href=\"https://www.kaggle.com/owaiskhan9654\">Link</a> <br> </center>"
    "<center> PICO Evidence Model Deployed Repository <a href=\"https://huggingface.co/owaiskha9654/PICO_Evidence_Classification_Model\">Link</a> <br></center>"
)
|
|
|
# Wire the tagger into a Gradio UI: a free-text box in, highlighted sentence
# spans out, with the canned examples and page copy defined in this file.
PICO_APP = gr.Interface(
    fn=PICO_Tagger,
    inputs=gr.Textbox(placeholder="Enter Article sentence here..."),
    outputs=gr.HighlightedText(),
    examples=examples,
    allow_flagging='never',
    analytics_enabled=False,
    title=title,
    description=description,
    article=text1,
)

# Start serving the demo as soon as the script runs.
PICO_APP.launch()