owaiskha9654 committed on
Commit
c93e970
•
1 Parent(s): cfed7f8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -0
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import nltk
3
+ import re
4
+ import numpy as np
5
+ import gradio as gr
6
+ from typing import Dict
7
+ from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
8
+ from transformers import BertForSequenceClassification,BertTokenizer
9
+
10
+
11
# Load the fine-tuned classifier and its tokenizer from the Hugging Face Hub.
# Both live in the same repository, so the id is spelled once.
MODEL_ID = "owaiskha9654/PICO_Evidence_Classification_Model"
model = BertForSequenceClassification.from_pretrained(MODEL_ID, num_labels=4)
tokenizer = BertTokenizer.from_pretrained(MODEL_ID, do_lower_case=True)

# Sentence-splitter data for nltk.sent_tokenize. Older NLTK releases read the
# 'punkt' resource while newer ones look up 'punkt_tab'; fetching both keeps
# the app working regardless of which NLTK version is installed.
for _nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(_nltk_resource)
15
+
16
# Sample inputs offered in the Gradio UI. Each list element is one example;
# every element must be followed by a comma, otherwise adjacent string
# literals are silently concatenated into a single example.
examples = [
    """In addition to their essential catalytic role in protein biosynthesis,
aminoacyl tRNA synthetases participate into numerous other functions,
including regulation of gene expression and amino acid biosynthesis via
transamidation pathways. labor 48 labor induced amniotomy oxytocin.
Herein, we describe a class of aminoacyl tRNA
synthetase like HisZ proteins based on the catalytic core of the contemporary
class II histidyl tRNA synthetase whose members lack aminoacylation activity
but are instead essential components of the first enzyme in histidine biosynthesis
ATP phosphoribosyltransferase (HisG).""",
    """total 164 female breast cancer patients treated anthracyclines received anthracycline-based
chemotherapy = 85 = 79 dexrazoxane maximum cycles. participants received amlodipine 5 daily lisinopril 5 daily
secondary outcomes death noncardiac repeat revascularization procedures pci cabg""",
    """ Prediction of the function of HisZ in Lactococcus
lactis was assisted by comparative genomics, a technique that revealed a link between the
presence or the absence of HisZ and a systematic variation in the length of the HisG polypeptide.""",
    """HisZ is required for histidine prototrophy, and three other lines of evidence support the direct
involvement of HisZ in the transferase function. 1) Genetic experiments demonstrate that
complementation of an in-frame deletion of HisG from Escherichia coli (which does not possess HisZ)
requires both HisG and HisZ from L. lactis. 2) Coelution of HisG and HisZ during affinity chromatography
provides evidence of direct physical interaction. 3) Both HisG and HisZ are required for catalysis of
the ATP phosphoribosyltransferase reaction. This observation of a common protein domain
linking amino acid biosynthesis and protein synthesis implies an early connection between
the biosynthesis of amino acids and proteins.""",
]
39
+
40
+
41
def PICO_Tagger(text):
    """Label every sentence of *text* with a PICO evidence class.

    The input is cleaned with a regex, split into sentences with
    ``nltk.sent_tokenize``, and the sentences are classified in one batch by
    the module-level ``model`` / ``tokenizer`` pair.

    Args:
        text: Raw article text. ``None`` or an empty string is accepted and
            produces no entities.

    Returns:
        A dict with two keys: ``"text"`` holds the cleaned text and
        ``"entities"`` holds one dict per sentence with ``"start"``,
        ``"end"`` (character offsets into the cleaned text) and ``"entity"``
        (the predicted label string).
    """
    # Class index produced by the model -> label shown to the user.
    evidence_labels = {
        0: 'Not Relevant to the Evidence (Label N)',
        1: 'Population/Problem (element P)',
        2: 'Intervention and Comparison (element I and C)',
        3: 'Outcome (element O)',
    }

    text = text or ""
    # The cleanup below deletes every character that is not a letter, digit,
    # '.', space or tab - including line breaks. Turn line breaks into spaces
    # first so the words on either side of them are not glued together.
    text = re.sub(r"[\r\n]+", " ", text)
    # NOTE(review): in this pattern `@\[` matches a literal "@[" and `http.+?`
    # matches only "http" plus one character; both look unintended, but the
    # pattern is kept as-is because changing it alters what the model sees.
    text = re.sub(r"(@\[A-Za-z0-9]+)|([^.0-9A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?", "", text)

    sent_text = nltk.sent_tokenize(text)
    if not sent_text:
        # Nothing to classify; the tokenizer cannot encode an empty batch.
        return {"text": text, "entities": []}

    inputs = tokenizer(
        sent_text,
        return_tensors="pt",
        padding=True,
        # Clip sentences longer than the model's position-embedding table
        # instead of failing inside the forward pass.
        truncation=True,
        max_length=model.config.max_position_embeddings,
    )
    inputs = inputs.to('cpu')

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        pred_logits = model(**inputs).logits
    # Sigmoid is monotonic, so the arg-max of the raw logits is the same
    # winning class without the extra pass.
    label_ids = torch.argmax(pred_logits, dim=1).tolist()

    rendered_output = []
    cursor = 0
    for sentence, label_id in zip(sent_text, label_ids):
        # Locate the sentence in the cleaned text rather than assuming a
        # single separator character between sentences, so the spans stay
        # aligned when sentences are separated by several spaces or the text
        # starts with whitespace.
        start = text.find(sentence, cursor)
        if start == -1:
            start = cursor
        end = min(start + len(sentence), len(text))
        rendered_output.append({
            'start': start,
            'end': end,
            'entity': evidence_labels[label_id],
        })
        cursor = end

    return {"text": text, "entities": rendered_output}
74
+
75
+
76
# Heading and introduction shown at the top of the Gradio page.
title = "Utilizing BioBERT for PICO Evidence Summarization"
description = "The traditional machine learning models give a lot of pain when we do not have sufficient labeled data for the specific task or domain we care about to train a reliable model. Transfer learning allows us to deal with these scenarios by leveraging the already existing labeled data of some related task or domain. We try to store this knowledge gained in solving the source task in the source domain and apply it to our problem of interest.In this work, I have utilized the dataset from the paper published in IEEE Journal of Biomedical and Health Informatics <a href=\"https://ieeexplore.ieee.org/document/9056501\">Aceso: PICO-Guided Evidence Summarization on Medical Literature</a>"

# Footer HTML (author and project links), assembled from one fragment per line.
text1 = "".join((
    "<center> Author: Owais Ahmad Data Scientist at <b> Thoucentric </b> <a href=\"https://www.linkedin.com/in/owaiskhan9654/\">Visit Profile</a> <br></center>",
    "<center> Model Trained Kaggle Kernel <a href=\"https://www.kaggle.com/code/owaiskhan9654/utilizing-biobert-for-pico-evidence-summarization\">Link</a> <br></center>",
    "<center> Kaggle Profile <a href=\"https://www.kaggle.com/owaiskhan9654\">Link</a> <br> </center>",
    "<center> PICO Evidence Model Deployed Repository <a href=\"https://huggingface.co/owaiskha9654/PICO_Evidence_Classification_Model\">Link</a> <br></center>",
))

# Wire the tagger into a text-in / highlighted-text-out interface.
PICO_APP = gr.Interface(
    fn=PICO_Tagger,
    inputs=gr.Textbox(placeholder="Enter Article sentence here..."),
    outputs=gr.HighlightedText(),
    examples=examples,
    allow_flagging='never',
    analytics_enabled=False,
    title=title,
    description=description,
    article=text1,
)

PICO_APP.launch()