# NOTE: captured from a Hugging Face Space page (Space status at capture time: "Sleeping").
""" | |
Ryan Tietjen | |
Sep 2024 | |
Demo application for paper abstract fragmentaion demonstration | |
""" | |
import gradio as gr | |
import tensorflow as tf | |
from tensorflow import keras | |
from keras import layers | |
from timeit import default_timer as timer | |
from process_input import split_abstract | |
from process_input import split_abstract_original | |
from process_input import split_sentences_by_characters | |
import pandas as pd | |
import tensorflow_hub as hub | |
from model import EmbeddingLayer | |
from process_input import encode_labels | |
# Example abstracts offered as one-click inputs in the demo UI.
# Every sentence sits on its own line, which is the input format the app's
# description asks users to follow.
# Plain (non-f) triple-quoted strings are used on purpose: the text has no
# placeholders, and an f-string would start interpreting any "{" or "}" that a
# future example might contain.
example1 = """The aim of this study was to verify in bruxism patients the possible efficacy of auricular stimulation in reducing the hypertonicity of some masticatory muscles.
Forty-three bruxism patients were randomly allocated to 3 groups : acupuncture , needle contact for 10 seconds , no treatment ( control ).
Helkimo 's clinical dysfunction index ( CDI ) and anamnestic dysfunction index ( ADI ) were used to assess the functional state of the masticatory system.
The resting electrical activity of the anterior temporalis ( AT ) , masseter ( MM ) , digastric ( DA ) and sternocleidomastoid ( SCM ) muscles was measured , according to Jankelson , with surface electrodes at baseline , after stimulation and continually for 30 minutes ( 120 measurements in total ).
The electromyographical variations in the 3 groups were studied with t test for independent samples.
Acupuncture and needle contact were superior to control in reducing the muscle hypertonicity of all muscles except SCM.
In the comparison between acupuncture and needle contact the former showed better results only for the right TA and left DA ( p = 0.000 ).
In this study it was possible to measure the efficacy of the stimulation of only one point or area , which is an ideal model for research in acupuncture.
The auricular area we chose for stimulation was never used before for the purpose of relaxing masticatory muscles.
Acupuncture and needle contact for 10 seconds showed similar effects."""

example2 = """To investigate the efficacy of 6 weeks of daily low-dose oral prednisolone in improving pain , mobility , and systemic low-grade inflammation in the short term and whether the effect would be sustained at 12 weeks in older adults with moderate to severe knee osteoarthritis ( OA ) .
A total of 125 patients with primary knee OA were randomized 1:1 ; 63 received 7.5 mg/day of prednisolone and 62 received placebo for 6 weeks .
Outcome measures included pain reduction and improvement in function scores and systemic inflammation markers .
Pain was assessed using the visual analog pain scale ( 0-100 mm ) .
Secondary outcome measures included the Western Ontario and McMaster Universities Osteoarthritis Index scores , patient global assessment ( PGA ) of the severity of knee OA , and 6-min walk distance ( 6MWD ) .
Serum levels of interleukin 1 ( IL-1 ) , IL-6 , tumor necrosis factor ( TNF ) - , and high-sensitivity C-reactive protein ( hsCRP ) were measured .
There was a clinically relevant reduction in the intervention group compared to the placebo group for knee pain , physical function , PGA , and 6MWD at 6 weeks .
The mean difference between treatment arms ( 95 % CI ) was 10.9 ( 4.8-18 .0 ) , p < 0.001 ; 9.5 ( 3.7-15 .4 ) , p < 0.05 ; 15.7 ( 5.3-26 .1 ) , p < 0.001 ; and 86.9 ( 29.8-144 .1 ) , p < 0.05 , respectively .
Further , there was a clinically relevant reduction in the serum levels of IL-1 , IL-6 , TNF - , and hsCRP at 6 weeks in the intervention group when compared to the placebo group .
These differences remained significant at 12 weeks .
The Outcome Measures in Rheumatology Clinical Trials-Osteoarthritis Research Society International responder rate was 65 % in the intervention group and 34 % in the placebo group ( p < 0.05 ) .
Low-dose oral prednisolone had both a short-term and a longer sustained effect resulting in less knee pain , better physical function , and attenuation of systemic inflammation in older patients with knee OA ( ClinicalTrials.gov identifier NCT01619163 ) ."""

# Order matters only for display: the examples are listed in this order in the UI.
sample_list = [example1, example2]
def format_non_empty_lists(objective, background, methods, results, conclusion):
    """
    Render the classified sentences as a sectioned, bulleted block of text.

    Sections are emitted in a fixed order (Objective, Background, Methods,
    Results, Conclusion). A section whose list is empty (or otherwise falsy,
    e.g. None) is left out entirely.

    Parameters:
    - objective (list): Sentences classified as 'Objective'.
    - background (list): Sentences classified as 'Background'.
    - methods (list): Sentences classified as 'Methods'.
    - results (list): Sentences classified as 'Results'.
    - conclusion (list): Sentences classified as 'Conclusion'.

    Returns:
    - str: For every non-empty section, a "<Name>:" header line followed by one
      " - <sentence>" line per item, with one blank line between sections.
      Whitespace at the very start and end of the result is stripped, so the
      result is "" when all five lists are empty.
    """
    sections = (
        ("Objective", objective),
        ("Background", background),
        ("Methods", methods),
        ("Results", results),
        ("Conclusion", conclusion),
    )

    # Collect one text chunk per non-empty section and join once at the end
    # rather than growing a string with repeated "+=".
    chunks = []
    for name, content in sections:
        if not content:
            continue
        lines = [f"{name}:"]
        lines.extend(f" - {item}" for item in content)
        chunks.append("\n".join(lines))

    # The final strip() keeps the historical behaviour of trimming whitespace
    # at the very end of the text (including trailing spaces of the last item).
    return "\n\n".join(chunks).strip()
# File name of the trained classifier; a relative path, so it is looked up
# from the process's working directory.
_MODEL_PATH = "200k_10_epochs.keras"

# Filled in on first use so the model is deserialized once per process instead
# of once per request.
_cached_model = None


def _get_model():
    """Return the sentence classifier, loading it from disk on the first call only."""
    global _cached_model
    if _cached_model is None:
        _cached_model = tf.keras.models.load_model(
            _MODEL_PATH, custom_objects={"EmbeddingLayer": EmbeddingLayer()}
        )
    return _cached_model


def fragment_single_abstract(abstract):
    """
    Classify every sentence of an abstract and return the sentences grouped by section.

    Steps:
    1. Split the abstract twice: `split_abstract_original` provides the text that
       is shown to the user, `split_abstract` provides the rows fed to the model
       (columns "text", "target", "line_number", "total_lines").
    2. Build the four model inputs: one-hot line number (depth 15), one-hot total
       line count (depth 20), the sentence text, and its character-level form
       from `split_sentences_by_characters`.
    3. Run the Keras model stored in "200k_10_epochs.keras" and take the argmax
       class per sentence.
    4. Group the original sentences by predicted class and format them with
       `format_non_empty_lists`.

    Class indices are mapped as: 0 -> Background, 1 -> Conclusion, 2 -> Methods,
    3 -> Objective, 4 -> Results.

    Parameters:
        abstract (str): The abstract text to process.

    Returns:
        tuple: A two-element tuple:
            - str: The formatted, sectioned abstract (only non-empty sections appear).
            - float: Seconds spent processing. The first call in a process also
              includes the one-time model load.
        For an empty, whitespace-only or None abstract the result is ("", 0.0)
        and neither the splitters nor the model are invoked.

    Example:
        text, seconds = fragment_single_abstract("This study aims to evaluate ...")
        print(text)
        print("Processing time:", seconds)

    Note:
        - Sentence i of `split_abstract_original` is paired with prediction i, so
          both splitters are assumed to yield the same number of sentences in the
          same order (TODO confirm against process_input).
        - The model file and the custom `EmbeddingLayer` must be loadable.
    """
    # Nothing to classify: return early instead of failing on an empty frame.
    if not abstract or not abstract.strip():
        return "", 0.0

    start_time = timer()

    # Text as it should be displayed back to the user.
    sentences_original = pd.DataFrame(split_abstract_original(abstract))["text"].tolist()

    # Rows in the shape the model consumes.
    df = pd.DataFrame(split_abstract(abstract))
    sentences = df["text"].tolist()
    labels = encode_labels(df["target"])
    data_by_character = split_sentences_by_characters(sentences)

    # Positional features as one-hot vectors; tf.one_hot yields an all-zero row
    # for an index outside [0, depth).
    line_numbers = tf.one_hot(df["line_number"].to_numpy(), depth=15)
    total_line_numbers = tf.one_hot(df["total_lines"].to_numpy(), depth=20)

    sentences_dataset = tf.data.Dataset.from_tensor_slices(
        (line_numbers, total_line_numbers, sentences, data_by_character)
    )
    # NOTE(review): labels are zipped in so the dataset has an (inputs, labels)
    # structure; presumably they are placeholders at inference time — confirm
    # against process_input.split_abstract.
    labels_dataset = tf.data.Dataset.from_tensor_slices(labels)
    dataset = (
        tf.data.Dataset.zip((sentences_dataset, labels_dataset))
        .batch(32)
        .prefetch(tf.data.AUTOTUNE)
    )

    predictions = tf.argmax(_get_model().predict(dataset), axis=1)

    objective, background, methods, results, conclusion = [], [], [], [], []
    section_by_class = {
        0: background,
        1: conclusion,
        2: methods,
        3: objective,
        4: results,
    }
    for i, prediction in enumerate(predictions):
        section = section_by_class.get(int(prediction))
        # An index outside 0-4 is ignored, as before.
        if section is not None:
            section.append(sentences_original[i])

    # Stop the clock before formatting, matching the original measurement window.
    elapsed = timer() - start_time
    return format_non_empty_lists(objective, background, methods, results, conclusion), elapsed
title = "Paper Abstract Fragmentation With TensorFlow by Ryan Tietjen" | |
description = f""" | |
This app will take the abstract of a paper and break it down into five categories: objective, background, methods, results, and conclusion. | |
The dataset used can be found in the [PubMed 200k RCT]("https://arxiv.org/pdf/1710.06071") and in [this repo](https://github.com/Franck-Dernoncourt/pubmed-rct). The model architecture | |
was based off of ["Neural Networks for Joint Sentence Classification in Medical Paper Abstracts."](https://arxiv.org/pdf/1612.05251) | |
This model achieved a testing accuracy of 88.2% and a F1 score of 88%. For the whole project, please visit [my GitHub](https://github.com/RyanTietjen/Paper-Fragmentation). | |
How to use: | |
-Paste the given abstract into the box below. | |
-Make sure to separate each sentence by a new line (this helps avoid ambiguity). | |
-Click submit, and allow the model to run! | |
""" | |
# One free-text input for the abstract, and two outputs that line up with the
# (formatted text, elapsed seconds) pair returned by fragment_single_abstract.
_abstract_input = gr.Textbox(lines=10, placeholder="Enter abstract here...")
_fragmented_output = gr.Textbox(label="Fragmented Abstract")
_elapsed_output = gr.Number(label="Time to process (s)")

demo = gr.Interface(
    fn=fragment_single_abstract,
    inputs=_abstract_input,
    outputs=[_fragmented_output, _elapsed_output],
    title=title,
    description=description,
    examples=sample_list,
)

# share=False: do not request a public share link when starting the app.
demo.launch(share=False)