tracer / summarize.py
mishtert's picture
Upload summarize.py
0acf0ea
import streamlit as st
from tqdm import tqdm
# Post-processing checks applied to each generated summary in get_summary_app.
from utils.summarize_utils.summ_utils import count_caps, dupe_check, purpose_issue, route_miss
# Register tqdm's pandas integration so DataFrame.progress_apply (used in
# get_summary_app) is available.
tqdm.pandas()
# Warning message configuration: disable every logging call process-wide and
# suppress all Python warnings.
import logging, sys
logging.disable(sys.maxsize)
import warnings
warnings.filterwarnings('ignore')
# Data retrieval plus the summary / headline generators used per study row.
from utils.summarize_utils.summwrap import get_data, get_summ
from utils.summarize_utils.headlinewrap import get_headline
import pandas as pd
# import numpy as np
# Raise pandas' display column width limit to 800 characters.
pd.set_option('display.max_colwidth', 800)
# study_id_list = [
# 'NCT04545554'
# ]
def get_summary_app(study_id):
    """Build headline and summary markdown tables for one or more studies.

    Args:
        study_id: Comma-separated string of study identifiers, e.g.
            ``"NCT04545554"`` or ``"NCT04545554, NCT00000000"``. Whitespace
            around each identifier and empty entries (such as the one
            produced by a trailing comma) are ignored.

    Returns:
        A ``(headline_output, summary_output)`` tuple of strings, each the
        result of ``Series.to_markdown()`` on the ``headline`` and
        ``summary`` columns respectively.
    """
    # Columns passed to get_summ / get_headline, in positional-argument order.
    summary_columns = (
        'OverallStatus',
        'DesignAllocation',
        'DesignMasking',
        'Phase',
        'DesignInterventionModel',
        'OrgStudyId',
        'SecondaryId',
        'LocationCountry',
        'EnrollmentCount',
        'OfficialTitle',
        'BriefSummary',
        'DetailedDescription',
        'WhyStopped',
        'NCTId',
        'LastUpdatePostDate',
    )
    headline_columns = (
        'OverallStatus',
        'Phase',
        'OrgStudyId',
        'SecondaryId',
        'Condition',
    )

    # Normalise the user-supplied list: "A, B," -> ["A", "B"]. Without the
    # strip, an id typed after ", " would be looked up with a leading space.
    study_id_list = [part.strip() for part in study_id.split(",") if part.strip()]
    print(study_id_list)

    # NOTE(review): get_data is assumed to return a pandas DataFrame holding
    # the columns referenced below - confirm against summwrap.get_data.
    df = get_data(study_id_list)

    # Reformat the last-update date as e.g. "05 Jan 2021" before it is handed
    # to get_summ.
    df['LastUpdatePostDate'] = pd.to_datetime(df['LastUpdatePostDate']).dt.strftime('%d %b %Y')

    df['summary'] = df.progress_apply(
        lambda row: get_summ(*(row[col] for col in summary_columns)),
        axis=1,
    )
    df['headline'] = df.progress_apply(
        lambda row: get_headline(*(row[col] for col in headline_columns)),
        axis=1,
    )

    # Each check receives the summary plus the flag value produced by the
    # previous check and returns the updated flag value.
    # NOTE(review): 'Review_Required' is stored on df but is not part of the
    # returned value - confirm whether it should be surfaced to the caller.
    df['Review_Required'] = df['summary'].apply(purpose_issue)
    df['Review_Required'] = df.apply(
        lambda row: dupe_check(row['summary'], row['Review_Required']), axis=1)
    df['Review_Required'] = df.apply(
        lambda row: count_caps(row['summary'], row['Review_Required']), axis=1)
    df['Review_Required'] = df.apply(
        lambda row: route_miss(row['summary'], row['Review_Required'], row['InterventionDescription']),
        axis=1)

    summary_output = df['summary'].to_markdown()
    headline_output = df['headline'].to_markdown()
    return headline_output, summary_output
# df[['OverallStatus','Phase','OrgStudyId','SecondaryId','Condition','headline']]
# df.head()
# df.to_csv('summary_output_sample.csv',index=False, encoding='UTF-8')