File size: 2,522 Bytes
0acf0ea |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import streamlit as st
from tqdm import tqdm
from utils.summarize_utils.summ_utils import count_caps, dupe_check, purpose_issue, route_miss
# Register tqdm's pandas integration; this is what provides the
# DataFrame.progress_apply method used in get_summary_app below.
tqdm.pandas()
# Warning message configuration
# logging.disable(sys.maxsize) suppresses logging calls at every severity
# level (sys.maxsize is far above logging.CRITICAL) for the whole process.
import logging, sys
logging.disable(sys.maxsize)
# Silence all warnings raised through the `warnings` module.
import warnings
warnings.filterwarnings('ignore')
# Project-local helpers called by get_summary_app.
from utils.summarize_utils.summwrap import get_data, get_summ
from utils.summarize_utils.headlinewrap import get_headline
import pandas as pd
# import numpy as np
# Allow up to 800 characters per column before pandas truncates cell text
# when rendering a DataFrame/Series.
pd.set_option('display.max_colwidth', 800)
# study_id_list = [
# 'NCT04545554'
# ]
def get_summary_app(study_id: str):
    """Build headline and summary markdown tables for the given studies.

    Args:
        study_id: One or more study identifiers separated by commas, e.g.
            ``"NCT04545554"`` or ``"NCT04545554, NCT01234567"``. Whitespace
            around each identifier is stripped and empty entries (such as
            the one produced by a trailing comma) are dropped.

    Returns:
        A ``(headline_output, summary_output)`` tuple of strings: the
        ``headline`` and ``summary`` columns of the study frame, each
        rendered with ``Series.to_markdown()``.
    """
    # Normalise the user-supplied list so "A, B," is treated as ["A", "B"]
    # instead of ["A", " B", ""].
    study_id_list = [token.strip() for token in study_id.split(",")]
    study_id_list = [token for token in study_id_list if token]

    df = get_data(study_id_list)

    # Render the last-update date as e.g. "05 Mar 2021" before it is handed
    # to get_summ.
    df['LastUpdatePostDate'] = pd.to_datetime(df['LastUpdatePostDate']).dt.strftime('%d %b %Y')

    # Columns are passed positionally to the helpers, so the order of these
    # tuples must match the helpers' parameter order.
    summary_fields = (
        'OverallStatus',
        'DesignAllocation',
        'DesignMasking',
        'Phase',
        'DesignInterventionModel',
        'OrgStudyId',
        'SecondaryId',
        'LocationCountry',
        'EnrollmentCount',
        'OfficialTitle',
        'BriefSummary',
        'DetailedDescription',
        'WhyStopped',
        'NCTId',
        'LastUpdatePostDate',
    )
    headline_fields = (
        'OverallStatus',
        'Phase',
        'OrgStudyId',
        'SecondaryId',
        'Condition',
    )

    df['summary'] = df.progress_apply(
        lambda row: get_summ(*(row[col] for col in summary_fields)),
        axis=1,
    )
    df['headline'] = df.progress_apply(
        lambda row: get_headline(*(row[col] for col in headline_fields)),
        axis=1,
    )

    # Each check receives the summary plus the flag produced by the previous
    # check and returns the updated flag.
    # NOTE(review): 'Review_Required' is stored on df but is not part of the
    # returned value; it is kept in case the frame from get_data is shared
    # or the checks have side effects -- confirm whether it should be
    # surfaced to the caller.
    df['Review_Required'] = df['summary'].apply(purpose_issue)
    df['Review_Required'] = df.apply(
        lambda row: dupe_check(row['summary'], row['Review_Required']), axis=1)
    df['Review_Required'] = df.apply(
        lambda row: count_caps(row['summary'], row['Review_Required']), axis=1)
    df['Review_Required'] = df.apply(
        lambda row: route_miss(row['summary'], row['Review_Required'], row['InterventionDescription']),
        axis=1)

    summary_output = df['summary'].to_markdown()
    headline_output = df['headline'].to_markdown()
    return headline_output, summary_output
# df[['OverallStatus','Phase','OrgStudyId','SecondaryId','Condition','headline']]
# df.head()
# df.to_csv('summary_output_sample.csv',index=False, encoding='UTF-8')
|