File size: 2,522 Bytes
0acf0ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import streamlit as st
from tqdm import tqdm

from utils.summarize_utils.summ_utils import count_caps, dupe_check, purpose_issue, route_miss

# Register tqdm's pandas integration so that DataFrame.progress_apply
# (used in get_summary_app below) is available.
tqdm.pandas()

# Warning message configuration: silence logging and warnings process-wide.
import logging, sys

# Disable every logging call of severity sys.maxsize and below, i.e.
# effectively all logging output.
logging.disable(sys.maxsize)
import warnings

# Ignore all warnings raised from this point on.
# NOTE(review): this runs before the summwrap/headlinewrap imports below,
# presumably so that their import-time warnings are hidden too -- confirm
# before reordering these statements.
warnings.filterwarnings('ignore')

# pwd

from utils.summarize_utils.summwrap import get_data, get_summ
from utils.summarize_utils.headlinewrap import get_headline
import pandas as pd

# import numpy as np

# Let pandas show up to 800 characters per column before truncating.
pd.set_option('display.max_colwidth', 800)


# study_id_list = [
#  'NCT04545554'
# ]
def get_summary_app(study_id):
    """Build markdown tables of headlines and summaries for the given studies.

    Args:
        study_id: Comma-separated string of study identifiers, e.g.
            ``'NCT04545554'`` or ``'NCT04545554, NCT01234567'``. Whitespace
            around each identifier is stripped and empty entries (such as
            the one produced by a trailing comma) are dropped.

    Returns:
        A ``(headline_output, summary_output)`` tuple of strings: the
        ``headline`` and ``summary`` columns of the fetched data, each
        rendered with ``Series.to_markdown()``.
    """
    # Input is typically typed by a user, so tolerate "A, B," style lists:
    # a stray space or trailing comma must not become part of an identifier.
    study_id_list = [part.strip() for part in study_id.split(",") if part.strip()]

    df = get_data(study_id_list)
    df['LastUpdatePostDate'] = (
        pd.to_datetime(df['LastUpdatePostDate']).dt.strftime('%d %b %Y')
    )

    # Row-wise generation with a tqdm progress bar (progress_apply is
    # provided by the module-level tqdm.pandas() call).
    df['summary'] = df.progress_apply(
        lambda x: get_summ(
            x['OverallStatus'],
            x['DesignAllocation'],
            x['DesignMasking'],
            x['Phase'],
            x['DesignInterventionModel'],
            x['OrgStudyId'],
            x['SecondaryId'],
            x['LocationCountry'],
            x['EnrollmentCount'],
            x['OfficialTitle'],
            x['BriefSummary'],
            x['DetailedDescription'],
            x['WhyStopped'],
            x['NCTId'],
            x['LastUpdatePostDate'],
        ),
        axis=1,
    )

    df['headline'] = df.progress_apply(
        lambda x: get_headline(
            x['OverallStatus'],
            x['Phase'],
            x['OrgStudyId'],
            x['SecondaryId'],
            x['Condition'],
        ),
        axis=1,
    )

    # Each check receives the summary plus the flag produced by the previous
    # check and returns the updated flag, so the order is preserved as-is.
    # NOTE(review): 'Review_Required' is computed but not part of the return
    # value; kept in case the helpers have side effects -- confirm whether it
    # should be surfaced to the caller.
    df['Review_Required'] = df['summary'].apply(purpose_issue)
    df['Review_Required'] = df.apply(
        lambda row: dupe_check(row['summary'], row['Review_Required']), axis=1)
    df['Review_Required'] = df.apply(
        lambda row: count_caps(row['summary'], row['Review_Required']), axis=1)
    df['Review_Required'] = df.apply(
        lambda row: route_miss(
            row['summary'], row['Review_Required'], row['InterventionDescription']
        ),
        axis=1,
    )

    headline_output = df['headline'].to_markdown()
    summary_output = df['summary'].to_markdown()

    return headline_output, summary_output
# df[['OverallStatus','Phase','OrgStudyId','SecondaryId','Condition','headline']]

# df.head()
# df.to_csv('summary_output_sample.csv',index=False, encoding='UTF-8')