mishtert committed on
Commit
0acf0ea
1 Parent(s): 7b7b4e7

Upload summarize.py

Browse files
Files changed (1) hide show
  1. summarize.py +81 -0
summarize.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
from tqdm import tqdm

from utils.summarize_utils.summ_utils import count_caps, dupe_check, purpose_issue, route_miss

# Register tqdm's `progress_apply` on pandas objects (used by get_summary_app).
tqdm.pandas()

# Logging and warnings are silenced here, i.e. before the project imports
# below are executed; keep this ordering when editing the header.
import logging
import sys

logging.disable(sys.maxsize)
import warnings

warnings.filterwarnings('ignore')

from utils.summarize_utils.summwrap import get_data, get_summ
from utils.summarize_utils.headlinewrap import get_headline
import pandas as pd

# Allow up to 800 characters per cell when pandas renders text columns.
pd.set_option('display.max_colwidth', 800)
25
+
26
+
27
+ # study_id_list = [
28
+ # 'NCT04545554'
29
+ # ]
30
# Columns passed to get_summ, in its positional-argument order.
_SUMMARY_COLUMNS = (
    'OverallStatus',
    'DesignAllocation',
    'DesignMasking',
    'Phase',
    'DesignInterventionModel',
    'OrgStudyId',
    'SecondaryId',
    'LocationCountry',
    'EnrollmentCount',
    'OfficialTitle',
    'BriefSummary',
    'DetailedDescription',
    'WhyStopped',
    'NCTId',
    'LastUpdatePostDate',
)

# Columns passed to get_headline, in its positional-argument order.
_HEADLINE_COLUMNS = (
    'OverallStatus',
    'Phase',
    'OrgStudyId',
    'SecondaryId',
    'Condition',
)


def get_summary_app(study_id):
    """Build headline and summary markdown tables for one or more studies.

    Args:
        study_id: Comma-separated study identifiers in a single string,
            e.g. ``"NCT04545554"`` or ``"NCT04545554, NCT01234567"``.
            Whitespace around each identifier is ignored and empty entries
            (such as the one produced by a trailing comma) are dropped.

    Returns:
        A ``(headline_output, summary_output)`` tuple of strings, each the
        markdown rendering (``Series.to_markdown``) of the corresponding
        column of the study DataFrame.
    """
    # Strip each piece so "A, B" does not produce the identifier " B", and
    # skip blanks so "A," does not produce an empty identifier.
    study_id_list = [part.strip() for part in study_id.split(",") if part.strip()]
    print(study_id_list)

    # NOTE(review): if every entry is blank, an empty list is handed to
    # get_data as-is; confirm how get_data treats that case.
    df = get_data(study_id_list)
    df['LastUpdatePostDate'] = pd.to_datetime(df['LastUpdatePostDate']).dt.strftime('%d %b %Y')

    # progress_apply is provided by the module-level tqdm.pandas() call.
    df['summary'] = df.progress_apply(
        lambda row: get_summ(*(row[col] for col in _SUMMARY_COLUMNS)),
        axis=1,
    )
    df['headline'] = df.progress_apply(
        lambda row: get_headline(*(row[col] for col in _HEADLINE_COLUMNS)),
        axis=1,
    )

    # Each check receives the summary plus the flag produced by the previous
    # check and returns the updated flag, so the order below is significant.
    df['Review_Required'] = df['summary'].apply(purpose_issue)
    df['Review_Required'] = df.apply(
        lambda row: dupe_check(row['summary'], row['Review_Required']), axis=1)
    df['Review_Required'] = df.apply(
        lambda row: count_caps(row['summary'], row['Review_Required']), axis=1)
    df['Review_Required'] = df.apply(
        lambda row: route_miss(row['summary'], row['Review_Required'], row['InterventionDescription']),
        axis=1)

    summary_output = df['summary'].to_markdown()
    headline_output = df['headline'].to_markdown()

    return headline_output, summary_output
78
+ # df[['OverallStatus','Phase','OrgStudyId','SecondaryId','Condition','headline']]
79
+
80
+ # df.head()
81
+ # df.to_csv('summary_output_sample.csv',index=False, encoding='UTF-8')