swarit222 committed on
Commit
f254909
·
verified ·
1 Parent(s): 3618129

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +231 -20
app.py CHANGED
@@ -1,38 +1,249 @@
1
  import gradio as gr
2
- from main2 import search_trials # Importing from main2.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  def run_search(age, sex, state, keywords):
5
- results = search_trials(
6
  user_age=age,
7
  user_sex=sex,
8
  user_state=state,
9
- user_keywords=keywords
 
10
  )
11
- return results
 
 
 
 
 
12
 
13
- with gr.Blocks() as demo:
14
- gr.Markdown("# Clinical Trials Search Tool")
15
- gr.Markdown(
16
- "Find **recruiting US clinical trials** that match your **age**, **sex**, "
17
- "**state**, and optional **keywords**."
18
- )
 
 
19
 
20
- with gr.Row():
21
- age_input = gr.Number(label="Your Age", value=30)
22
- sex_input = gr.Dropdown(["Male", "Female"], label="Sex", value="Male")
 
 
23
 
24
- with gr.Row():
25
- state_input = gr.Dropdown(["Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut", "Delaware", "Florida", "Georgia", "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana", "Maine", "Massachusetts", "Michigan", "Minnesota", "Mississippi", "Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire", "New Jersey", "New York", "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota", "Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming"], label="State (full name or abbreviation)", value="California")
26
- keywords_input = gr.Textbox(label="Keywords (comma separated)", placeholder="e.g., cancer, diabetes")
 
 
 
 
27
 
28
- search_btn = gr.Button("Search Trials")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- output_table = gr.Dataframe(label="Matching Trials", interactive=False)
 
 
 
 
 
 
 
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  search_btn.click(
33
- fn=run_search,
34
  inputs=[age_input, sex_input, state_input, keywords_input],
35
- outputs=output_table
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  )
37
 
38
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ import pandas as pd
3
+ import re
4
+ from sklearn.feature_extraction.text import TfidfVectorizer
5
+ import numpy as np
6
+ from main2 import search_trials # Import your updated search_trials
7
+
8
# Number of trials rendered on each page of results.
PAGE_SIZE = 5
# Number of words kept in a cell's collapsed preview.
PREVIEW_WORDS = 100

# Choices for the state dropdown: the 50 states, then the District of Columbia.
US_STATES = [
    "Alabama",
    "Alaska",
    "Arizona",
    "Arkansas",
    "California",
    "Colorado",
    "Connecticut",
    "Delaware",
    "Florida",
    "Georgia",
    "Hawaii",
    "Idaho",
    "Illinois",
    "Indiana",
    "Iowa",
    "Kansas",
    "Kentucky",
    "Louisiana",
    "Maine",
    "Maryland",
    "Massachusetts",
    "Michigan",
    "Minnesota",
    "Mississippi",
    "Missouri",
    "Montana",
    "Nebraska",
    "Nevada",
    "New Hampshire",
    "New Jersey",
    "New Mexico",
    "New York",
    "North Carolina",
    "North Dakota",
    "Ohio",
    "Oklahoma",
    "Oregon",
    "Pennsylvania",
    "Rhode Island",
    "South Carolina",
    "South Dakota",
    "Tennessee",
    "Texas",
    "Utah",
    "Vermont",
    "Virginia",
    "Washington",
    "West Virginia",
    "Wisconsin",
    "Wyoming",
    "District of Columbia",
]
20
+
21
# Whitespace that follows ".", "?" or "!" -- except after patterns such as
# "e.g." (letter-dot-letter-char) or a two-letter title like "Dr.".
_SENTENCE_BOUNDARY = re.compile(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s')


def split_sentences(text):
    """Split *text* into a list of stripped, non-empty sentences.

    Args:
        text: The string to split. ``None`` and ``""`` are treated as
            "no text" instead of raising ``TypeError``.

    Returns:
        The sentences in their original order; an empty list when there is
        nothing to split.
    """
    if not text:
        return []
    stripped = (piece.strip() for piece in _SENTENCE_BOUNDARY.split(text))
    return [sentence for sentence in stripped if sentence]
23
+
24
def build_input_text(row):
    """Concatenate a trial's descriptive fields into one labelled string.

    Each populated field is emitted as ``"<Label>: <value>"`` and the pieces
    are joined with single spaces, in the fixed order listed below.

    Fields that are absent, ``None``, NaN/NA or whitespace-only are skipped
    entirely. Previously the emptiness check was applied to the already
    labelled text, so it never fired: empty fields leaked bare labels (and
    "nan"/"None" text) into the result, and the result was never empty.

    Args:
        row: Any mapping-like object exposing ``.get(key, default)``, such as
            a ``dict`` or a pandas ``Series``.

    Returns:
        The combined text, or ``""`` when no field has content.
    """
    labelled_fields = (
        ("Intervention Name", "InterventionName"),
        ("Intervention Description", "InterventionDescription"),
        ("Brief Summary", "BriefSummary"),
        ("Eligibility Criteria", "EligibilityCriteria"),
        ("Primary Outcome Measure", "PrimaryOutcomeMeasure"),
        ("Primary Outcome Description", "PrimaryOutcomeDescription"),
        ("Start Date", "StartDate"),
        ("Primary Completion Date", "PrimaryCompletionDate"),
    )
    parts = []
    for label, key in labelled_fields:
        value = row.get(key, "")
        # is_scalar() guards pd.isna(), which returns an array for list-likes.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            continue
        if not str(value).strip():
            continue
        parts.append(f"{label}: {value}")
    return " ".join(parts)
36
+
37
def generate_summary(row, max_sentences=7, min_sentence_length=5):
    """Build an extractive summary of a trial row.

    The row is flattened with ``build_input_text``, split into sentences, and
    sentences shorter than ``min_sentence_length`` words are dropped. If at
    most ``max_sentences`` remain they are all returned. Otherwise each
    sentence is scored by the sum of its TF-IDF weights, scaled by a position
    weight that falls linearly from 1.5 (first sentence) to 1.0 (last), and
    the best ``max_sentences`` are returned in document order.

    Sentences that start with a metadata label (dates, intervention name,
    outcome fields) are only used when there are not enough other sentences
    to fill the summary. The previous implementation skipped them and then
    immediately re-added the very same sentences while "refilling", so the
    filter had no effect.

    Args:
        row: Mapping-like trial record accepted by ``build_input_text``.
        max_sentences: Upper bound on sentences in the summary; values
            ``<= 0`` yield an empty summary.
        min_sentence_length: Minimum number of whitespace-separated words a
            sentence needs to be considered.

    Returns:
        The summary text, or ``""`` when there is nothing to summarise.
    """
    if max_sentences <= 0:
        return ""
    text = build_input_text(row)
    if not text.strip():
        return ""
    sentences = [
        s for s in split_sentences(text)
        if len(s.split()) >= min_sentence_length
    ]
    if not sentences:
        return ""
    if len(sentences) <= max_sentences:
        return " ".join(sentences)

    try:
        tfidf_matrix = TfidfVectorizer(stop_words="english").fit_transform(sentences)
    except ValueError:
        # Raised for an empty vocabulary (e.g. only stop words remain);
        # fall back to the leading sentences rather than failing the page.
        return " ".join(sentences[:max_sentences])

    scores = np.asarray(tfidf_matrix.sum(axis=1)).ravel()
    position_weights = np.linspace(1.5, 1.0, num=len(sentences))
    # Sentence indices, best combined score first.
    ranked = np.argsort(scores * position_weights)[::-1].tolist()

    metadata_prefix = re.compile(
        r"^(Start Date|Primary Completion Date|Intervention Name|Primary Outcome Measure|Primary Outcome Description):"
    )
    preferred = [i for i in ranked if not metadata_prefix.match(sentences[i])]
    fallback = [i for i in ranked if metadata_prefix.match(sentences[i])]
    chosen = (preferred + fallback)[:max_sentences]
    # Restore document order so the summary reads naturally.
    return " ".join(sentences[i] for i in sorted(chosen))
67
 
68
def run_search(age, sex, state, keywords):
    """Query ``search_trials`` and return the first page plus paging data.

    ``search_trials`` is called with ``generate_summaries=False``; the result
    is used as a pandas DataFrame (``.empty``, ``len``, ``.iloc``).

    Returns:
        A 3-tuple ``(page_df, total_pages, full_df)``:
        the first ``PAGE_SIZE`` rows (copied, with an empty-string
        ``LaymanSummary`` column added), the number of pages, and the
        complete result frame. When nothing matches the tuple is
        ``(empty DataFrame, 0, None)``.
    """
    matches = search_trials(
        user_age=age,
        user_sex=sex,
        user_state=state,
        user_keywords=keywords,
        generate_summaries=False,
    )
    if matches.empty:
        return pd.DataFrame(), 0, None

    # Ceiling division: a partially filled last page still counts as a page.
    page_count = -(-len(matches) // PAGE_SIZE)
    first_page = matches.head(PAGE_SIZE).copy()
    first_page['LaymanSummary'] = ""
    return first_page, page_count, matches
82
 
83
def load_page(page_num, full_df):
    """Return one page of ``full_df`` with a summary computed for each row.

    Args:
        page_num: Zero-based page index.
        full_df: Complete result frame, or ``None``.

    Returns:
        A copy of the rows on that page with a ``LaymanSummary`` column
        filled by ``generate_summary``; an empty DataFrame when ``full_df``
        is ``None`` or has no rows.
    """
    has_rows = full_df is not None and not full_df.empty
    if not has_rows:
        return pd.DataFrame()

    first_row = page_num * PAGE_SIZE
    window = full_df.iloc[first_row:first_row + PAGE_SIZE].copy()
    window['LaymanSummary'] = window.apply(generate_summary, axis=1)
    return window
91
 
92
def update_page_controls(page_num, total_pages):
    """Compute the paging controls' state for the given page.

    Args:
        page_num: Zero-based index of the page being shown.
        total_pages: Total number of pages.

    Returns:
        ``(prev_update, next_update, page_text)``: visibility updates for the
        previous/next buttons and a ``"Page X of Y"`` label, which is ``""``
        when there are no pages.
    """
    has_previous = page_num > 0
    has_next = page_num < total_pages - 1
    if total_pages > 0:
        label = f"Page {page_num + 1} of {total_pages}"
    else:
        label = ""
    return gr.update(visible=has_previous), gr.update(visible=has_next), label
97
 
98
def hide_empty_columns(df):
    """Return *df* restricted to columns holding at least one real value.

    A column is kept when, after dropping missing entries, at least one value
    has a non-blank string form. Column order is preserved.

    Args:
        df: The frame to filter.

    Returns:
        A frame containing only the non-empty columns of *df*.
    """
    keep = [
        col for col in df.columns
        # Vectorised test; ``.any()`` on an empty Series is already False,
        # so no separate emptiness check is needed.
        if df[col].dropna().astype(str).str.strip().ne("").any()
    ]
    return df[keep]
105
 
106
def df_to_html_with_readmore(df: pd.DataFrame) -> str:
    """Render *df* as a styled HTML table with collapsible long cells.

    ``LaymanSummary`` (when present) is moved to the first column and columns
    without any content are dropped via ``hide_empty_columns``. A cell with
    more than ``PREVIEW_WORDS`` words is wrapped in ``<details>``: the first
    ``PREVIEW_WORDS`` words form the clickable preview and the full text is
    revealed on expansion. All headers and cell text are HTML-escaped.

    Fixes over the previous version:
      * missing values (None/NaN/NA/NaT) render as an empty cell instead of
        the literal text "nan"/"None";
      * non-string column labels no longer make ``escape`` raise;
      * rows are read with ``itertuples`` so values keep their column dtype
        and duplicate column labels are handled positionally.

    Args:
        df: One page of results.

    Returns:
        An HTML fragment (``<style>`` block plus ``<table>``), or a
        "No matching trials found." paragraph when *df* is empty.
    """
    from html import escape

    if df.empty:
        return "<p>No matching trials found.</p>"

    if "LaymanSummary" in df.columns:
        cols = list(df.columns)
        cols.insert(0, cols.pop(cols.index("LaymanSummary")))
        df = df[cols]
    df = hide_empty_columns(df)

    parts = ['''
    <style>
    table {
    width: 100%;
    border-collapse: collapse;
    font-family: Arial, sans-serif;
    }
    th {
    background-color: #007bff;
    color: white;
    padding: 12px;
    text-align: left;
    border: 1px solid #ddd;
    }
    td {
    border: 1px solid #ddd;
    padding: 12px;
    vertical-align: top;
    white-space: normal;
    max-width: 1000px; /* 2.5x original 400px */
    min-width: 1000px; /* force width */
    word-wrap: break-word;
    }
    details summary {
    cursor: pointer;
    color: #007bff;
    font-weight: bold;
    }
    details summary:after {
    content: " (Read More)";
    color: #0056b3;
    font-weight: normal;
    }
    details[open] summary {
    display: none; /* hide preview when expanded */
    }
    details div.full-text {
    display: none;
    }
    details[open] div.full-text {
    display: block;
    margin-top: 8px;
    }
    </style>
    ''']

    parts.append('<table><thead><tr>')
    # Column labels may be non-strings (e.g. integers); escape() needs str.
    parts.extend(f'<th>{escape(str(col))}</th>' for col in df.columns)
    parts.append('</tr></thead><tbody>')

    for row in df.itertuples(index=False, name=None):
        parts.append('<tr>')
        for raw in row:
            # is_scalar() guards pd.isna(), which returns an array for
            # list-like cell values.
            is_missing = pd.api.types.is_scalar(raw) and pd.isna(raw)
            val = "" if is_missing else str(raw)
            words = val.split()
            if len(words) > PREVIEW_WORDS:
                short_text = escape(" ".join(words[:PREVIEW_WORDS]) + "...")
                full_text = escape(val)
                cell_html = f'''
                <div>
                <details>
                <summary>{short_text}</summary>
                <div class="full-text">{full_text}</div>
                </details>
                </div>
                '''
            else:
                cell_html = f'<div>{escape(val)}</div>'
            parts.append(f'<td>{cell_html}</td>')
        parts.append('</tr>')

    parts.append('</tbody></table>')
    return "".join(parts)
186
 
187
def on_search(age, sex, state, keywords):
    """Handle the search button: run the query and show the first page.

    Returns a 9-tuple in this order: table HTML, page label, previous-button
    update, next-button update, current page index (always 0), total page
    count, the full result frame, an update hiding one container and an
    update showing another (the click handler maps these last two to the
    input page and the results page respectively).
    """
    first_page, total_pages, full_df = run_search(age, sex, state, keywords)
    current_page = 0
    if not first_page.empty:
        # run_search leaves summaries blank; load_page fills them in.
        first_page = load_page(current_page, full_df)
    prev_vis, next_vis, page_text = update_page_controls(current_page, total_pages)
    return (
        df_to_html_with_readmore(first_page),
        page_text,
        prev_vis,
        next_vis,
        current_page,
        total_pages,
        full_df,
        gr.update(visible=False),
        gr.update(visible=True),
    )
195
 
196
def on_page_change(increment, page_num, total_pages, full_df):
    """Move ``increment`` pages from ``page_num`` and render the new page.

    The target page is clamped to ``[0, total_pages - 1]``.

    Returns:
        ``(html, page_text, prev_update, next_update, new_page)``. When
        ``full_df`` is ``None`` or empty: a "no results" paragraph, an empty
        label, both buttons hidden and page 0.
    """
    nothing_to_show = full_df is None or full_df.empty
    if nothing_to_show:
        hidden_prev = gr.update(visible=False)
        hidden_next = gr.update(visible=False)
        return "<p>No matching trials found.</p>", "", hidden_prev, hidden_next, 0

    target = page_num + increment
    last_page = total_pages - 1
    if target > last_page:
        target = last_page
    if target < 0:
        target = 0

    rows = load_page(target, full_df)
    prev_vis, next_vis, page_text = update_page_controls(target, total_pages)
    return df_to_html_with_readmore(rows), page_text, prev_vis, next_vis, target
204
+
205
def show_input_page():
    """Return a ``visible=True`` update followed by a ``visible=False`` update.

    The back button's handler maps these to the input page and the results
    page, in that order.
    """
    reveal_first = gr.update(visible=True)
    conceal_second = gr.update(visible=False)
    return reveal_first, conceal_second
207
+
208
# UI definition: builds the Gradio Blocks app `demo` and wires its events.
# NOTE(review): this rendering has lost all indentation, so which container
# each component below is nested in cannot be determined here -- confirm
# against the real app.py before restructuring.
+ with gr.Blocks() as demo:
209
+ gr.Markdown("# Clinical Trials Search Tool with Pagination and Inline Read More")
210
# Container created visible (visible=True): the search form.
+ with gr.Column(visible=True) as input_page:
211
+ gr.Markdown("Find **recruiting US clinical trials** that match your **age**, **sex**, **state**, and optional **keywords**.")
212
+ with gr.Row():
213
+ age_input = gr.Number(label="Your Age", value=30)
214
+ sex_input = gr.Dropdown(["Male", "Female", "All"], label="Sex", value="All")
215
+ with gr.Row():
216
+ state_input = gr.Dropdown(US_STATES, label="State", value="California")
217
+ keywords_input = gr.Textbox(label="Keywords", placeholder="e.g., Cancer, Diabetes")
218
+ search_btn = gr.Button("Search Trials")
219
# Container created hidden (visible=False): the results view.
+ with gr.Column(visible=False) as results_page:
220
+ output_html = gr.HTML()
221
+ total_pages_text = gr.Textbox(value="", interactive=False)
222
+ with gr.Row():
223
+ prev_btn = gr.Button("Previous Page")
224
+ next_btn = gr.Button("Next Page")
225
+ back_btn = gr.Button("Back")
226
# State values passed into and updated by the handlers: current page index,
# page count, and the full result set.
+ page_num_state = gr.State(0)
227
+ total_pages_state = gr.State(0)
228
+ full_results_state = gr.State(None)
229
# Search: on_search's nine return values map onto these outputs in order;
# the last two receive its visible=False / visible=True updates.
  search_btn.click(
230
+ fn=on_search,
231
  inputs=[age_input, sex_input, state_input, keywords_input],
232
+ outputs=[output_html, total_pages_text, prev_btn, next_btn, page_num_state, total_pages_state, full_results_state, input_page, results_page]
233
+ )
234
# Paging: the leading gr.State(1) / gr.State(-1) input supplies
# on_page_change's `increment` argument.
+ next_btn.click(
235
+ fn=on_page_change,
236
+ inputs=[gr.State(1), page_num_state, total_pages_state, full_results_state],
237
+ outputs=[output_html, total_pages_text, prev_btn, next_btn, page_num_state]
238
+ )
239
+ prev_btn.click(
240
+ fn=on_page_change,
241
+ inputs=[gr.State(-1), page_num_state, total_pages_state, full_results_state],
242
+ outputs=[output_html, total_pages_text, prev_btn, next_btn, page_num_state]
243
+ )
244
# Back: show_input_page returns visible=True / visible=False for these two.
+ back_btn.click(
245
+ fn=show_input_page,
246
+ outputs=[input_page, results_page]
247
  )
248
 
249
  if __name__ == "__main__":