jfataphd committed on
Commit
d4a2975
1 Parent(s): f2f40f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -34
app.py CHANGED
@@ -63,7 +63,7 @@ st.header(f":blue[{database_name} Pubmed corpus.]")
63
  text_input_value = st.text_input(f"Enter one term to search within the {database_name} corpus")
64
  query = text_input_value
65
  query = query.lower()
66
- query = re.sub("[,.?!&*;:]", "", query)
67
  matches = [" "]
68
  if any([x in query for x in matches]):
69
  st.write("Please only enter one term or a term without spaces")
@@ -171,9 +171,25 @@ if query:
171
  print()
172
  # df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
173
  # time.sleep(2)
174
-
175
-
176
- df10 = df1.head(10)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  df10.index = (1 / df10.index)*10000
178
  sizes = df10.index.tolist()
179
 
@@ -191,51 +207,62 @@ if query:
191
  fig2.patch.set_facecolor('#CCFFFF')
192
  #
193
  df3 = df1.copy()
 
194
  df3.reset_index(inplace=True)
195
  df3 = df3.rename(columns={'Human Gene': 'symbol2'})
196
  # Use df.query to get a subset of df1 based on ids in df2
197
- subset = df3.head(10).query('symbol2 in @df2.symbol2')
198
  # Use merge to join the two DataFrames on id
199
  result = pd.merge(subset, df2, on='symbol2')
200
  # Show the result
201
  # print(result)
202
 
203
  df = df10
204
-
205
- # Define the `text` column for labels and `href` column for links
206
- df['text'] = df10.index
207
- df['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
208
  '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df10.index]
209
- df['href2'] = [f'https://www.ncbi.nlm.nih.gov/gene/?term=' + c for c in df10.index]
210
- df['name'] = [c for c in result['Approved name']]
211
- df['database'] = database_name
212
- # print(df['name'])
213
-
214
- # Create the treemap using `px.treemap`
215
- fig = px.treemap(df, path=[df10.index], values=sizes, hover_data=['SIMILARITY'],
216
- custom_data=['href', 'name', 'database', 'href2'])
217
- # fig.update_traces(texttemplate="Gene: <b>%{label}</b><br>Gene "
218
- # "name: %{customdata[1]}<br><a href='%{customdata[0]}'>PubMed Abstracts with %{label} "
219
- # "in %{customdata[2]}</a><br><a href='%{customdata[3]}'>NCBI gene information for %{label} "
220
- # "in %{customdata[2]}</a>")
221
- fig.update(layout_coloraxis_showscale=False)
222
- fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF")
223
- fig.update_annotations(visible=False)
224
- fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
225
  hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
226
- texttemplate="<b>%{label}</b><br>%{customdata[1]}<br><a href='%{customdata[0]}'>PubMed"
 
 
227
  "</a><br><a href='%{customdata[3]}'>NCBI"
228
- "</a>")
229
- fig.update_layout(uniformtext=dict(minsize=10, mode='hide'), treemapcolorway=["lightblue"])
230
- # # display the treemap in Streamlit
231
- # with treemap2:
232
- st.subheader(f"Top 10 Genes closely related to {query}")
233
  # st.pyplot(fig2)
234
- st.plotly_chart(fig, use_container_width=True)
 
 
 
 
235
 
236
- csv = df1.head(100).to_csv().encode('utf-8')
237
- st.download_button(label="download top 100 genes (csv)", data=csv, file_name=f'{database_name}_genes.csv',
 
238
  mime='text/csv')
 
 
 
239
  st.markdown("---")
240
  st.subheader("Cancer-related videos")
241
  if query:
 
63
  text_input_value = st.text_input(f"Enter one term to search within the {database_name} corpus")
64
  query = text_input_value
65
  query = query.lower()
66
+ query = re.sub("[,.?!&*;: ]", "", query)
67
  matches = [" "]
68
  if any([x in query for x in matches]):
69
  st.write("Please only enter one term or a term without spaces")
 
171
  print()
172
  # df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
173
  # time.sleep(2)
174
+ # Create the slider with increments of 5 up to 100
175
+
176
+ st.markdown(f"<b><p style='font-family: Arial; font-size: 20px;'>Populate a treemap with the slider below to visualize "
177
+ f"<span style='color:red; font-style: italic;'>genes</span> contextually "
178
+ f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
179
+ f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
180
+ unsafe_allow_html=True)
181
+ value = st.slider("", 0, 100, step=5)
182
+ if value > 0:
183
+ # st.subheader(f"Top {value} genes closely related to {query}: "
184
+ # f"Click on the Pubmed and NCBI links for more gene information")
185
+
186
+ st.markdown(
187
+ f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value} "
188
+ f"</span>genes similar to "
189
+ f"<span style='color:red; font-style: italic;'>{query}:</span> Click on the squares to expand and the Pubmed and NCBI links for more gene information</span></p></b>",
190
+ unsafe_allow_html=True)
191
+
192
+ df10 = df1.head(value)
193
  df10.index = (1 / df10.index)*10000
194
  sizes = df10.index.tolist()
195
 
 
207
  fig2.patch.set_facecolor('#CCFFFF')
208
  #
209
  df3 = df1.copy()
210
+ df3["SIMILARITY"] = 'Similarity Score ' + df3.head(value)["SIMILARITY"].round(2).astype(str)
211
  df3.reset_index(inplace=True)
212
  df3 = df3.rename(columns={'Human Gene': 'symbol2'})
213
  # Use df.query to get a subset of df1 based on ids in df2
214
+ subset = df3.head(value).query('symbol2 in @df2.symbol2')
215
  # Use merge to join the two DataFrames on id
216
  result = pd.merge(subset, df2, on='symbol2')
217
  # Show the result
218
  # print(result)
219
 
220
  df = df10
221
+ try:
222
+ # Define the `text` column for labels and `href` column for links
223
+ df['text'] = df10.index
224
+ df['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
225
  '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df10.index]
226
+ df['href2'] = [f'https://www.ncbi.nlm.nih.gov/gene/?term=' + c for c in df10.index]
227
+
228
+ df['name'] = [c for c in result['Approved name']]
229
+
230
+ df['database'] = database_name
231
+
232
+ # print(df['name'])
233
+
234
+ # Create the treemap using `px.treemap`
235
+ fig = px.treemap(df, path=[df10.index], values=sizes,
236
+ custom_data=['href', 'name', 'database', 'href2'], hover_name=(df3.head(value)['SIMILARITY']))
237
+
238
+ fig.update(layout_coloraxis_showscale=False)
239
+ fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
240
+ fig.update_annotations(visible=False)
241
+ fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
242
  hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
243
+ texttemplate="<b><span style='font-family: Arial; font-size: 20px;'>%{label}</span></b><br><span "
244
+ "style='font-family: Arial; font-size: 15px;'>%{customdata[1]}<br>"
245
+ "<a href='%{customdata[0]}'>PubMed"
246
  "</a><br><a href='%{customdata[3]}'>NCBI"
247
+ "</span></a>")
248
+ fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["lightblue"])
249
+ # # display the treemap in Streamlit
250
+ # with treemap2:
251
+
252
  # st.pyplot(fig2)
253
+ st.plotly_chart(fig, use_container_width=True)
254
+
255
+ st.caption("Gene designation and database provided by HUGO Gene Nomenclature Committee (HGNC): https://www.genenames.org/")
256
+ st.caption("Gene designation add in exceptions [p21, p53, her2, her3]")
257
+
258
 
259
+
260
+ csv = df1.head(value).to_csv().encode('utf-8')
261
+ st.download_button(label=f"download top {value} genes (csv)", data=csv, file_name=f'{database_name}_genes.csv',
262
  mime='text/csv')
263
+ except:
264
+ st.warning(
265
+ f"This selection exceeds the number of similar genes related to {query} within the {database_name} corpus")
266
  st.markdown("---")
267
  st.subheader("Cancer-related videos")
268
  if query: