m7n committed on
Commit
394512b
·
verified ·
1 Parent(s): 4f40d11

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -9
app.py CHANGED
@@ -176,27 +176,27 @@ def predict(text_input, progress=gr.Progress()):
176
  print(records_df)
177
 
178
 
179
- progress(0.2, desc="Embedding Data...")
180
  texts_to_embedd = [title + tokenizer.sep_token + publication + tokenizer.sep_token + abstract for title, publication, abstract in zip(records_df['title'],records_df['parsed_publication'], records_df['abstract'])]
181
 
182
  embeddings = create_embeddings(texts_to_embedd)
183
  print(embeddings)
184
 
185
- progress(0.3, desc="Project into UMAP-embedding...")
186
  umap_embeddings = mapper.transform(embeddings)
187
  records_df[['x','y']] = umap_embeddings
188
 
189
  basedata_df['color'] = '#ced4d211'
190
  records_df['color'] = '#a81a26'
191
 
192
- progress(0.4, desc="Set up data...")
193
 
194
- stacked_df = pd.concat([records_df, basedata_df], axis=0, ignore_index=True)
195
  stacked_df = stacked_df.fillna("Unlabelled")
196
  stacked_df = stacked_df.reset_index(drop=True)
197
  print(stacked_df)
198
 
199
-
200
 
201
 
202
  file_name = f"{datetime.utcnow().strftime('%s')}.html"
@@ -205,16 +205,76 @@ def predict(text_input, progress=gr.Progress()):
205
 
206
  #
207
 
208
- progress(0.5, desc="Plotting...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
 
210
  plot = datamapplot.create_interactive_plot(
211
  stacked_df[['x','y']].values,
212
  np.array(stacked_df['cluster_1_labels']),np.array(stacked_df['cluster_2_labels']),np.array(stacked_df['cluster_3_labels']),
213
  hover_text=[str(ix) + ', ' + str(row['parsed_publication']) + str(row['title']) for ix, row in stacked_df.iterrows()],
214
- font_family="Roboto Condensed",marker_color_array=stacked_df['color']
215
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
- progress(0.6, desc="Saving plot...")
218
  plot.save(file_path)
219
 
220
  progress(1.0, desc="Done!")
@@ -222,6 +282,15 @@ def predict(text_input, progress=gr.Progress()):
222
  link = f'<a href="/static/{file_name}" target="_blank">{file_name}</a>'
223
  return link, iframe
224
 
 
 
 
 
 
 
 
 
 
225
  with gr.Blocks() as block:
226
  gr.Markdown("""
227
  ## Gradio + FastAPI + Static Server
@@ -240,6 +309,14 @@ The Gradio app generates dynamic HTML files and stores them in a static director
240
 
241
 
242
 
 
 
 
 
 
 
 
 
243
  def setup_basemap_data():
244
  # get data.
245
  print("getting basemap data...")
 
176
  print(records_df)
177
 
178
 
179
+ progress(0.3, desc="Embedding Data...")
180
  texts_to_embedd = [title + tokenizer.sep_token + publication + tokenizer.sep_token + abstract for title, publication, abstract in zip(records_df['title'],records_df['parsed_publication'], records_df['abstract'])]
181
 
182
  embeddings = create_embeddings(texts_to_embedd)
183
  print(embeddings)
184
 
185
+ progress(0.5, desc="Project into UMAP-embedding...")
186
  umap_embeddings = mapper.transform(embeddings)
187
  records_df[['x','y']] = umap_embeddings
188
 
189
  basedata_df['color'] = '#ced4d211'
190
  records_df['color'] = '#a81a26'
191
 
192
+ progress(0.6, desc="Set up data...")
193
 
194
+ stacked_df = pd.concat([basedata_df,records_df], axis=0, ignore_index=True)
195
  stacked_df = stacked_df.fillna("Unlabelled")
196
  stacked_df = stacked_df.reset_index(drop=True)
197
  print(stacked_df)
198
 
199
+ extra_data = pd.DataFrame(stacked_df['doi'])
200
 
201
 
202
  file_name = f"{datetime.utcnow().strftime('%s')}.html"
 
205
 
206
  #
207
 
208
+ progress(0.7, desc="Plotting...")
209
+
210
+ custom_css = """
211
+
212
+
213
+ #title-container {
214
+ background: #edededaa;
215
+ border-radius: 2px;
216
+
217
+ box-shadow: 2px 3px 10px #aaaaaa00;
218
+ }
219
+
220
+
221
+
222
+ #search-container {
223
+ position: fixed !important;
224
+ top: 20px !important;
225
+ right: 20px !important;
226
+ left: auto !important;
227
+ width: 200px !important;
228
+ z-index: 9999 !important;
229
+ }
230
+
231
+ #search {
232
+ // padding: 8px 8px !important;
233
+ // border: none !important;
234
+ // border-radius: 20px !important;
235
+ background-color: #ffffffaa !important;
236
+ font-family: 'Roboto Condensed', sans-serif !important;
237
+ font-size: 14px;
238
+ // box-shadow: 0 0px 0px #aaaaaa00 !important;
239
+ }
240
+
241
+
242
+
243
+ """
244
 
245
+
246
  plot = datamapplot.create_interactive_plot(
247
  stacked_df[['x','y']].values,
248
  np.array(stacked_df['cluster_1_labels']),np.array(stacked_df['cluster_2_labels']),np.array(stacked_df['cluster_3_labels']),
249
  hover_text=[str(ix) + ', ' + str(row['parsed_publication']) + str(row['title']) for ix, row in stacked_df.iterrows()],
250
+ marker_color_array=stacked_df['color'],
251
+
252
+ use_medoids=True,
253
+ width=1100,
254
+ height=900,
255
+ # title='The Science of <span style="color:#ab0b00;"> Consciousness </span>',
256
+ # sub_title=f'<div style="margin-top:20px;"> Large sample, n={len(dataset_df_filtered)}, embeddings with specter 2 & UMAP, labels: Claude 3.5 Sonnet </div>',
257
+ point_radius_min_pixels=1,
258
+ text_outline_width=5,
259
+ point_hover_color='#ab0b00',
260
+ point_radius_max_pixels=7,
261
+ color_label_text=False,
262
+ font_family="Roboto Condensed",
263
+ font_weight=700,
264
+ tooltip_font_weight=600,
265
+ tooltip_font_family="Roboto Condensed",
266
+ extra_point_data=extra_data,
267
+ on_click="window.open(`{doi}`)",
268
+ custom_css=custom_css,
269
+ initial_zoom_fraction=.5,
270
+ enable_search=True)
271
+
272
+
273
+
274
+
275
+
276
 
277
+ progress(0.9, desc="Saving plot...")
278
  plot.save(file_path)
279
 
280
  progress(1.0, desc="Done!")
 
282
  link = f'<a href="/static/{file_name}" target="_blank">{file_name}</a>'
283
  return link, iframe
284
 
285
+
286
+
287
+
288
+
289
+
290
+ ################ MAIN BLOCK #####################
291
+
292
+
293
+
294
  with gr.Blocks() as block:
295
  gr.Markdown("""
296
  ## Gradio + FastAPI + Static Server
 
309
 
310
 
311
 
312
+
313
+
314
+
315
+
316
+
317
+
318
+
319
+
320
  def setup_basemap_data():
321
  # get data.
322
  print("getting basemap data...")