rjadr committed on
Commit
3ba5f94
•
1 Parent(s): e2c87f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -64
app.py CHANGED
@@ -30,15 +30,13 @@ def load_dataset():
30
  dataset = datasets.load_dataset('rjadr/ditaduranuncamais', split='train', use_auth_token=token)
31
  dataset.add_faiss_index(column="txt_embs")
32
  dataset.add_faiss_index(column="img_embs")
33
- dataset = dataset.remove_columns(['Post Created','Post Created Time','Like and View Counts Disabled','Link','Photo','Title','Sponsor Id','Sponsor Name','Download URL', 'image', 'Views', 'text_full'])
34
  return dataset
35
 
36
  @st.cache_data(show_spinner=False)
37
  def load_dataframe(_dataset):
38
  dataframe = _dataset.remove_columns(['txt_embs', 'img_embs']).to_pandas()
39
  dataframe['image_base64'] = dataframe['image_base64'].str.decode('utf-8')
40
- dataframe['Overperforming Score (weighted β€” Likes 1x Comments 1x )'] = dataframe['Overperforming Score (weighted β€” Likes 1x Comments 1x )'].str.replace(',','').astype(float)
41
- dataframe['Total Interactions'] = dataframe['Total Interactions'].str.replace(',','').astype(int)
42
  return dataframe
43
 
44
  @st.cache_resource(show_spinner=True)
@@ -166,7 +164,7 @@ def postprocess_results(scores, samples):
166
  samples_df["score"].max() - samples_df["score"].min())) * 100
167
  samples_df["score"] = samples_df["score"].astype(int)
168
  samples_df.reset_index(inplace=True, drop=True)
169
- samples_df = samples_df[['Post Created Date', 'image_base64', 'Description', 'Image Text', 'Account', 'User Name'] + [col for col in samples_df.columns if col not in ['Post Created Date', 'image_base64', 'Description', 'Image Text', 'Account', 'User Name']]]
170
  return samples_df.drop(columns=['txt_embs', 'img_embs'])
171
 
172
  @st.cache_data
@@ -268,76 +266,36 @@ if check_password():
268
  tab1, tab2, tab3 = st.tabs(["Data exploration", "Semantic search", "Stats"])
269
 
270
  with tab1:
271
- # Initialization
272
- if 'rows_per_page' not in st.session_state:
273
- st.session_state['rows_per_page'] = 25
274
- if 'page_number' not in st.session_state:
275
- st.session_state['page_number'] = 1
276
-
277
- filtered_df = filter_dataframe(df)
278
- max_page = -(-len(filtered_df) // st.session_state['rows_per_page']) # ceiling division
279
-
280
- start_index = st.session_state['rows_per_page'] * (st.session_state['page_number'] - 1)
281
- end_index = start_index + st.session_state['rows_per_page']
282
- sub_df = filtered_df.iloc[start_index:end_index]
283
- # sort columms order: Post Created Date, image_base64, Description, Image Text, Account, User Name and then the rest
284
- sub_df = sub_df[['Post Created Date', 'image_base64', 'Description', 'Image Text', 'Account', 'User Name'] + [col for col in sub_df.columns if col not in ['Post Created Date', 'image_base64', 'Description', 'Image Text', 'Account', 'User Name']]]
285
-
286
- col1, col2, col3, col4 = st.columns(4)
287
-
288
- with col4:
289
- rows_per_page = st.selectbox('Rows per page', [25, 50, 75, 100, 150, 200], index=0, key='rows_per_page_select')
290
- if rows_per_page != st.session_state['rows_per_page']:
291
- st.session_state['rows_per_page'] = rows_per_page
292
- st.session_state['page_number'] = 1 # Reset page number when rows per page changes
293
- st.experimental_rerun()
294
-
295
- with col2:
296
- page_select = st.selectbox('Jump to page', options=range(1, max_page + 1), index=st.session_state['page_number']-1, key='page_number_select')
297
- if page_select != st.session_state['page_number']:
298
- st.session_state['page_number'] = page_select
299
- st.experimental_rerun()
300
-
301
- with col1:
302
- if st.button('Previous'):
303
- st.session_state['page_number'] = max(1, st.session_state['page_number'] - 1)
304
- st.experimental_rerun()
305
-
306
- with col3:
307
- if st.button('Next'):
308
- st.session_state['page_number'] = min(max_page, st.session_state['page_number'] + 1)
309
- st.experimental_rerun()
310
-
311
  st.dataframe(
312
- data=sub_df,
 
313
  column_config={
314
- "image_base64": st.column_config.ImageColumn(
315
- "image", help="Instagram image"
316
  ),
317
  "URL": st.column_config.LinkColumn(
318
- "link", help="Instagram link", width="small"
319
  )
320
  },
321
- # hide_index=True,
322
  )
323
 
324
-
325
  with tab2:
326
  tabs = ["Text to Text", "Text to Image", "Image to Image", "Image to Text"]
327
  selected_tab = st.radio("Select a search type", tabs)
328
 
329
  if selected_tab == "Text to Text":
330
  text_to_text_input = st.text_input("Enter text")
331
- text_to_text_k_top = st.slider("Number of results", 1, 20, 8)
332
  if st.button("Search"):
333
  st.dataframe(
334
  data=text_to_text(text_to_text_input, text_to_text_k_top),
335
  column_config={
336
- "image_base64": st.column_config.ImageColumn(
337
- "image", help="Instagram image"
338
  ),
339
  "URL": st.column_config.LinkColumn(
340
- "link", help="Instagram link", width="small"
341
  )
342
  },
343
  hide_index=True,
@@ -345,50 +303,50 @@ if check_password():
345
 
346
  elif selected_tab == "Text to Image":
347
  text_to_image_input = st.text_input("Enter text")
348
- text_to_image_k_top = st.slider("Number of results", 1, 20, 8)
349
  if st.button("Search"):
350
  st.dataframe(
351
  data=text_to_image(text_to_image_input, text_to_image_k_top),
352
  column_config={
353
  "image_base64": st.column_config.ImageColumn(
354
- "image", help="Instagram image"
355
  ),
356
  "URL": st.column_config.LinkColumn(
357
- "link", help="Instagram link", width="small"
358
  )
359
  },
360
  hide_index=True,
361
  )
362
 
363
  elif selected_tab == "Image to Image":
364
- image_to_image_k_top = st.slider("Number of results", 1, 20, 8)
365
  image_to_image_input = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
366
  if st.button("Search"):
367
  st.dataframe(
368
  data=image_to_image(image_to_image_input, image_to_image_k_top),
369
  column_config={
370
  "image_base64": st.column_config.ImageColumn(
371
- "image", help="Instagram image"
372
  ),
373
  "URL": st.column_config.LinkColumn(
374
- "link", help="Instagram link", width="small"
375
  )
376
  },
377
  hide_index=True,
378
  )
379
 
380
  elif selected_tab == "Image to Text":
381
- image_to_text_k_top = st.slider("Number of results", 1, 20, 8)
382
  image_to_text_input = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
383
  if st.button("Search"):
384
  st.dataframe(
385
  data=image_to_text(image_to_text_input, image_to_text_k_top),
386
  column_config={
387
  "image_base64": st.column_config.ImageColumn(
388
- "image", help="Instagram image"
389
  ),
390
  "URL": st.column_config.LinkColumn(
391
- "link", help="Instagram link", width="small"
392
  )
393
  },
394
  hide_index=True,
@@ -413,7 +371,7 @@ if check_password():
413
  # Dropdown to select time resampling
414
  resample_time = st.selectbox('Select Time Resampling', list(resample_dict.keys()))
415
 
416
- df_filtered = df.set_index('Post Created Date')
417
 
418
  # Slider for date range selection
419
  min_date = df_filtered.index.min().date()
 
30
  dataset = datasets.load_dataset('rjadr/ditaduranuncamais', split='train', use_auth_token=token)
31
  dataset.add_faiss_index(column="txt_embs")
32
  dataset.add_faiss_index(column="img_embs")
33
+ dataset = dataset.remove_columns(['Post Created','Like and View Counts Disabled','Link','Download URL','Views'])
34
  return dataset
35
 
36
  @st.cache_data(show_spinner=False)
37
  def load_dataframe(_dataset):
38
  dataframe = _dataset.remove_columns(['txt_embs', 'img_embs']).to_pandas()
39
  dataframe['image_base64'] = dataframe['image_base64'].str.decode('utf-8')
 
 
40
  return dataframe
41
 
42
  @st.cache_resource(show_spinner=True)
 
164
  samples_df["score"].max() - samples_df["score"].min())) * 100
165
  samples_df["score"] = samples_df["score"].astype(int)
166
  samples_df.reset_index(inplace=True, drop=True)
167
+ samples_df = samples_df[['Post Created', 'image', 'Description', 'Image Text', 'Account', 'User Name'] + [col for col in samples_df.columns if col not in ['Post Created', 'image', 'Description', 'Image Text', 'Account', 'User Name']]]
168
  return samples_df.drop(columns=['txt_embs', 'img_embs'])
169
 
170
  @st.cache_data
 
266
  tab1, tab2, tab3 = st.tabs(["Data exploration", "Semantic search", "Stats"])
267
 
268
  with tab1:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  st.dataframe(
270
+ data=filter_dataframe(df),
271
+ # use_container_width=True,
272
  column_config={
273
+ "image": st.column_config.ImageColumn(
274
+ "Image", help="Instagram image"
275
  ),
276
  "URL": st.column_config.LinkColumn(
277
+ "Link", help="Instagram link", width="small"
278
  )
279
  },
280
+ hide_index=True,
281
  )
282
 
 
283
  with tab2:
284
  tabs = ["Text to Text", "Text to Image", "Image to Image", "Image to Text"]
285
  selected_tab = st.radio("Select a search type", tabs)
286
 
287
  if selected_tab == "Text to Text":
288
  text_to_text_input = st.text_input("Enter text")
289
+ text_to_text_k_top = st.slider("Number of results", 1, 60, 8)
290
  if st.button("Search"):
291
  st.dataframe(
292
  data=text_to_text(text_to_text_input, text_to_text_k_top),
293
  column_config={
294
+ "image": st.column_config.ImageColumn(
295
+ "Image", help="Instagram image"
296
  ),
297
  "URL": st.column_config.LinkColumn(
298
+ "Link", help="Instagram link", width="small"
299
  )
300
  },
301
  hide_index=True,
 
303
 
304
  elif selected_tab == "Text to Image":
305
  text_to_image_input = st.text_input("Enter text")
306
+ text_to_image_k_top = st.slider("Number of results", 1, 60, 8)
307
  if st.button("Search"):
308
  st.dataframe(
309
  data=text_to_image(text_to_image_input, text_to_image_k_top),
310
  column_config={
311
  "image_base64": st.column_config.ImageColumn(
312
+ "Image", help="Instagram image"
313
  ),
314
  "URL": st.column_config.LinkColumn(
315
+ "Link", help="Instagram link", width="small"
316
  )
317
  },
318
  hide_index=True,
319
  )
320
 
321
  elif selected_tab == "Image to Image":
322
+ image_to_image_k_top = st.slider("Number of results", 1, 60, 8)
323
  image_to_image_input = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
324
  if st.button("Search"):
325
  st.dataframe(
326
  data=image_to_image(image_to_image_input, image_to_image_k_top),
327
  column_config={
328
  "image_base64": st.column_config.ImageColumn(
329
+ "Image", help="Instagram image"
330
  ),
331
  "URL": st.column_config.LinkColumn(
332
+ "Link", help="Instagram link", width="small"
333
  )
334
  },
335
  hide_index=True,
336
  )
337
 
338
  elif selected_tab == "Image to Text":
339
+ image_to_text_k_top = st.slider("Number of results", 1, 60, 8)
340
  image_to_text_input = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
341
  if st.button("Search"):
342
  st.dataframe(
343
  data=image_to_text(image_to_text_input, image_to_text_k_top),
344
  column_config={
345
  "image_base64": st.column_config.ImageColumn(
346
+ "Image", help="Instagram image"
347
  ),
348
  "URL": st.column_config.LinkColumn(
349
+ "Link", help="Instagram link", width="small"
350
  )
351
  },
352
  hide_index=True,
 
371
  # Dropdown to select time resampling
372
  resample_time = st.selectbox('Select Time Resampling', list(resample_dict.keys()))
373
 
374
+ df_filtered = df.set_index('Post Created')
375
 
376
  # Slider for date range selection
377
  min_date = df_filtered.index.min().date()