Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -30,15 +30,13 @@ def load_dataset():
|
|
30 |
dataset = datasets.load_dataset('rjadr/ditaduranuncamais', split='train', use_auth_token=token)
|
31 |
dataset.add_faiss_index(column="txt_embs")
|
32 |
dataset.add_faiss_index(column="img_embs")
|
33 |
-
dataset = dataset.remove_columns(['Post Created','
|
34 |
return dataset
|
35 |
|
36 |
@st.cache_data(show_spinner=False)
|
37 |
def load_dataframe(_dataset):
|
38 |
dataframe = _dataset.remove_columns(['txt_embs', 'img_embs']).to_pandas()
|
39 |
dataframe['image_base64'] = dataframe['image_base64'].str.decode('utf-8')
|
40 |
-
dataframe['Overperforming Score (weighted β Likes 1x Comments 1x )'] = dataframe['Overperforming Score (weighted β Likes 1x Comments 1x )'].str.replace(',','').astype(float)
|
41 |
-
dataframe['Total Interactions'] = dataframe['Total Interactions'].str.replace(',','').astype(int)
|
42 |
return dataframe
|
43 |
|
44 |
@st.cache_resource(show_spinner=True)
|
@@ -166,7 +164,7 @@ def postprocess_results(scores, samples):
|
|
166 |
samples_df["score"].max() - samples_df["score"].min())) * 100
|
167 |
samples_df["score"] = samples_df["score"].astype(int)
|
168 |
samples_df.reset_index(inplace=True, drop=True)
|
169 |
-
samples_df = samples_df[['Post Created
|
170 |
return samples_df.drop(columns=['txt_embs', 'img_embs'])
|
171 |
|
172 |
@st.cache_data
|
@@ -268,76 +266,36 @@ if check_password():
|
|
268 |
tab1, tab2, tab3 = st.tabs(["Data exploration", "Semantic search", "Stats"])
|
269 |
|
270 |
with tab1:
|
271 |
-
# Initialization
|
272 |
-
if 'rows_per_page' not in st.session_state:
|
273 |
-
st.session_state['rows_per_page'] = 25
|
274 |
-
if 'page_number' not in st.session_state:
|
275 |
-
st.session_state['page_number'] = 1
|
276 |
-
|
277 |
-
filtered_df = filter_dataframe(df)
|
278 |
-
max_page = -(-len(filtered_df) // st.session_state['rows_per_page']) # ceiling division
|
279 |
-
|
280 |
-
start_index = st.session_state['rows_per_page'] * (st.session_state['page_number'] - 1)
|
281 |
-
end_index = start_index + st.session_state['rows_per_page']
|
282 |
-
sub_df = filtered_df.iloc[start_index:end_index]
|
283 |
-
# sort columms order: Post Created Date, image_base64, Description, Image Text, Account, User Name and then the rest
|
284 |
-
sub_df = sub_df[['Post Created Date', 'image_base64', 'Description', 'Image Text', 'Account', 'User Name'] + [col for col in sub_df.columns if col not in ['Post Created Date', 'image_base64', 'Description', 'Image Text', 'Account', 'User Name']]]
|
285 |
-
|
286 |
-
col1, col2, col3, col4 = st.columns(4)
|
287 |
-
|
288 |
-
with col4:
|
289 |
-
rows_per_page = st.selectbox('Rows per page', [25, 50, 75, 100, 150, 200], index=0, key='rows_per_page_select')
|
290 |
-
if rows_per_page != st.session_state['rows_per_page']:
|
291 |
-
st.session_state['rows_per_page'] = rows_per_page
|
292 |
-
st.session_state['page_number'] = 1 # Reset page number when rows per page changes
|
293 |
-
st.experimental_rerun()
|
294 |
-
|
295 |
-
with col2:
|
296 |
-
page_select = st.selectbox('Jump to page', options=range(1, max_page + 1), index=st.session_state['page_number']-1, key='page_number_select')
|
297 |
-
if page_select != st.session_state['page_number']:
|
298 |
-
st.session_state['page_number'] = page_select
|
299 |
-
st.experimental_rerun()
|
300 |
-
|
301 |
-
with col1:
|
302 |
-
if st.button('Previous'):
|
303 |
-
st.session_state['page_number'] = max(1, st.session_state['page_number'] - 1)
|
304 |
-
st.experimental_rerun()
|
305 |
-
|
306 |
-
with col3:
|
307 |
-
if st.button('Next'):
|
308 |
-
st.session_state['page_number'] = min(max_page, st.session_state['page_number'] + 1)
|
309 |
-
st.experimental_rerun()
|
310 |
-
|
311 |
st.dataframe(
|
312 |
-
data=
|
|
|
313 |
column_config={
|
314 |
-
"
|
315 |
-
"
|
316 |
),
|
317 |
"URL": st.column_config.LinkColumn(
|
318 |
-
"
|
319 |
)
|
320 |
},
|
321 |
-
|
322 |
)
|
323 |
|
324 |
-
|
325 |
with tab2:
|
326 |
tabs = ["Text to Text", "Text to Image", "Image to Image", "Image to Text"]
|
327 |
selected_tab = st.radio("Select a search type", tabs)
|
328 |
|
329 |
if selected_tab == "Text to Text":
|
330 |
text_to_text_input = st.text_input("Enter text")
|
331 |
-
text_to_text_k_top = st.slider("Number of results", 1,
|
332 |
if st.button("Search"):
|
333 |
st.dataframe(
|
334 |
data=text_to_text(text_to_text_input, text_to_text_k_top),
|
335 |
column_config={
|
336 |
-
"
|
337 |
-
"
|
338 |
),
|
339 |
"URL": st.column_config.LinkColumn(
|
340 |
-
"
|
341 |
)
|
342 |
},
|
343 |
hide_index=True,
|
@@ -345,50 +303,50 @@ if check_password():
|
|
345 |
|
346 |
elif selected_tab == "Text to Image":
|
347 |
text_to_image_input = st.text_input("Enter text")
|
348 |
-
text_to_image_k_top = st.slider("Number of results", 1,
|
349 |
if st.button("Search"):
|
350 |
st.dataframe(
|
351 |
data=text_to_image(text_to_image_input, text_to_image_k_top),
|
352 |
column_config={
|
353 |
"image_base64": st.column_config.ImageColumn(
|
354 |
-
"
|
355 |
),
|
356 |
"URL": st.column_config.LinkColumn(
|
357 |
-
"
|
358 |
)
|
359 |
},
|
360 |
hide_index=True,
|
361 |
)
|
362 |
|
363 |
elif selected_tab == "Image to Image":
|
364 |
-
image_to_image_k_top = st.slider("Number of results", 1,
|
365 |
image_to_image_input = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
|
366 |
if st.button("Search"):
|
367 |
st.dataframe(
|
368 |
data=image_to_image(image_to_image_input, image_to_image_k_top),
|
369 |
column_config={
|
370 |
"image_base64": st.column_config.ImageColumn(
|
371 |
-
"
|
372 |
),
|
373 |
"URL": st.column_config.LinkColumn(
|
374 |
-
"
|
375 |
)
|
376 |
},
|
377 |
hide_index=True,
|
378 |
)
|
379 |
|
380 |
elif selected_tab == "Image to Text":
|
381 |
-
image_to_text_k_top = st.slider("Number of results", 1,
|
382 |
image_to_text_input = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
|
383 |
if st.button("Search"):
|
384 |
st.dataframe(
|
385 |
data=image_to_text(image_to_text_input, image_to_text_k_top),
|
386 |
column_config={
|
387 |
"image_base64": st.column_config.ImageColumn(
|
388 |
-
"
|
389 |
),
|
390 |
"URL": st.column_config.LinkColumn(
|
391 |
-
"
|
392 |
)
|
393 |
},
|
394 |
hide_index=True,
|
@@ -413,7 +371,7 @@ if check_password():
|
|
413 |
# Dropdown to select time resampling
|
414 |
resample_time = st.selectbox('Select Time Resampling', list(resample_dict.keys()))
|
415 |
|
416 |
-
df_filtered = df.set_index('Post Created
|
417 |
|
418 |
# Slider for date range selection
|
419 |
min_date = df_filtered.index.min().date()
|
|
|
30 |
dataset = datasets.load_dataset('rjadr/ditaduranuncamais', split='train', use_auth_token=token)
|
31 |
dataset.add_faiss_index(column="txt_embs")
|
32 |
dataset.add_faiss_index(column="img_embs")
|
33 |
+
dataset = dataset.remove_columns(['Post Created','Like and View Counts Disabled','Link','Download URL','Views'])
|
34 |
return dataset
|
35 |
|
36 |
@st.cache_data(show_spinner=False)
|
37 |
def load_dataframe(_dataset):
|
38 |
dataframe = _dataset.remove_columns(['txt_embs', 'img_embs']).to_pandas()
|
39 |
dataframe['image_base64'] = dataframe['image_base64'].str.decode('utf-8')
|
|
|
|
|
40 |
return dataframe
|
41 |
|
42 |
@st.cache_resource(show_spinner=True)
|
|
|
164 |
samples_df["score"].max() - samples_df["score"].min())) * 100
|
165 |
samples_df["score"] = samples_df["score"].astype(int)
|
166 |
samples_df.reset_index(inplace=True, drop=True)
|
167 |
+
samples_df = samples_df[['Post Created', 'image', 'Description', 'Image Text', 'Account', 'User Name'] + [col for col in samples_df.columns if col not in ['Post Created', 'image', 'Description', 'Image Text', 'Account', 'User Name']]]
|
168 |
return samples_df.drop(columns=['txt_embs', 'img_embs'])
|
169 |
|
170 |
@st.cache_data
|
|
|
266 |
tab1, tab2, tab3 = st.tabs(["Data exploration", "Semantic search", "Stats"])
|
267 |
|
268 |
with tab1:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
269 |
st.dataframe(
|
270 |
+
data=filter_dataframe(df),
|
271 |
+
# use_container_width=True,
|
272 |
column_config={
|
273 |
+
"image": st.column_config.ImageColumn(
|
274 |
+
"Image", help="Instagram image"
|
275 |
),
|
276 |
"URL": st.column_config.LinkColumn(
|
277 |
+
"Link", help="Instagram link", width="small"
|
278 |
)
|
279 |
},
|
280 |
+
hide_index=True,
|
281 |
)
|
282 |
|
|
|
283 |
with tab2:
|
284 |
tabs = ["Text to Text", "Text to Image", "Image to Image", "Image to Text"]
|
285 |
selected_tab = st.radio("Select a search type", tabs)
|
286 |
|
287 |
if selected_tab == "Text to Text":
|
288 |
text_to_text_input = st.text_input("Enter text")
|
289 |
+
text_to_text_k_top = st.slider("Number of results", 1, 60, 8)
|
290 |
if st.button("Search"):
|
291 |
st.dataframe(
|
292 |
data=text_to_text(text_to_text_input, text_to_text_k_top),
|
293 |
column_config={
|
294 |
+
"image": st.column_config.ImageColumn(
|
295 |
+
"Image", help="Instagram image"
|
296 |
),
|
297 |
"URL": st.column_config.LinkColumn(
|
298 |
+
"Link", help="Instagram link", width="small"
|
299 |
)
|
300 |
},
|
301 |
hide_index=True,
|
|
|
303 |
|
304 |
elif selected_tab == "Text to Image":
|
305 |
text_to_image_input = st.text_input("Enter text")
|
306 |
+
text_to_image_k_top = st.slider("Number of results", 1, 60, 8)
|
307 |
if st.button("Search"):
|
308 |
st.dataframe(
|
309 |
data=text_to_image(text_to_image_input, text_to_image_k_top),
|
310 |
column_config={
|
311 |
"image_base64": st.column_config.ImageColumn(
|
312 |
+
"Image", help="Instagram image"
|
313 |
),
|
314 |
"URL": st.column_config.LinkColumn(
|
315 |
+
"Link", help="Instagram link", width="small"
|
316 |
)
|
317 |
},
|
318 |
hide_index=True,
|
319 |
)
|
320 |
|
321 |
elif selected_tab == "Image to Image":
|
322 |
+
image_to_image_k_top = st.slider("Number of results", 1, 60, 8)
|
323 |
image_to_image_input = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
|
324 |
if st.button("Search"):
|
325 |
st.dataframe(
|
326 |
data=image_to_image(image_to_image_input, image_to_image_k_top),
|
327 |
column_config={
|
328 |
"image_base64": st.column_config.ImageColumn(
|
329 |
+
"Image", help="Instagram image"
|
330 |
),
|
331 |
"URL": st.column_config.LinkColumn(
|
332 |
+
"Link", help="Instagram link", width="small"
|
333 |
)
|
334 |
},
|
335 |
hide_index=True,
|
336 |
)
|
337 |
|
338 |
elif selected_tab == "Image to Text":
|
339 |
+
image_to_text_k_top = st.slider("Number of results", 1, 60, 8)
|
340 |
image_to_text_input = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
|
341 |
if st.button("Search"):
|
342 |
st.dataframe(
|
343 |
data=image_to_text(image_to_text_input, image_to_text_k_top),
|
344 |
column_config={
|
345 |
"image_base64": st.column_config.ImageColumn(
|
346 |
+
"Image", help="Instagram image"
|
347 |
),
|
348 |
"URL": st.column_config.LinkColumn(
|
349 |
+
"Link", help="Instagram link", width="small"
|
350 |
)
|
351 |
},
|
352 |
hide_index=True,
|
|
|
371 |
# Dropdown to select time resampling
|
372 |
resample_time = st.selectbox('Select Time Resampling', list(resample_dict.keys()))
|
373 |
|
374 |
+
df_filtered = df.set_index('Post Created')
|
375 |
|
376 |
# Slider for date range selection
|
377 |
min_date = df_filtered.index.min().date()
|