Spaces:
Running
Running
poemsforaphrodite
commited on
Commit
•
82b97bd
1
Parent(s):
a1f9248
Update app.py
Browse files
app.py
CHANGED
@@ -47,7 +47,7 @@ DF_PREVIEW_ROWS = 100
|
|
47 |
# -------------
|
48 |
|
49 |
def setup_streamlit():
|
50 |
-
st.set_page_config(page_title="Keyword Relevance Test
|
51 |
st.title("Keyword Relevance Test Using Vector Embedding")
|
52 |
st.divider()
|
53 |
|
@@ -107,7 +107,7 @@ def calculate_relevancy_scores(df, model_type):
|
|
107 |
return df
|
108 |
|
109 |
def process_gsc_data(df):
|
110 |
-
#
|
111 |
df_sorted = df.sort_values(['impressions'], ascending=[False])
|
112 |
|
113 |
# Keep only the highest impression query for each page
|
@@ -245,74 +245,152 @@ def show_google_sign_in(auth_url):
|
|
245 |
with st.sidebar:
|
246 |
if st.button("Sign in with Google"):
|
247 |
st.write('Please click the link below to sign in:')
|
248 |
-
st.markdown(f'
|
249 |
-
|
250 |
-
def
|
251 |
-
st.
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
257 |
|
258 |
-
|
259 |
-
st.
|
260 |
-
|
|
|
|
|
|
|
261 |
|
262 |
-
|
263 |
-
st.
|
264 |
-
|
|
|
|
|
|
|
|
|
265 |
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
270 |
|
271 |
-
|
272 |
-
|
273 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
274 |
|
275 |
-
#
|
276 |
-
st.
|
277 |
-
|
278 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
279 |
|
280 |
-
def
|
281 |
setup_streamlit()
|
282 |
-
init_session_state()
|
283 |
client_config = load_config()
|
284 |
|
285 |
-
if
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
if code:
|
294 |
-
flow.fetch_token(authorization_response=st.experimental_get_query_params()["code"])
|
295 |
-
credentials = flow.credentials
|
296 |
-
st.session_state.site_properties = list_gsc_properties(credentials)
|
297 |
-
show_config_options()
|
298 |
|
299 |
-
if st.session_state.selected_property:
|
300 |
-
if st.sidebar.button("Fetch Data"):
|
301 |
-
start_date, end_date = calc_date_range(
|
302 |
-
st.session_state.selected_date_range,
|
303 |
-
st.session_state.custom_start_date,
|
304 |
-
st.session_state.custom_end_date
|
305 |
-
)
|
306 |
-
report = fetch_data_loading(
|
307 |
-
st.session_state.selected_property,
|
308 |
-
st.session_state.selected_search_type,
|
309 |
-
start_date,
|
310 |
-
end_date,
|
311 |
-
st.session_state.selected_dimensions,
|
312 |
-
st.session_state.selected_device
|
313 |
-
)
|
314 |
-
show_dataframe(report)
|
315 |
-
download_csv_link(report)
|
316 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
317 |
if __name__ == "__main__":
|
318 |
-
|
|
|
47 |
# -------------
|
48 |
|
49 |
def setup_streamlit():
|
50 |
+
st.set_page_config(page_title="Keyword Relevance Test", layout="wide")
|
51 |
st.title("Keyword Relevance Test Using Vector Embedding")
|
52 |
st.divider()
|
53 |
|
|
|
107 |
return df
|
108 |
|
109 |
def process_gsc_data(df):
|
110 |
+
# Remove the filter for queries below position 10
|
111 |
df_sorted = df.sort_values(['impressions'], ascending=[False])
|
112 |
|
113 |
# Keep only the highest impression query for each page
|
|
|
245 |
with st.sidebar:
|
246 |
if st.button("Sign in with Google"):
|
247 |
st.write('Please click the link below to sign in:')
|
248 |
+
st.markdown(f'[Google Sign-In]({auth_url})', unsafe_allow_html=True)
|
249 |
+
|
250 |
+
def show_property_selector(properties, account):
    """Render the Search Console property dropdown and return the chosen entry.

    Args:
        properties: Sequence of property identifiers offered in the dropdown.
        account: Object indexed by the chosen property identifier
            (presumably a Search Console account wrapper; verify against
            ``auth_search_console``).

    Returns:
        ``account[<chosen property>]``.
    """
    # Preselect the property remembered in session state when it is still
    # offered; otherwise fall back to the first entry.
    remembered = st.session_state.selected_property
    if remembered in properties:
        initial_index = properties.index(remembered)
    else:
        initial_index = 0

    chosen = st.selectbox(
        "Select a Search Console Property:",
        properties,
        index=initial_index,
        key='selected_property_selector',
        on_change=property_change,
    )
    return account[chosen]
|
260 |
+
|
261 |
+
def show_search_type_selector():
    """Render the search-type dropdown and return the selected value.

    The entry stored in ``st.session_state.selected_search_type`` is
    preselected; it must be a member of ``SEARCH_TYPES`` or ``list.index``
    raises ``ValueError``.
    """
    preselected = SEARCH_TYPES.index(st.session_state.selected_search_type)
    choice = st.selectbox(
        "Select Search Type:",
        SEARCH_TYPES,
        index=preselected,
        key='search_type_selector',
    )
    return choice
|
268 |
|
269 |
+
def show_model_type_selector():
    """Render the embedding-model dropdown and return the selected option.

    The two options offered are ``"english"`` and ``"multilingual"``.
    """
    model_choices = ["english", "multilingual"]
    picked = st.selectbox(
        "Select the embedding model:",
        model_choices,
        key='model_type_selector',
    )
    return picked
|
275 |
|
276 |
+
def show_date_range_selector():
    """Render the date-range dropdown and return the selected option.

    The entry stored in ``st.session_state.selected_date_range`` is
    preselected; it must be a member of ``DATE_RANGE_OPTIONS`` or
    ``list.index`` raises ``ValueError``.
    """
    preselected = DATE_RANGE_OPTIONS.index(st.session_state.selected_date_range)
    choice = st.selectbox(
        "Select Date Range:",
        DATE_RANGE_OPTIONS,
        index=preselected,
        key='date_range_selector',
    )
    return choice
|
283 |
|
284 |
+
def show_custom_date_inputs():
    """Render start/end date pickers and store the picked dates in session state.

    Each picker is seeded with the value currently held in session state and
    its result is written back to the same key.
    """
    state = st.session_state
    state.custom_start_date = st.date_input("Start Date", state.custom_start_date)
    state.custom_end_date = st.date_input("End Date", state.custom_end_date)
|
287 |
+
|
288 |
+
def show_dimensions_selector(search_type):
    """Render the dimensions multiselect for a search type and return the selection.

    Args:
        search_type: Search type passed to ``update_dimensions`` to obtain the
            dimensions that may be offered.

    Returns:
        The list of dimensions currently selected in the widget.
    """
    available_dimensions = update_dimensions(search_type)

    # st.multiselect raises if any default is not among the options, which
    # happens when the remembered dimensions belong to a different search
    # type. Keep only the remembered dimensions that are still offered.
    remembered = st.session_state.selected_dimensions or []
    valid_defaults = [dim for dim in remembered if dim in available_dimensions]

    return st.multiselect(
        "Select Dimensions:",
        available_dimensions,
        default=valid_defaults,
        key='dimensions_selector',
    )
|
296 |
|
297 |
+
def show_paginated_dataframe(report, rows_per_page=20):
    """Render ``report`` as a sortable, paginated HTML table with clickable page URLs.

    The input DataFrame is not modified: sorting, the integer cast of
    ``position`` and the HTML link column are all applied to copies, so the
    caller can still export the original data unchanged.

    Args:
        report: DataFrame containing the columns ``page``, ``query``,
            ``impressions``, ``clicks``, ``ctr``, ``position`` and
            ``relevancy_score``.
        rows_per_page: Number of rows shown per page.

    Raises:
        KeyError: If ``report`` lacks one of the required columns.
    """
    import html  # local import: only needed for escaping the rendered table

    display_columns = ['clickable_url', 'query', 'impressions', 'clicks', 'ctr', 'position', 'relevancy_score']
    sortable_columns = display_columns[1:]  # 'clickable_url' is not a sort option

    def make_clickable(url):
        # Escape the URL for both the attribute and the link text so a crafted
        # value cannot break out of the anchor tag.
        safe_url = html.escape(str(url), quote=True)
        return f'<a href="{safe_url}" target="_blank" rel="noopener noreferrer">{safe_url}</a>'

    def go_to_page(page_number):
        # Button callback: Streamlit runs it before the rerun, so the page
        # label, the disabled flags and the table all see the new page.
        st.session_state.current_page = page_number

    # Work on a copy so the caller's DataFrame is left untouched.
    table = report[['page', *sortable_columns]].copy()
    table['position'] = table['position'].astype(int)

    # Sorting controls; sort on the raw values before any HTML escaping.
    sort_column = st.selectbox("Sort by:", sortable_columns)
    sort_order = st.radio("Sort order:", ("Descending", "Ascending"))
    table = table.sort_values(by=sort_column, ascending=(sort_order == "Ascending"))

    # Always report at least one page, even for an empty table.
    total_pages = max(1, (len(table) - 1) // rows_per_page + 1)

    if 'current_page' not in st.session_state:
        st.session_state.current_page = 1
    # Clamp in case a previous, larger result left the page out of range.
    current = min(max(st.session_state.current_page, 1), total_pages)
    st.session_state.current_page = current

    col1, col2, col3 = st.columns([1, 3, 1])
    with col1:
        st.button("Previous", disabled=current <= 1, on_click=go_to_page, args=(current - 1,))
    with col2:
        st.write(f"Page {current} of {total_pages}")
    with col3:
        st.button("Next", disabled=current >= total_pages, on_click=go_to_page, args=(current + 1,))

    start_idx = (current - 1) * rows_per_page
    page_rows = table.iloc[start_idx:start_idx + rows_per_page].copy()

    # Build the HTML-bearing columns only for the visible slice.
    page_rows['clickable_url'] = page_rows['page'].apply(make_clickable)
    # Query text is rendered as raw HTML below, so escape it.
    page_rows['query'] = page_rows['query'].map(lambda text: html.escape(str(text)), na_action='ignore')

    # Use st.markdown to display the dataframe with clickable links
    st.markdown(
        page_rows[display_columns].to_html(escape=False, index=False),
        unsafe_allow_html=True,
    )
|
339 |
+
# -------------
|
340 |
+
# Main Streamlit App Function
|
341 |
+
# -------------
|
342 |
|
343 |
+
def main():
    """Run the Streamlit app: authenticate with Google, collect options, show the report.

    Flow:
      1. Set up the page and load the client configuration.
      2. Create the auth flow and auth URL once per session.
      3. If a ``code`` query parameter is present and no credentials are
         stored yet, fetch a token with it and store the flow's credentials.
      4. Without credentials, show the sign-in prompt and stop.
      5. Otherwise render the selectors, fetch data on demand and display it.
    """
    setup_streamlit()
    client_config = load_config()
    state = st.session_state

    if not ('auth_flow' in state and 'auth_url' in state):
        state.auth_flow, state.auth_url = google_auth(client_config)

    # Retrieve the 'code' parameter directly from the query parameters.
    auth_code = st.query_params.get("code", None)

    if auth_code and 'credentials' not in state:
        flow = state.auth_flow
        flow.fetch_token(code=auth_code)
        state.credentials = flow.credentials

    # Not signed in yet: only the sign-in prompt is shown.
    if 'credentials' not in state:
        show_google_sign_in(state.auth_url)
        return

    init_session_state()
    account = auth_search_console(client_config, state.credentials)
    properties = list_gsc_properties(state.credentials)
    if not properties:
        return

    webproperty = show_property_selector(properties, account)
    search_type = show_search_type_selector()
    date_range_selection = show_date_range_selector()
    model_type = show_model_type_selector()

    if date_range_selection == 'Custom Range':
        show_custom_date_inputs()
        start_date = state.custom_start_date
        end_date = state.custom_end_date
    else:
        start_date, end_date = calc_date_range(date_range_selection)

    selected_dimensions = show_dimensions_selector(search_type)

    if 'report_data' not in state:
        state.report_data = None

    if st.button("Fetch Data"):
        with st.spinner('Fetching data...'):
            state.report_data = fetch_data_loading(
                webproperty,
                search_type,
                start_date,
                end_date,
                selected_dimensions,
                model_type=model_type,
            )

    # Nothing has been fetched in this session yet.
    report = state.report_data
    if report is None:
        return

    if report.empty:
        st.warning("No data found for the selected criteria.")
    else:
        show_paginated_dataframe(report)
        download_csv_link(report)
|
393 |
+
|
394 |
+
|
395 |
if __name__ == "__main__":
|
396 |
+
main()
|