Spaces:
Running
Running
poemsforaphrodite
committed on
Commit
•
a1f9248
1
Parent(s):
ee5283f
Update app.py
Browse files
app.py
CHANGED
@@ -47,9 +47,8 @@ DF_PREVIEW_ROWS = 100
|
|
47 |
# -------------
|
48 |
|
49 |
def setup_streamlit():
|
50 |
-
st.set_page_config(page_title="
|
51 |
-
st.title("
|
52 |
-
#st.markdown(f"### Lightweight GSC Data Extractor. (Max {MAX_ROWS:,} Rows)")
|
53 |
st.divider()
|
54 |
|
55 |
def init_session_state():
|
@@ -108,11 +107,8 @@ def calculate_relevancy_scores(df, model_type):
|
|
108 |
return df
|
109 |
|
110 |
def process_gsc_data(df):
|
111 |
-
# Filter for queries below position 10
|
112 |
-
df_filtered = df[df['position'] > 10].copy()
|
113 |
-
|
114 |
# Sort by impressions in descending order
|
115 |
-
df_sorted =
|
116 |
|
117 |
# Keep only the highest impression query for each page
|
118 |
df_unique = df_sorted.drop_duplicates(subset='page', keep='first')
|
@@ -249,145 +245,74 @@ def show_google_sign_in(auth_url):
|
|
249 |
with st.sidebar:
|
250 |
if st.button("Sign in with Google"):
|
251 |
st.write('Please click the link below to sign in:')
|
252 |
-
st.markdown(f'
|
253 |
-
|
254 |
-
def show_property_selector(properties, account):
|
255 |
-
selected_property = st.selectbox(
|
256 |
-
"Select a Search Console Property:",
|
257 |
-
properties,
|
258 |
-
index=properties.index(
|
259 |
-
st.session_state.selected_property) if st.session_state.selected_property in properties else 0,
|
260 |
-
key='selected_property_selector',
|
261 |
-
on_change=property_change
|
262 |
-
)
|
263 |
-
return account[selected_property]
|
264 |
-
|
265 |
-
def show_search_type_selector():
|
266 |
-
return st.selectbox(
|
267 |
-
"Select Search Type:",
|
268 |
-
SEARCH_TYPES,
|
269 |
-
index=SEARCH_TYPES.index(st.session_state.selected_search_type),
|
270 |
-
key='search_type_selector'
|
271 |
-
)
|
272 |
|
273 |
-
def
|
274 |
-
|
275 |
-
"Select the embedding model:",
|
276 |
-
["english", "multilingual"],
|
277 |
-
key='model_type_selector'
|
278 |
-
)
|
279 |
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
index=DATE_RANGE_OPTIONS.index(st.session_state.selected_date_range),
|
285 |
-
key='date_range_selector'
|
286 |
-
)
|
287 |
|
288 |
-
|
289 |
-
st.
|
290 |
-
|
291 |
-
|
292 |
-
def show_dimensions_selector(search_type):
|
293 |
-
available_dimensions = update_dimensions(search_type)
|
294 |
-
return st.multiselect(
|
295 |
-
"Select Dimensions:",
|
296 |
-
available_dimensions,
|
297 |
-
default=st.session_state.selected_dimensions,
|
298 |
-
key='dimensions_selector'
|
299 |
-
)
|
300 |
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
#
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
if 'current_page' not in st.session_state:
|
319 |
-
st.session_state.current_page = 1
|
320 |
-
|
321 |
-
col1, col2, col3 = st.columns([1,3,1])
|
322 |
-
with col1:
|
323 |
-
if st.button("Previous", disabled=st.session_state.current_page == 1):
|
324 |
-
st.session_state.current_page -= 1
|
325 |
-
with col2:
|
326 |
-
st.write(f"Page {st.session_state.current_page} of {total_pages}")
|
327 |
-
with col3:
|
328 |
-
if st.button("Next", disabled=st.session_state.current_page == total_pages):
|
329 |
-
st.session_state.current_page += 1
|
330 |
-
|
331 |
-
start_idx = (st.session_state.current_page - 1) * rows_per_page
|
332 |
-
end_idx = start_idx + rows_per_page
|
333 |
-
|
334 |
-
# Use st.markdown to display the dataframe with clickable links
|
335 |
-
st.markdown(report.iloc[start_idx:end_idx].to_html(escape=False, index=False), unsafe_allow_html=True)
|
336 |
-
# -------------
|
337 |
-
# Main Streamlit App Function
|
338 |
-
# -------------
|
339 |
|
340 |
-
def
|
341 |
setup_streamlit()
|
|
|
342 |
client_config = load_config()
|
343 |
|
344 |
-
if
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
|
|
|
|
|
|
|
|
|
|
352 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
353 |
|
354 |
-
if auth_code and 'credentials' not in st.session_state:
|
355 |
-
st.session_state.auth_flow.fetch_token(code=auth_code)
|
356 |
-
st.session_state.credentials = st.session_state.auth_flow.credentials
|
357 |
-
|
358 |
-
if 'credentials' not in st.session_state:
|
359 |
-
show_google_sign_in(st.session_state.auth_url)
|
360 |
-
else:
|
361 |
-
init_session_state()
|
362 |
-
account = auth_search_console(client_config, st.session_state.credentials)
|
363 |
-
properties = list_gsc_properties(st.session_state.credentials)
|
364 |
-
|
365 |
-
if properties:
|
366 |
-
webproperty = show_property_selector(properties, account)
|
367 |
-
search_type = show_search_type_selector()
|
368 |
-
date_range_selection = show_date_range_selector()
|
369 |
-
model_type = show_model_type_selector() # Add this line
|
370 |
-
if date_range_selection == 'Custom Range':
|
371 |
-
show_custom_date_inputs()
|
372 |
-
start_date, end_date = st.session_state.custom_start_date, st.session_state.custom_end_date
|
373 |
-
else:
|
374 |
-
start_date, end_date = calc_date_range(date_range_selection)
|
375 |
-
|
376 |
-
selected_dimensions = show_dimensions_selector(search_type)
|
377 |
-
|
378 |
-
if 'report_data' not in st.session_state:
|
379 |
-
st.session_state.report_data = None
|
380 |
-
|
381 |
-
if st.button("Fetch Data"):
|
382 |
-
with st.spinner('Fetching data...'):
|
383 |
-
st.session_state.report_data = fetch_data_loading(webproperty, search_type, start_date, end_date, selected_dimensions, model_type=model_type) # Update this line
|
384 |
-
|
385 |
-
if st.session_state.report_data is not None and not st.session_state.report_data.empty:
|
386 |
-
show_paginated_dataframe(st.session_state.report_data)
|
387 |
-
download_csv_link(st.session_state.report_data)
|
388 |
-
elif st.session_state.report_data is not None:
|
389 |
-
st.warning("No data found for the selected criteria.")
|
390 |
-
|
391 |
-
|
392 |
if __name__ == "__main__":
|
393 |
-
|
|
|
47 |
# -------------
|
48 |
|
49 |
def setup_streamlit():
    """Configure the Streamlit page and render the app heading.

    Sets the page title with a wide layout, draws the on-page title, and
    finishes with a horizontal divider.
    """
    # The same text is used for the page title and the on-page heading.
    app_title = "Keyword Relevance Test Using Vector Embedding"
    st.set_page_config(page_title=app_title, layout="wide")
    st.title(app_title)
    st.divider()
|
53 |
|
54 |
def init_session_state():
|
|
|
107 |
return df
|
108 |
|
109 |
def process_gsc_data(df):
|
|
|
|
|
|
|
110 |
# Sort by impressions in descending order
|
111 |
+
df_sorted = df.sort_values(['impressions'], ascending=[False])
|
112 |
|
113 |
# Keep only the highest impression query for each page
|
114 |
df_unique = df_sorted.drop_duplicates(subset='page', keep='first')
|
|
|
245 |
with st.sidebar:
|
246 |
if st.button("Sign in with Google"):
|
247 |
st.write('Please click the link below to sign in:')
|
248 |
+
st.markdown(f'<a href="{auth_url}" target="_blank">Sign in with Google</a>', unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
249 |
|
250 |
+
def show_config_options():
    """Render the sidebar configuration widgets.

    Draws five numbered sidebar sections: site property, search type, date
    range (plus two date pickers when 'Custom Range' is chosen), device type
    and dimensions. Every widget is created with an explicit ``key``; the
    function itself returns nothing.

    Reads ``st.session_state.site_properties`` for the property choices.
    """
    sidebar = st.sidebar
    sidebar.header("Configuration Options")

    # 1. Site property -- property_change is invoked when the selection changes.
    sidebar.subheader("1. Select Site Property")
    sidebar.selectbox(
        "Select Site Property",
        options=st.session_state.site_properties,
        index=0,
        key='selected_property_selector',
        on_change=property_change,
    )

    # 2. Search type -- also drives the dimension choices further down.
    sidebar.subheader("2. Select Search Type")
    search_type = sidebar.selectbox(
        "Search Type",
        options=SEARCH_TYPES,
        index=0,
        key='selected_search_type',
    )

    # 3. Date range, with explicit start/end pickers for a custom range.
    sidebar.subheader("3. Select Date Range")
    date_range = sidebar.selectbox(
        "Date Range",
        options=DATE_RANGE_OPTIONS,
        index=0,
        key='selected_date_range',
    )
    if date_range == 'Custom Range':
        date_fields = (
            ("Start Date", 'custom_start_date'),
            ("End Date", 'custom_end_date'),
        )
        for label, state_key in date_fields:
            sidebar.date_input(label, key=state_key)

    # 4. Device type.
    sidebar.subheader("4. Select Device Type")
    sidebar.selectbox(
        "Device Type",
        options=DEVICE_OPTIONS,
        index=0,
        key='selected_device',
    )

    # 5. Dimensions -- the first two available dimensions are pre-selected.
    sidebar.subheader("5. Select Dimensions")
    dimension_choices = update_dimensions(search_type)
    sidebar.multiselect(
        "Dimensions",
        options=dimension_choices,
        default=dimension_choices[:2],
        key='selected_dimensions',
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
279 |
|
280 |
+
def run():
    """Main entry point of the Streamlit app.

    Sets up the page and session state, loads the client configuration,
    authenticates (directly when ``IS_LOCAL`` is truthy, otherwise through
    the Google sign-in link and the ``code`` query parameter), renders the
    sidebar options and, when "Fetch Data" is pressed, fetches the report,
    displays it and offers a CSV download.
    """
    setup_streamlit()
    init_session_state()
    client_config = load_config()

    if IS_LOCAL:
        credentials = auth_search_console(client_config, None)
        st.session_state.site_properties = list_gsc_properties(credentials)
        show_config_options()
    else:
        flow, auth_url = google_auth(client_config)
        show_google_sign_in(auth_url)
        # experimental_get_query_params() maps each parameter name to a LIST
        # of values, so `code` is a list here (or None when absent).
        code = st.experimental_get_query_params().get("code")
        if code:
            # Exchange the authorization code itself. The previous call passed
            # the list of codes as `authorization_response`, which expects the
            # full callback URL and therefore could not be parsed.
            # NOTE(review): the code is exchanged again on every rerun while
            # it remains in the URL -- consider caching the credentials in
            # st.session_state; confirm against google_auth().
            flow.fetch_token(code=code[0])
            credentials = flow.credentials
            st.session_state.site_properties = list_gsc_properties(credentials)
            show_config_options()

    if st.session_state.selected_property:
        if st.sidebar.button("Fetch Data"):
            start_date, end_date = calc_date_range(
                st.session_state.selected_date_range,
                st.session_state.custom_start_date,
                st.session_state.custom_end_date
            )
            report = fetch_data_loading(
                st.session_state.selected_property,
                st.session_state.selected_search_type,
                start_date,
                end_date,
                st.session_state.selected_dimensions,
                st.session_state.selected_device
            )
            show_dataframe(report)
            download_csv_link(report)
|
316 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
317 |
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    run()
|