poemsforaphrodite committed on
Commit
a1f9248
1 Parent(s): ee5283f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -138
app.py CHANGED
@@ -47,9 +47,8 @@ DF_PREVIEW_ROWS = 100
47
  # -------------
48
 
49
  def setup_streamlit():
50
- st.set_page_config(page_title="GSC", layout="wide")
51
- st.title("GSC")
52
- #st.markdown(f"### Lightweight GSC Data Extractor. (Max {MAX_ROWS:,} Rows)")
53
  st.divider()
54
 
55
  def init_session_state():
@@ -108,11 +107,8 @@ def calculate_relevancy_scores(df, model_type):
108
  return df
109
 
110
  def process_gsc_data(df):
111
- # Filter for queries below position 10
112
- df_filtered = df[df['position'] > 10].copy()
113
-
114
  # Sort by impressions in descending order
115
- df_sorted = df_filtered.sort_values(['impressions'], ascending=[False])
116
 
117
  # Keep only the highest impression query for each page
118
  df_unique = df_sorted.drop_duplicates(subset='page', keep='first')
@@ -249,145 +245,74 @@ def show_google_sign_in(auth_url):
249
  with st.sidebar:
250
  if st.button("Sign in with Google"):
251
  st.write('Please click the link below to sign in:')
252
- st.markdown(f'[Google Sign-In]({auth_url})', unsafe_allow_html=True)
253
-
254
- def show_property_selector(properties, account):
255
- selected_property = st.selectbox(
256
- "Select a Search Console Property:",
257
- properties,
258
- index=properties.index(
259
- st.session_state.selected_property) if st.session_state.selected_property in properties else 0,
260
- key='selected_property_selector',
261
- on_change=property_change
262
- )
263
- return account[selected_property]
264
-
265
- def show_search_type_selector():
266
- return st.selectbox(
267
- "Select Search Type:",
268
- SEARCH_TYPES,
269
- index=SEARCH_TYPES.index(st.session_state.selected_search_type),
270
- key='search_type_selector'
271
- )
272
 
273
- def show_model_type_selector():
274
- return st.selectbox(
275
- "Select the embedding model:",
276
- ["english", "multilingual"],
277
- key='model_type_selector'
278
- )
279
 
280
- def show_date_range_selector():
281
- return st.selectbox(
282
- "Select Date Range:",
283
- DATE_RANGE_OPTIONS,
284
- index=DATE_RANGE_OPTIONS.index(st.session_state.selected_date_range),
285
- key='date_range_selector'
286
- )
287
 
288
- def show_custom_date_inputs():
289
- st.session_state.custom_start_date = st.date_input("Start Date", st.session_state.custom_start_date)
290
- st.session_state.custom_end_date = st.date_input("End Date", st.session_state.custom_end_date)
291
-
292
- def show_dimensions_selector(search_type):
293
- available_dimensions = update_dimensions(search_type)
294
- return st.multiselect(
295
- "Select Dimensions:",
296
- available_dimensions,
297
- default=st.session_state.selected_dimensions,
298
- key='dimensions_selector'
299
- )
300
 
301
- def show_paginated_dataframe(report, rows_per_page=20):
302
- # Convert 'position' column to integer
303
- report['position'] = report['position'].astype(int)
304
-
305
- # Create a clickable URL column
306
- def make_clickable(url):
307
- return f'<a href="{url}" target="_blank">{url}</a>'
308
-
309
- report['clickable_url'] = report['page'].apply(make_clickable)
310
-
311
- # Reorder columns to put clickable_url first and sort by impressions
312
- columns = ['clickable_url', 'query', 'impressions', 'clicks', 'ctr', 'position', 'relevancy_score']
313
- report = report[columns].sort_values('impressions', ascending=False)
314
-
315
- total_rows = len(report)
316
- total_pages = (total_rows - 1) // rows_per_page + 1
317
-
318
- if 'current_page' not in st.session_state:
319
- st.session_state.current_page = 1
320
-
321
- col1, col2, col3 = st.columns([1,3,1])
322
- with col1:
323
- if st.button("Previous", disabled=st.session_state.current_page == 1):
324
- st.session_state.current_page -= 1
325
- with col2:
326
- st.write(f"Page {st.session_state.current_page} of {total_pages}")
327
- with col3:
328
- if st.button("Next", disabled=st.session_state.current_page == total_pages):
329
- st.session_state.current_page += 1
330
-
331
- start_idx = (st.session_state.current_page - 1) * rows_per_page
332
- end_idx = start_idx + rows_per_page
333
-
334
- # Use st.markdown to display the dataframe with clickable links
335
- st.markdown(report.iloc[start_idx:end_idx].to_html(escape=False, index=False), unsafe_allow_html=True)
336
- # -------------
337
- # Main Streamlit App Function
338
- # -------------
339
 
340
- def main():
341
  setup_streamlit()
 
342
  client_config = load_config()
343
 
344
- if 'auth_flow' not in st.session_state or 'auth_url' not in st.session_state:
345
- st.session_state.auth_flow, st.session_state.auth_url = google_auth(client_config)
346
-
347
- # Directly access query parameters using st.query_params
348
- query_params = st.query_params
349
-
350
- # Retrieve the 'code' parameter
351
- auth_code = query_params.get("code", None)
 
 
 
 
 
352
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
 
354
- if auth_code and 'credentials' not in st.session_state:
355
- st.session_state.auth_flow.fetch_token(code=auth_code)
356
- st.session_state.credentials = st.session_state.auth_flow.credentials
357
-
358
- if 'credentials' not in st.session_state:
359
- show_google_sign_in(st.session_state.auth_url)
360
- else:
361
- init_session_state()
362
- account = auth_search_console(client_config, st.session_state.credentials)
363
- properties = list_gsc_properties(st.session_state.credentials)
364
-
365
- if properties:
366
- webproperty = show_property_selector(properties, account)
367
- search_type = show_search_type_selector()
368
- date_range_selection = show_date_range_selector()
369
- model_type = show_model_type_selector() # Add this line
370
- if date_range_selection == 'Custom Range':
371
- show_custom_date_inputs()
372
- start_date, end_date = st.session_state.custom_start_date, st.session_state.custom_end_date
373
- else:
374
- start_date, end_date = calc_date_range(date_range_selection)
375
-
376
- selected_dimensions = show_dimensions_selector(search_type)
377
-
378
- if 'report_data' not in st.session_state:
379
- st.session_state.report_data = None
380
-
381
- if st.button("Fetch Data"):
382
- with st.spinner('Fetching data...'):
383
- st.session_state.report_data = fetch_data_loading(webproperty, search_type, start_date, end_date, selected_dimensions, model_type=model_type) # Update this line
384
-
385
- if st.session_state.report_data is not None and not st.session_state.report_data.empty:
386
- show_paginated_dataframe(st.session_state.report_data)
387
- download_csv_link(st.session_state.report_data)
388
- elif st.session_state.report_data is not None:
389
- st.warning("No data found for the selected criteria.")
390
-
391
-
392
  if __name__ == "__main__":
393
- main()
 
47
  # -------------
48
 
49
  def setup_streamlit():
50
+ st.set_page_config(page_title="Keyword Relevance Test Using Vector Embedding", layout="wide")
51
+ st.title("Keyword Relevance Test Using Vector Embedding")
 
52
  st.divider()
53
 
54
  def init_session_state():
 
107
  return df
108
 
109
  def process_gsc_data(df):
 
 
 
110
  # Sort by impressions in descending order
111
+ df_sorted = df.sort_values(['impressions'], ascending=[False])
112
 
113
  # Keep only the highest impression query for each page
114
  df_unique = df_sorted.drop_duplicates(subset='page', keep='first')
 
245
  with st.sidebar:
246
  if st.button("Sign in with Google"):
247
  st.write('Please click the link below to sign in:')
248
+ st.markdown(f'<a href="{auth_url}" target="_blank">Sign in with Google</a>', unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
 
250
+ def show_config_options():
251
+ st.sidebar.header("Configuration Options")
 
 
 
 
252
 
253
+ # Site Property Selection
254
+ st.sidebar.subheader("1. Select Site Property")
255
+ site_properties = st.session_state.site_properties
256
+ selected_property = st.sidebar.selectbox("Select Site Property", options=site_properties, index=0, key='selected_property_selector', on_change=property_change)
 
 
 
257
 
258
+ # Search Type Selection
259
+ st.sidebar.subheader("2. Select Search Type")
260
+ selected_search_type = st.sidebar.selectbox("Search Type", options=SEARCH_TYPES, index=0, key='selected_search_type')
 
 
 
 
 
 
 
 
 
261
 
262
+ # Date Range Selection
263
+ st.sidebar.subheader("3. Select Date Range")
264
+ selected_date_range = st.sidebar.selectbox("Date Range", options=DATE_RANGE_OPTIONS, index=0, key='selected_date_range')
265
+
266
+ # Custom Date Range Selection
267
+ if selected_date_range == 'Custom Range':
268
+ st.sidebar.date_input("Start Date", key='custom_start_date')
269
+ st.sidebar.date_input("End Date", key='custom_end_date')
270
+
271
+ # Device Type Selection
272
+ st.sidebar.subheader("4. Select Device Type")
273
+ selected_device = st.sidebar.selectbox("Device Type", options=DEVICE_OPTIONS, index=0, key='selected_device')
274
+
275
+ # Dimension Selection
276
+ st.sidebar.subheader("5. Select Dimensions")
277
+ selected_dimensions = update_dimensions(selected_search_type)
278
+ st.sidebar.multiselect("Dimensions", options=selected_dimensions, default=selected_dimensions[:2], key='selected_dimensions')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
 
280
+ def run():
281
  setup_streamlit()
282
+ init_session_state()
283
  client_config = load_config()
284
 
285
+ if IS_LOCAL:
286
+ credentials = auth_search_console(client_config, None)
287
+ st.session_state.site_properties = list_gsc_properties(credentials)
288
+ show_config_options()
289
+ else:
290
+ flow, auth_url = google_auth(client_config)
291
+ show_google_sign_in(auth_url)
292
+ code = st.experimental_get_query_params().get("code")
293
+ if code:
294
+ flow.fetch_token(authorization_response=st.experimental_get_query_params()["code"])
295
+ credentials = flow.credentials
296
+ st.session_state.site_properties = list_gsc_properties(credentials)
297
+ show_config_options()
298
 
299
+ if st.session_state.selected_property:
300
+ if st.sidebar.button("Fetch Data"):
301
+ start_date, end_date = calc_date_range(
302
+ st.session_state.selected_date_range,
303
+ st.session_state.custom_start_date,
304
+ st.session_state.custom_end_date
305
+ )
306
+ report = fetch_data_loading(
307
+ st.session_state.selected_property,
308
+ st.session_state.selected_search_type,
309
+ start_date,
310
+ end_date,
311
+ st.session_state.selected_dimensions,
312
+ st.session_state.selected_device
313
+ )
314
+ show_dataframe(report)
315
+ download_csv_link(report)
316
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  if __name__ == "__main__":
318
+ run()