poemsforaphrodite committed on
Commit
09e6287
·
verified ·
1 Parent(s): 4068829

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -63
app.py CHANGED
@@ -1,5 +1,3 @@
1
- import logging
2
-
3
  # Standard library imports
4
  import datetime
5
  import base64
@@ -20,26 +18,20 @@ from bs4 import BeautifulSoup
20
  from apify_client import ApifyClient
21
  import urllib.parse
22
 
23
- # Configure logging
24
- logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
25
 
26
  load_dotenv()
27
- logging.info("Environment variables loaded")
28
- logger = logging.getLogger(__name__)
29
 
30
  # Initialize Cohere client
31
  APIFY_API_TOKEN = os.environ.get('APIFY_API_TOKEN')
32
  COHERE_API_KEY = os.environ["COHERE_API_KEY"]
33
  co = cohere.Client(COHERE_API_KEY)
34
- logging.info("Cohere client initialized")
35
  if not APIFY_API_TOKEN:
36
- logger.error("APIFY_API_TOKEN is not set in the environment variables.")
37
  st.error("APIFY_API_TOKEN is not set in the environment variables. Please set it and restart the application.")
38
 
39
  # Initialize the ApifyClient with the API token
40
  client = ApifyClient(APIFY_API_TOKEN)
41
  # Initialize the ApifyClient with the API token
42
- logger.info("ApifyClient initialized")
43
 
44
  # Configuration: Set to True if running locally, False if running on Streamlit Cloud
45
  IS_LOCAL = False
@@ -89,11 +81,23 @@ def init_session_state():
89
  st.session_state.custom_start_date = datetime.date.today() - datetime.timedelta(days=7)
90
  if 'custom_end_date' not in st.session_state:
91
  st.session_state.custom_end_date = datetime.date.today()
92
- logging.info("Session state initialized")
93
 
94
  # -------------
95
  # Data Processing Functions
96
  # -------------
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
  def get_serp_results(query):
99
  if not APIFY_API_TOKEN:
@@ -116,24 +120,24 @@ def get_serp_results(query):
116
  #logger.debug(f"Calling Apify Actor with input: {run_input}")
117
  # Run the Actor and wait for it to finish
118
  run = client.actor("nFJndFXA5zjCTuudP").call(run_input=run_input)
119
- logger.info(f"Apify Actor run completed. Run ID: {run.get('id')}")
120
 
121
  # Fetch results from the run's dataset
122
 
123
  #logger.debug(f"Fetching results from dataset ID: {run.get('defaultDatasetId')}")
124
  results = list(client.dataset(run["defaultDatasetId"]).iterate_items())
125
- logger.info(f"Fetched {len(results)} results from Apify dataset")
126
 
127
  if results and 'organicResults' in results[0]:
128
  urls = [item['url'] for item in results[0]['organicResults']]
129
- logger.info(f"Extracted {len(urls)} URLs from organic results")
130
  return urls
131
  else:
132
- logger.warning("No organic results found in the SERP data.")
133
  st.warning("No organic results found in the SERP data.")
134
  return []
135
  except Exception as e:
136
- logger.exception(f"Error fetching SERP results: {str(e)}")
137
  st.error(f"Error fetching SERP results: {str(e)}")
138
  return []
139
 
@@ -141,7 +145,7 @@ def get_serp_results(query):
141
 
142
 
143
  def fetch_content(url):
144
- logger.info(f"Fetching content from URL: {url}")
145
  try:
146
  # Decode URL-encoded characters
147
  decoded_url = urllib.parse.unquote(url)
@@ -152,29 +156,29 @@ def fetch_content(url):
152
  #logger.debug(f"Fetched {len(content)} characters from {url}")
153
  return content
154
  except requests.RequestException as e:
155
- logger.error(f"Error fetching content from {url}: {e}")
156
  st.warning(f"Error fetching content from {url}: {e}")
157
  return ""
158
 
159
  def calculate_relevance_score(page_content, query, co):
160
- logger.info(f"Calculating relevance score for query: {query}")
161
  try:
162
  if not page_content:
163
- logger.warning("Empty page content. Returning score 0.")
164
  return 0
165
 
166
  page_embedding = co.embed(texts=[page_content], model='embed-english-v3.0', input_type='search_document').embeddings[0]
167
  query_embedding = co.embed(texts=[query], model='embed-english-v3.0', input_type='search_query').embeddings[0]
168
  score = cosine_similarity([query_embedding], [page_embedding])[0][0]
169
- logger.debug(f"Relevance score calculated: {score}")
170
  return score
171
  except Exception as e:
172
- logger.exception(f"Error calculating relevance score: {str(e)}")
173
  st.error(f"Error calculating relevance score: {str(e)}")
174
  return 0
175
 
176
  def analyze_competitors(row, co):
177
- logger.info(f"Analyzing competitors for query: {row['query']}")
178
  query = row['query']
179
  our_url = row['page']
180
 
@@ -184,14 +188,14 @@ def analyze_competitors(row, co):
184
 
185
  # Calculate score for our page first
186
  our_content = fetch_content(our_url)
187
- print(out_url)
188
  print(our_content)
189
  if our_content:
190
  our_score = calculate_relevance_score(our_content, query, co)
191
  results.append({'url': our_url, 'relevancy_score': our_score})
192
- logger.info(f"Our URL: {our_url}, Score: {our_score}")
193
  else:
194
- logger.warning(f"No content fetched for our URL: {our_url}")
195
 
196
  # Calculate scores for competitor pages
197
  for url in competitor_urls:
@@ -199,25 +203,25 @@ def analyze_competitors(row, co):
199
  # logger.debug(f"Processing competitor URL: {url}")
200
  content = fetch_content(url)
201
  if not content:
202
- logger.warning(f"No content fetched for competitor URL: {url}")
203
  continue
204
 
205
  score = calculate_relevance_score(content, query, co)
206
 
207
- logger.info(f"Competitor URL: {url}, Score: {score}")
208
  results.append({'url': url, 'relevancy_score': score})
209
  except Exception as e:
210
- logger.error(f"Error processing URL {url}: {str(e)}")
211
  st.error(f"Error processing URL {url}: {str(e)}")
212
 
213
  results_df = pd.DataFrame(results).sort_values('relevancy_score', ascending=False)
214
 
215
- logger.info(f"Competitor analysis completed. {len(results)} results obtained.")
216
  return results_df
217
 
218
  def show_competitor_analysis(row, co):
219
  if st.button("Check Competitors", key=f"comp_{row['page']}"):
220
- logger.info(f"Competitor analysis requested for page: {row['page']}")
221
  with st.spinner('Analyzing competitors...'):
222
  results_df = analyze_competitors(row, co)
223
  st.write("Relevancy Score Comparison:")
@@ -226,7 +230,7 @@ def show_competitor_analysis(row, co):
226
  our_data = results_df[results_df['url'] == row['page']]
227
  if our_data.empty:
228
  st.error(f"Our page '{row['page']}' is not in the results. This indicates an error in fetching or processing the page.")
229
- logger.error(f"Our page '{row['page']}' is missing from the results.")
230
 
231
  # Additional debugging information
232
  # st.write("Debugging Information:")
@@ -241,7 +245,7 @@ def show_competitor_analysis(row, co):
241
  total_results = len(results_df)
242
  our_score = our_data['relevancy_score'].values[0]
243
 
244
- logger.info(f"Our page ranks {our_rank} out of {total_results} in terms of relevancy score.")
245
  st.write(f"Our page ('{row['page']}') ranks {our_rank} out of {total_results} in terms of relevancy score.")
246
  st.write(f"Our relevancy score: {our_score:.4f}")
247
 
@@ -280,7 +284,7 @@ def analyze_competitors(row, co):
280
 
281
  return results_df
282
  def process_gsc_data(df):
283
- logging.info("Processing GSC data")
284
  df_sorted = df.sort_values(['impressions'], ascending=[False])
285
  df_unique = df_sorted.drop_duplicates(subset='page', keep='first')
286
 
@@ -290,7 +294,7 @@ def process_gsc_data(df):
290
  df_unique['relevancy_score'] = df_sorted.groupby('page')['relevancy_score'].first().values
291
 
292
  result = df_unique[['page', 'query', 'clicks', 'impressions', 'ctr', 'position', 'relevancy_score']]
293
- logging.info("GSC data processed successfully")
294
  return result
295
 
296
  # -------------
@@ -298,7 +302,7 @@ def process_gsc_data(df):
298
  # -------------
299
 
300
  def load_config():
301
- logging.info("Loading Google client configuration")
302
  client_config = {
303
  "web": {
304
  "client_id": os.environ["CLIENT_ID"],
@@ -308,25 +312,25 @@ def load_config():
308
  "redirect_uris": ["https://poemsforaphrodite-gscpro.hf.space/"],
309
  }
310
  }
311
- logging.info("Google client configuration loaded")
312
  return client_config
313
 
314
  def init_oauth_flow(client_config):
315
- logging.info("Initializing OAuth flow")
316
  scopes = ["https://www.googleapis.com/auth/webmasters.readonly"]
317
  flow = Flow.from_client_config(
318
  client_config,
319
  scopes=scopes,
320
  redirect_uri=client_config["web"]["redirect_uris"][0]
321
  )
322
- logging.info("OAuth flow initialized")
323
  return flow
324
 
325
  def google_auth(client_config):
326
- logging.info("Starting Google authentication")
327
  flow = init_oauth_flow(client_config)
328
  auth_url, _ = flow.authorization_url(prompt="consent")
329
- logging.info("Google authentication URL generated")
330
  return flow, auth_url
331
 
332
  def auth_search_console(client_config, credentials):
@@ -348,7 +352,7 @@ def auth_search_console(client_config, credentials):
348
  # -------------
349
 
350
  def list_gsc_properties(credentials):
351
- logging.info("Listing GSC properties")
352
  service = build('webmasters', 'v3', credentials=credentials)
353
  site_list = service.sites().list().execute()
354
  properties = [site['siteUrl'] for site in site_list.get('siteEntry', [])] or ["No properties found"]
@@ -362,16 +366,16 @@ def fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, d
362
  query = query.filter('device', 'equals', device_type.lower())
363
  try:
364
  df = query.limit(MAX_ROWS).get().to_dataframe()
365
- logging.info("GSC data fetched successfully")
366
  return process_gsc_data(df)
367
  except Exception as e:
368
- logging.error(f"Error fetching GSC data: {e}")
369
  show_error(e)
370
  return pd.DataFrame()
371
 
372
 
373
  def calculate_relevancy_scores(df, model_type):
374
- logging.info("Calculating relevancy scores")
375
  with st.spinner('Calculating relevancy scores...'):
376
  try:
377
  page_contents = [fetch_content(url) for url in df['page']]
@@ -379,9 +383,9 @@ def calculate_relevancy_scores(df, model_type):
379
  query_embeddings = generate_embeddings(df['query'].tolist(), model_type)
380
  relevancy_scores = cosine_similarity(query_embeddings, page_embeddings).diagonal()
381
  df = df.assign(relevancy_score=relevancy_scores)
382
- logging.info("Relevancy scores calculated successfully")
383
  except Exception as e:
384
- logging.error(f"Error calculating relevancy scores: {e}")
385
  st.warning(f"Error calculating relevancy scores: {e}")
386
  df = df.assign(relevancy_score=0)
387
  return df
@@ -417,7 +421,7 @@ def calc_date_range(selection, custom_start=None, custom_end=None):
417
  return date_range
418
 
419
  def show_error(e):
420
- logging.error(f"An error occurred: {e}")
421
  st.error(f"An error occurred: {e}")
422
 
423
  def property_change():
@@ -429,33 +433,33 @@ def property_change():
429
  # -------------
430
 
431
  def show_dataframe(report):
432
- logging.info("Showing dataframe preview")
433
  with st.expander("Preview the First 100 Rows (Unique Pages with Top Query)"):
434
  st.dataframe(report.head(DF_PREVIEW_ROWS))
435
 
436
  def download_csv_link(report):
437
- logging.info("Generating CSV download link")
438
  def to_csv(df):
439
  return df.to_csv(index=False, encoding='utf-8-sig')
440
  csv = to_csv(report)
441
  b64_csv = base64.b64encode(csv.encode()).decode()
442
  href = f'<a href="data:file/csv;base64,{b64_csv}" download="search_console_data.csv">Download CSV File</a>'
443
  st.markdown(href, unsafe_allow_html=True)
444
- logging.info("CSV download link generated")
445
 
446
  # -------------
447
  # Streamlit UI Components
448
  # -------------
449
 
450
  def show_google_sign_in(auth_url):
451
- logging.info("Showing Google sign-in button")
452
  with st.sidebar:
453
  if st.button("Sign in with Google"):
454
  st.write('Please click the link below to sign in:')
455
  st.markdown(f'[Google Sign-In]({auth_url})', unsafe_allow_html=True)
456
 
457
  def show_property_selector(properties, account):
458
- logging.info("Showing property selector")
459
  selected_property = st.selectbox(
460
  "Select a Search Console Property:",
461
  properties,
@@ -467,7 +471,7 @@ def show_property_selector(properties, account):
467
  return account[selected_property]
468
 
469
  def show_search_type_selector():
470
- logging.info("Showing search type selector")
471
  return st.selectbox(
472
  "Select Search Type:",
473
  SEARCH_TYPES,
@@ -476,7 +480,7 @@ def show_search_type_selector():
476
  )
477
 
478
  def show_model_type_selector():
479
- logging.info("Showing model type selector")
480
  return st.selectbox(
481
  "Select the embedding model:",
482
  ["english", "multilingual"],
@@ -512,7 +516,7 @@ def show_tabular_data(df, co):
512
 
513
 
514
  def show_date_range_selector():
515
- logging.info("Showing date range selector")
516
  return st.selectbox(
517
  "Select Date Range:",
518
  DATE_RANGE_OPTIONS,
@@ -521,12 +525,12 @@ def show_date_range_selector():
521
  )
522
 
523
  def show_custom_date_inputs():
524
- logging.info("Showing custom date inputs")
525
  st.session_state.custom_start_date = st.date_input("Start Date", st.session_state.custom_start_date)
526
  st.session_state.custom_end_date = st.date_input("End Date", st.session_state.custom_end_date)
527
 
528
  def show_dimensions_selector(search_type):
529
- logging.info("Showing dimensions selector")
530
  available_dimensions = update_dimensions(search_type)
531
  return st.multiselect(
532
  "Select Dimensions:",
@@ -536,7 +540,7 @@ def show_dimensions_selector(search_type):
536
  )
537
 
538
  def show_paginated_dataframe(report, rows_per_page=20):
539
- logging.info("Showing paginated dataframe")
540
  report['position'] = report['position'].astype(int)
541
  report['impressions'] = pd.to_numeric(report['impressions'], errors='coerce')
542
 
@@ -608,7 +612,7 @@ def show_paginated_dataframe(report, rows_per_page=20):
608
  # -------------
609
 
610
  def main():
611
- logging.info("Starting main function")
612
  setup_streamlit()
613
  client_config = load_config()
614
 
@@ -653,17 +657,17 @@ def main():
653
  st.write("Data fetched successfully. Click the button below to calculate relevancy scores.")
654
 
655
  if st.button("Calculate Relevancy Scores"):
656
- logger.info("Calculating relevancy scores for all rows")
657
  st.session_state.report_data = calculate_relevancy_scores(st.session_state.report_data, model_type)
658
 
659
  show_tabular_data(st.session_state.report_data, co)
660
 
661
  download_csv_link(st.session_state.report_data)
662
  elif st.session_state.report_data is not None:
663
- logger.warning("No data found for the selected criteria.")
664
  st.warning("No data found for the selected criteria.")
665
 
666
  if __name__ == "__main__":
667
- logging.info("Running main function")
668
  main()
669
- logger.info("Script completed")
 
 
 
1
  # Standard library imports
2
  import datetime
3
  import base64
 
18
  from apify_client import ApifyClient
19
  import urllib.parse
20
 
 
 
21
 
22
  load_dotenv()
23
+
 
24
 
25
  # Initialize Cohere client
26
  APIFY_API_TOKEN = os.environ.get('APIFY_API_TOKEN')
27
  COHERE_API_KEY = os.environ["COHERE_API_KEY"]
28
  co = cohere.Client(COHERE_API_KEY)
 
29
  if not APIFY_API_TOKEN:
 
30
  st.error("APIFY_API_TOKEN is not set in the environment variables. Please set it and restart the application.")
31
 
32
  # Initialize the ApifyClient with the API token
33
  client = ApifyClient(APIFY_API_TOKEN)
34
  # Initialize the ApifyClient with the API token
 
35
 
36
  # Configuration: Set to True if running locally, False if running on Streamlit Cloud
37
  IS_LOCAL = False
 
81
  st.session_state.custom_start_date = datetime.date.today() - datetime.timedelta(days=7)
82
  if 'custom_end_date' not in st.session_state:
83
  st.session_state.custom_end_date = datetime.date.today()
84
+ #logging.info("Session state initialized")
85
 
86
  # -------------
87
  # Data Processing Functions
88
  # -------------
89
+ def generate_embeddings(text_list, model_type):
90
+ #logging.debug(f"Generating embeddings for model type: {model_type}")
91
+ if not text_list:
92
+ logging.warning("Text list is empty, returning empty embeddings")
93
+ return []
94
+ model = 'embed-english-v3.0' if model_type == 'english' else 'embed-multilingual-v3.0'
95
+ input_type = 'search_document'
96
+ response = co.embed(model=model, texts=text_list, input_type=input_type)
97
+ embeddings = response.embeddings
98
+ # logging.debug(f"Embeddings generated successfully for model type: {model_type}")
99
+ return embeddings
100
+
101
 
102
  def get_serp_results(query):
103
  if not APIFY_API_TOKEN:
 
120
  #logger.debug(f"Calling Apify Actor with input: {run_input}")
121
  # Run the Actor and wait for it to finish
122
  run = client.actor("nFJndFXA5zjCTuudP").call(run_input=run_input)
123
+ # logger.info(f"Apify Actor run completed. Run ID: {run.get('id')}")
124
 
125
  # Fetch results from the run's dataset
126
 
127
  #logger.debug(f"Fetching results from dataset ID: {run.get('defaultDatasetId')}")
128
  results = list(client.dataset(run["defaultDatasetId"]).iterate_items())
129
+ # logger.info(f"Fetched {len(results)} results from Apify dataset")
130
 
131
  if results and 'organicResults' in results[0]:
132
  urls = [item['url'] for item in results[0]['organicResults']]
133
+ # logger.info(f"Extracted {len(urls)} URLs from organic results")
134
  return urls
135
  else:
136
+ # logger.warning("No organic results found in the SERP data.")
137
  st.warning("No organic results found in the SERP data.")
138
  return []
139
  except Exception as e:
140
+ # logger.exception(f"Error fetching SERP results: {str(e)}")
141
  st.error(f"Error fetching SERP results: {str(e)}")
142
  return []
143
 
 
145
 
146
 
147
  def fetch_content(url):
148
+ # logger.info(f"Fetching content from URL: {url}")
149
  try:
150
  # Decode URL-encoded characters
151
  decoded_url = urllib.parse.unquote(url)
 
156
  #logger.debug(f"Fetched {len(content)} characters from {url}")
157
  return content
158
  except requests.RequestException as e:
159
+ # logger.error(f"Error fetching content from {url}: {e}")
160
  st.warning(f"Error fetching content from {url}: {e}")
161
  return ""
162
 
163
  def calculate_relevance_score(page_content, query, co):
164
+ # logger.info(f"Calculating relevance score for query: {query}")
165
  try:
166
  if not page_content:
167
+ # logger.warning("Empty page content. Returning score 0.")
168
  return 0
169
 
170
  page_embedding = co.embed(texts=[page_content], model='embed-english-v3.0', input_type='search_document').embeddings[0]
171
  query_embedding = co.embed(texts=[query], model='embed-english-v3.0', input_type='search_query').embeddings[0]
172
  score = cosine_similarity([query_embedding], [page_embedding])[0][0]
173
+ # logger.debug(f"Relevance score calculated: {score}")
174
  return score
175
  except Exception as e:
176
+ # logger.exception(f"Error calculating relevance score: {str(e)}")
177
  st.error(f"Error calculating relevance score: {str(e)}")
178
  return 0
179
 
180
  def analyze_competitors(row, co):
181
+ # logger.info(f"Analyzing competitors for query: {row['query']}")
182
  query = row['query']
183
  our_url = row['page']
184
 
 
188
 
189
  # Calculate score for our page first
190
  our_content = fetch_content(our_url)
191
+ print(our_url)
192
  print(our_content)
193
  if our_content:
194
  our_score = calculate_relevance_score(our_content, query, co)
195
  results.append({'url': our_url, 'relevancy_score': our_score})
196
+ #logger.info(f"Our URL: {our_url}, Score: {our_score}")
197
  else:
198
+ #logger.warning(f"No content fetched for our URL: {our_url}")
199
 
200
  # Calculate scores for competitor pages
201
  for url in competitor_urls:
 
203
  # logger.debug(f"Processing competitor URL: {url}")
204
  content = fetch_content(url)
205
  if not content:
206
+ # logger.warning(f"No content fetched for competitor URL: {url}")
207
  continue
208
 
209
  score = calculate_relevance_score(content, query, co)
210
 
211
+ # logger.info(f"Competitor URL: {url}, Score: {score}")
212
  results.append({'url': url, 'relevancy_score': score})
213
  except Exception as e:
214
+ # logger.error(f"Error processing URL {url}: {str(e)}")
215
  st.error(f"Error processing URL {url}: {str(e)}")
216
 
217
  results_df = pd.DataFrame(results).sort_values('relevancy_score', ascending=False)
218
 
219
+ # logger.info(f"Competitor analysis completed. {len(results)} results obtained.")
220
  return results_df
221
 
222
  def show_competitor_analysis(row, co):
223
  if st.button("Check Competitors", key=f"comp_{row['page']}"):
224
+ # logger.info(f"Competitor analysis requested for page: {row['page']}")
225
  with st.spinner('Analyzing competitors...'):
226
  results_df = analyze_competitors(row, co)
227
  st.write("Relevancy Score Comparison:")
 
230
  our_data = results_df[results_df['url'] == row['page']]
231
  if our_data.empty:
232
  st.error(f"Our page '{row['page']}' is not in the results. This indicates an error in fetching or processing the page.")
233
+ # logger.error(f"Our page '{row['page']}' is missing from the results.")
234
 
235
  # Additional debugging information
236
  # st.write("Debugging Information:")
 
245
  total_results = len(results_df)
246
  our_score = our_data['relevancy_score'].values[0]
247
 
248
+ # logger.info(f"Our page ranks {our_rank} out of {total_results} in terms of relevancy score.")
249
  st.write(f"Our page ('{row['page']}') ranks {our_rank} out of {total_results} in terms of relevancy score.")
250
  st.write(f"Our relevancy score: {our_score:.4f}")
251
 
 
284
 
285
  return results_df
286
  def process_gsc_data(df):
287
+ #logging.info("Processing GSC data")
288
  df_sorted = df.sort_values(['impressions'], ascending=[False])
289
  df_unique = df_sorted.drop_duplicates(subset='page', keep='first')
290
 
 
294
  df_unique['relevancy_score'] = df_sorted.groupby('page')['relevancy_score'].first().values
295
 
296
  result = df_unique[['page', 'query', 'clicks', 'impressions', 'ctr', 'position', 'relevancy_score']]
297
+ #logging.info("GSC data processed successfully")
298
  return result
299
 
300
  # -------------
 
302
  # -------------
303
 
304
  def load_config():
305
+ #logging.info("Loading Google client configuration")
306
  client_config = {
307
  "web": {
308
  "client_id": os.environ["CLIENT_ID"],
 
312
  "redirect_uris": ["https://poemsforaphrodite-gscpro.hf.space/"],
313
  }
314
  }
315
+ #logging.info("Google client configuration loaded")
316
  return client_config
317
 
318
  def init_oauth_flow(client_config):
319
+ #logging.info("Initializing OAuth flow")
320
  scopes = ["https://www.googleapis.com/auth/webmasters.readonly"]
321
  flow = Flow.from_client_config(
322
  client_config,
323
  scopes=scopes,
324
  redirect_uri=client_config["web"]["redirect_uris"][0]
325
  )
326
+ #logging.info("OAuth flow initialized")
327
  return flow
328
 
329
  def google_auth(client_config):
330
+ # logging.info("Starting Google authentication")
331
  flow = init_oauth_flow(client_config)
332
  auth_url, _ = flow.authorization_url(prompt="consent")
333
+ #logging.info("Google authentication URL generated")
334
  return flow, auth_url
335
 
336
  def auth_search_console(client_config, credentials):
 
352
  # -------------
353
 
354
  def list_gsc_properties(credentials):
355
+ # logging.info("Listing GSC properties")
356
  service = build('webmasters', 'v3', credentials=credentials)
357
  site_list = service.sites().list().execute()
358
  properties = [site['siteUrl'] for site in site_list.get('siteEntry', [])] or ["No properties found"]
 
366
  query = query.filter('device', 'equals', device_type.lower())
367
  try:
368
  df = query.limit(MAX_ROWS).get().to_dataframe()
369
+ #logging.info("GSC data fetched successfully")
370
  return process_gsc_data(df)
371
  except Exception as e:
372
+ #logging.error(f"Error fetching GSC data: {e}")
373
  show_error(e)
374
  return pd.DataFrame()
375
 
376
 
377
  def calculate_relevancy_scores(df, model_type):
378
+ #logging.info("Calculating relevancy scores")
379
  with st.spinner('Calculating relevancy scores...'):
380
  try:
381
  page_contents = [fetch_content(url) for url in df['page']]
 
383
  query_embeddings = generate_embeddings(df['query'].tolist(), model_type)
384
  relevancy_scores = cosine_similarity(query_embeddings, page_embeddings).diagonal()
385
  df = df.assign(relevancy_score=relevancy_scores)
386
+ #logging.info("Relevancy scores calculated successfully")
387
  except Exception as e:
388
+ #logging.error(f"Error calculating relevancy scores: {e}")
389
  st.warning(f"Error calculating relevancy scores: {e}")
390
  df = df.assign(relevancy_score=0)
391
  return df
 
421
  return date_range
422
 
423
  def show_error(e):
424
+ #logging.error(f"An error occurred: {e}")
425
  st.error(f"An error occurred: {e}")
426
 
427
  def property_change():
 
433
  # -------------
434
 
435
  def show_dataframe(report):
436
+ #logging.info("Showing dataframe preview")
437
  with st.expander("Preview the First 100 Rows (Unique Pages with Top Query)"):
438
  st.dataframe(report.head(DF_PREVIEW_ROWS))
439
 
440
  def download_csv_link(report):
441
+ #logging.info("Generating CSV download link")
442
  def to_csv(df):
443
  return df.to_csv(index=False, encoding='utf-8-sig')
444
  csv = to_csv(report)
445
  b64_csv = base64.b64encode(csv.encode()).decode()
446
  href = f'<a href="data:file/csv;base64,{b64_csv}" download="search_console_data.csv">Download CSV File</a>'
447
  st.markdown(href, unsafe_allow_html=True)
448
+ #logging.info("CSV download link generated")
449
 
450
  # -------------
451
  # Streamlit UI Components
452
  # -------------
453
 
454
  def show_google_sign_in(auth_url):
455
+ # logging.info("Showing Google sign-in button")
456
  with st.sidebar:
457
  if st.button("Sign in with Google"):
458
  st.write('Please click the link below to sign in:')
459
  st.markdown(f'[Google Sign-In]({auth_url})', unsafe_allow_html=True)
460
 
461
  def show_property_selector(properties, account):
462
+ # logging.info("Showing property selector")
463
  selected_property = st.selectbox(
464
  "Select a Search Console Property:",
465
  properties,
 
471
  return account[selected_property]
472
 
473
  def show_search_type_selector():
474
+ # logging.info("Showing search type selector")
475
  return st.selectbox(
476
  "Select Search Type:",
477
  SEARCH_TYPES,
 
480
  )
481
 
482
  def show_model_type_selector():
483
+ # logging.info("Showing model type selector")
484
  return st.selectbox(
485
  "Select the embedding model:",
486
  ["english", "multilingual"],
 
516
 
517
 
518
  def show_date_range_selector():
519
+ # logging.info("Showing date range selector")
520
  return st.selectbox(
521
  "Select Date Range:",
522
  DATE_RANGE_OPTIONS,
 
525
  )
526
 
527
  def show_custom_date_inputs():
528
+ # logging.info("Showing custom date inputs")
529
  st.session_state.custom_start_date = st.date_input("Start Date", st.session_state.custom_start_date)
530
  st.session_state.custom_end_date = st.date_input("End Date", st.session_state.custom_end_date)
531
 
532
  def show_dimensions_selector(search_type):
533
+ # logging.info("Showing dimensions selector")
534
  available_dimensions = update_dimensions(search_type)
535
  return st.multiselect(
536
  "Select Dimensions:",
 
540
  )
541
 
542
  def show_paginated_dataframe(report, rows_per_page=20):
543
+ # logging.info("Showing paginated dataframe")
544
  report['position'] = report['position'].astype(int)
545
  report['impressions'] = pd.to_numeric(report['impressions'], errors='coerce')
546
 
 
612
  # -------------
613
 
614
  def main():
615
+ # logging.info("Starting main function")
616
  setup_streamlit()
617
  client_config = load_config()
618
 
 
657
  st.write("Data fetched successfully. Click the button below to calculate relevancy scores.")
658
 
659
  if st.button("Calculate Relevancy Scores"):
660
+ # logger.info("Calculating relevancy scores for all rows")
661
  st.session_state.report_data = calculate_relevancy_scores(st.session_state.report_data, model_type)
662
 
663
  show_tabular_data(st.session_state.report_data, co)
664
 
665
  download_csv_link(st.session_state.report_data)
666
  elif st.session_state.report_data is not None:
667
+ # logger.warning("No data found for the selected criteria.")
668
  st.warning("No data found for the selected criteria.")
669
 
670
  if __name__ == "__main__":
671
+ # logging.info("Running main function")
672
  main()
673
+ #logger.info("Script completed")