poemsforaphrodite committed on
Commit
6b02470
1 Parent(s): 11de6fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -19
app.py CHANGED
@@ -252,7 +252,7 @@ def analyze_competitors(row, co, country_code):
252
  competitor_url = normalize_url(data['url'])
253
  score = calculate_relevance_score(data['content'], query, co)
254
  results.append({
255
- 'Position': data['position'],
256
  'URL': competitor_url,
257
  'Score': score,
258
  'is_our_url': competitor_url == our_url
@@ -265,9 +265,11 @@ def analyze_competitors(row, co, country_code):
265
  our_content = fetch_content(our_url, query)
266
  our_score = calculate_relevance_score(our_content, query, co)
267
 
 
268
  if not any(r['is_our_url'] for r in results):
 
269
  results.append({
270
- 'Position': len(results) + 1,
271
  'URL': our_url,
272
  'Score': our_score,
273
  'is_our_url': True
@@ -278,7 +280,6 @@ def analyze_competitors(row, co, country_code):
278
 
279
  # Create DataFrame
280
  results_df = pd.DataFrame(results)
281
- results_df['Position'] = results_df['Position'].astype(int) # Ensure Position is integer
282
 
283
  # Mark our URL
284
  results_df['URL'] = results_df.apply(
@@ -403,7 +404,23 @@ def fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, d
403
  try:
404
  df = query.limit(MAX_ROWS).get().to_dataframe()
405
  #logging.info("GSC data fetched successfully")
406
- return process_gsc_data(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
407
  except Exception as e:
408
  #logging.error(f"Error fetching GSC data: {e}")
409
  show_error(e)
@@ -663,9 +680,8 @@ def show_tabular_data(df, co, country_code):
663
  cols[3].write(row.clicks)
664
  cols[4].write(row.impressions)
665
  cols[5].write(f"{row.ctr:.2%}")
666
- cols[6].write(f"{int(row.position)}") # Round Position to integer
667
  cols[7].write(f"{row.relevancy_score:.4f}" if not pd.isna(row.relevancy_score) and row.relevancy_score != 0 else "N/A")
668
-
669
  # Competitors column
670
  if not pd.isna(row.relevancy_score) and row.relevancy_score != 0:
671
  competitor_state_key = f"comp_state_{i}"
@@ -684,15 +700,11 @@ def show_tabular_data(df, co, country_code):
684
  with st.spinner('Analyzing competitors...'):
685
  results_df = analyze_competitors(row._asdict(), co, country_code=country_code)
686
 
687
- # Sort the results by Position in ascending order
688
- results_df = results_df.sort_values('Position', ascending=True).reset_index(drop=True)
689
-
690
- # Update the Position for our URL
691
- our_url_mask = results_df['URL'].str.contains('Our URL')
692
- results_df.loc[our_url_mask, 'Position'] = row.position # Already integer
693
 
694
- # Ensure our URL's score matches the main table
695
- results_df.loc[our_url_mask, 'Score'] = row.relevancy_score
696
 
697
  # Create a custom style function to highlight only our URL's row
698
  def highlight_our_url(row):
@@ -703,8 +715,8 @@ def show_tabular_data(df, co, country_code):
703
  # Apply the custom style and hide the index
704
  styled_df = results_df.style.apply(highlight_our_url, axis=1).hide(axis="index")
705
 
706
- # Display the styled DataFrame with Position as integer
707
- st.markdown(styled_df.to_html(), unsafe_allow_html=True)
708
 
709
  # Extract our result for additional insights
710
  our_result = results_df[results_df['URL'].str.contains('Our URL')]
@@ -714,8 +726,8 @@ def show_tabular_data(df, co, country_code):
714
  total_results = len(results_df)
715
  our_score = our_result['Score'].values[0]
716
 
717
- st.write(f"Our page ranks {our_rank} out of {total_results} in terms of relevancy score.")
718
- st.write(f"Our relevancy score: {our_score:.4f}")
719
 
720
  if our_rank == 1:
721
  st.success("Your page has the highest relevancy score!")
@@ -725,7 +737,7 @@ def show_tabular_data(df, co, country_code):
725
  st.warning("Your page's relevancy score is in the lower half of the results. Consider optimizing your content.")
726
  else:
727
  st.error(f"Our page '{row.page}' is not in the results. This indicates an error in fetching or processing the page.")
728
-
729
  if compare_state_key not in st.session_state:
730
  st.session_state[compare_state_key] = False
731
 
 
252
  competitor_url = normalize_url(data['url'])
253
  score = calculate_relevance_score(data['content'], query, co)
254
  results.append({
255
+ 'Position': int(data['position']), # Ensure position is an integer
256
  'URL': competitor_url,
257
  'Score': score,
258
  'is_our_url': competitor_url == our_url
 
265
  our_content = fetch_content(our_url, query)
266
  our_score = calculate_relevance_score(our_content, query, co)
267
 
268
+ # Add our URL if it's not in the results
269
  if not any(r['is_our_url'] for r in results):
270
+ our_position = int(row['position']) # Ensure our position is an integer
271
  results.append({
272
+ 'Position': our_position,
273
  'URL': our_url,
274
  'Score': our_score,
275
  'is_our_url': True
 
280
 
281
  # Create DataFrame
282
  results_df = pd.DataFrame(results)
 
283
 
284
  # Mark our URL
285
  results_df['URL'] = results_df.apply(
 
404
  try:
405
  df = query.limit(MAX_ROWS).get().to_dataframe()
406
  #logging.info("GSC data fetched successfully")
407
+
408
+ # **Ensure 'position' is integer in the processed data**
409
+ df_sorted = df.sort_values(['impressions'], ascending=[False])
410
+ df_unique = df_sorted.drop_duplicates(subset='page', keep='first')
411
+
412
+ if 'relevancy_score' not in df_unique.columns:
413
+ df_unique['relevancy_score'] = 0
414
+ else:
415
+ df_unique['relevancy_score'] = df_sorted.groupby('page')['relevancy_score'].first().values
416
+
417
+ result = df_unique[['page', 'query', 'clicks', 'impressions', 'ctr', 'position', 'relevancy_score']]
418
+
419
+ # **Cast 'position' to integer**
420
+ result['position'] = result['position'].astype(int)
421
+
422
+ #logging.info("GSC data processed successfully")
423
+ return result
424
  except Exception as e:
425
  #logging.error(f"Error fetching GSC data: {e}")
426
  show_error(e)
 
680
  cols[3].write(row.clicks)
681
  cols[4].write(row.impressions)
682
  cols[5].write(f"{row.ctr:.2%}")
683
+ cols[6].write(f"{int(row.position)}") # Ensure Position is integer
684
  cols[7].write(f"{row.relevancy_score:.4f}" if not pd.isna(row.relevancy_score) and row.relevancy_score != 0 else "N/A")
 
685
  # Competitors column
686
  if not pd.isna(row.relevancy_score) and row.relevancy_score != 0:
687
  competitor_state_key = f"comp_state_{i}"
 
700
  with st.spinner('Analyzing competitors...'):
701
  results_df = analyze_competitors(row._asdict(), co, country_code=country_code)
702
 
703
+ # Ensure Position is displayed as integer
704
+ results_df['Position'] = results_df['Position'].astype(int)
 
 
 
 
705
 
706
+ # Format Score to 6 decimal places
707
+ results_df['Score'] = results_df['Score'].apply(lambda x: f"{x:.6f}")
708
 
709
  # Create a custom style function to highlight only our URL's row
710
  def highlight_our_url(row):
 
715
  # Apply the custom style and hide the index
716
  styled_df = results_df.style.apply(highlight_our_url, axis=1).hide(axis="index")
717
 
718
+ # Display the styled DataFrame
719
+ st.markdown(styled_df.to_html(escape=False, index=False), unsafe_allow_html=True)
720
 
721
  # Extract our result for additional insights
722
  our_result = results_df[results_df['URL'].str.contains('Our URL')]
 
726
  total_results = len(results_df)
727
  our_score = our_result['Score'].values[0]
728
 
729
+ st.write(f"Our page ranks **{our_rank}** out of **{total_results}** in terms of relevancy score.")
730
+ st.write(f"Our relevancy score: **{our_score:.4f}**")
731
 
732
  if our_rank == 1:
733
  st.success("Your page has the highest relevancy score!")
 
737
  st.warning("Your page's relevancy score is in the lower half of the results. Consider optimizing your content.")
738
  else:
739
  st.error(f"Our page '{row.page}' is not in the results. This indicates an error in fetching or processing the page.")
740
+
741
  if compare_state_key not in st.session_state:
742
  st.session_state[compare_state_key] = False
743