Spaces:

poemsforaphrodite
/

gscpro

Sleeping

App Files Files Community

poemsforaphrodite committed on Oct 14, 2024

Commit

6b02470

verified ·

1 Parent(s): 11de6fa

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -19

app.py CHANGED Viewed

@@ -252,7 +252,7 @@ def analyze_competitors(row, co, country_code):
         competitor_url = normalize_url(data['url'])
         score = calculate_relevance_score(data['content'], query, co)
         results.append({
-            'Position': data['position'],
             'URL': competitor_url,
             'Score': score,
             'is_our_url': competitor_url == our_url
@@ -265,9 +265,11 @@ def analyze_competitors(row, co, country_code):
         our_content = fetch_content(our_url, query)
         our_score = calculate_relevance_score(our_content, query, co)
     if not any(r['is_our_url'] for r in results):
         results.append({
-            'Position': len(results) + 1,
             'URL': our_url,
             'Score': our_score,
             'is_our_url': True
@@ -278,7 +280,6 @@ def analyze_competitors(row, co, country_code):
     # Create DataFrame
     results_df = pd.DataFrame(results)
-    results_df['Position'] = results_df['Position'].astype(int)  # Ensure Position is integer
     # Mark our URL
     results_df['URL'] = results_df.apply(
@@ -403,7 +404,23 @@ def fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, d
     try:
         df = query.limit(MAX_ROWS).get().to_dataframe()
         #logging.info("GSC data fetched successfully")
-        return process_gsc_data(df)
     except Exception as e:
         #logging.error(f"Error fetching GSC data: {e}")
         show_error(e)
@@ -663,9 +680,8 @@ def show_tabular_data(df, co, country_code):
         cols[3].write(row.clicks)
         cols[4].write(row.impressions)
         cols[5].write(f"{row.ctr:.2%}")
-        cols[6].write(f"{int(row.position)}")  # Round Position to integer
         cols[7].write(f"{row.relevancy_score:.4f}" if not pd.isna(row.relevancy_score) and row.relevancy_score != 0 else "N/A")
         # Competitors column
         if not pd.isna(row.relevancy_score) and row.relevancy_score != 0:
             competitor_state_key = f"comp_state_{i}"
@@ -684,15 +700,11 @@ def show_tabular_data(df, co, country_code):
                 with st.spinner('Analyzing competitors...'):
                     results_df = analyze_competitors(row._asdict(), co, country_code=country_code)
-                    # Sort the results by Position in ascending order
-                    results_df = results_df.sort_values('Position', ascending=True).reset_index(drop=True)
-                    # Update the Position for our URL
-                    our_url_mask = results_df['URL'].str.contains('Our URL')
-                    results_df.loc[our_url_mask, 'Position'] = row.position  # Already integer
-                    # Ensure our URL's score matches the main table
-                    results_df.loc[our_url_mask, 'Score'] = row.relevancy_score
                     # Create a custom style function to highlight only our URL's row
                     def highlight_our_url(row):
@@ -703,8 +715,8 @@ def show_tabular_data(df, co, country_code):
                     # Apply the custom style and hide the index
                     styled_df = results_df.style.apply(highlight_our_url, axis=1).hide(axis="index")
-                    # Display the styled DataFrame with Position as integer
-                    st.markdown(styled_df.to_html(), unsafe_allow_html=True)
                     # Extract our result for additional insights
                     our_result = results_df[results_df['URL'].str.contains('Our URL')]
@@ -714,8 +726,8 @@ def show_tabular_data(df, co, country_code):
                         total_results = len(results_df)
                         our_score = our_result['Score'].values[0]
-                        st.write(f"Our page ranks {our_rank} out of {total_results} in terms of relevancy score.")
-                        st.write(f"Our relevancy score: {our_score:.4f}")
                         if our_rank == 1:
                             st.success("Your page has the highest relevancy score!")
@@ -725,7 +737,7 @@ def show_tabular_data(df, co, country_code):
                             st.warning("Your page's relevancy score is in the lower half of the results. Consider optimizing your content.")
                     else:
                         st.error(f"Our page '{row.page}' is not in the results. This indicates an error in fetching or processing the page.")
                     if compare_state_key not in st.session_state:
                         st.session_state[compare_state_key] = False

         competitor_url = normalize_url(data['url'])
         score = calculate_relevance_score(data['content'], query, co)
         results.append({
+            'Position': int(data['position']),  # Ensure position is an integer
             'URL': competitor_url,
             'Score': score,
             'is_our_url': competitor_url == our_url
         our_content = fetch_content(our_url, query)
         our_score = calculate_relevance_score(our_content, query, co)
+    # Add our URL if it's not in the results
     if not any(r['is_our_url'] for r in results):
+        our_position = int(row['position'])  # Ensure our position is an integer
         results.append({
+            'Position': our_position,
             'URL': our_url,
             'Score': our_score,
             'is_our_url': True
     # Create DataFrame
     results_df = pd.DataFrame(results)
     # Mark our URL
     results_df['URL'] = results_df.apply(
     try:
         df = query.limit(MAX_ROWS).get().to_dataframe()
         #logging.info("GSC data fetched successfully")
+        # **Ensure 'position' is integer in the processed data**
+        df_sorted = df.sort_values(['impressions'], ascending=[False])
+        df_unique = df_sorted.drop_duplicates(subset='page', keep='first')
+        if 'relevancy_score' not in df_unique.columns:
+            df_unique['relevancy_score'] = 0
+        else:
+            df_unique['relevancy_score'] = df_sorted.groupby('page')['relevancy_score'].first().values
+        result = df_unique[['page', 'query', 'clicks', 'impressions', 'ctr', 'position', 'relevancy_score']]
+        # **Cast 'position' to integer**
+        result['position'] = result['position'].astype(int)
+        #logging.info("GSC data processed successfully")
+        return result
     except Exception as e:
         #logging.error(f"Error fetching GSC data: {e}")
         show_error(e)
         cols[3].write(row.clicks)
         cols[4].write(row.impressions)
         cols[5].write(f"{row.ctr:.2%}")
+        cols[6].write(f"{int(row.position)}")  # Ensure Position is integer
         cols[7].write(f"{row.relevancy_score:.4f}" if not pd.isna(row.relevancy_score) and row.relevancy_score != 0 else "N/A")
         # Competitors column
         if not pd.isna(row.relevancy_score) and row.relevancy_score != 0:
             competitor_state_key = f"comp_state_{i}"
                 with st.spinner('Analyzing competitors...'):
                     results_df = analyze_competitors(row._asdict(), co, country_code=country_code)
+                    # Ensure Position is displayed as integer
+                    results_df['Position'] = results_df['Position'].astype(int)
+                    # Format Score to 6 decimal places
+                    results_df['Score'] = results_df['Score'].apply(lambda x: f"{x:.6f}")
                     # Create a custom style function to highlight only our URL's row
                     def highlight_our_url(row):
                     # Apply the custom style and hide the index
                     styled_df = results_df.style.apply(highlight_our_url, axis=1).hide(axis="index")
+                    # Display the styled DataFrame
+                    st.markdown(styled_df.to_html(escape=False, index=False), unsafe_allow_html=True)
                     # Extract our result for additional insights
                     our_result = results_df[results_df['URL'].str.contains('Our URL')]
                         total_results = len(results_df)
                         our_score = our_result['Score'].values[0]
+                        st.write(f"Our page ranks **{our_rank}** out of **{total_results}** in terms of relevancy score.")
+                        st.write(f"Our relevancy score: **{our_score:.4f}**")
                         if our_rank == 1:
                             st.success("Your page has the highest relevancy score!")
                             st.warning("Your page's relevancy score is in the lower half of the results. Consider optimizing your content.")
                     else:
                         st.error(f"Our page '{row.page}' is not in the results. This indicates an error in fetching or processing the page.")
                     if compare_state_key not in st.session_state:
                         st.session_state[compare_state_key] = False