Spaces:
Running
Running
poemsforaphrodite
committed on
Commit
•
6b02470
1
Parent(s):
11de6fa
Update app.py
Browse files
app.py
CHANGED
@@ -252,7 +252,7 @@ def analyze_competitors(row, co, country_code):
|
|
252 |
competitor_url = normalize_url(data['url'])
|
253 |
score = calculate_relevance_score(data['content'], query, co)
|
254 |
results.append({
|
255 |
-
'Position': data['position'],
|
256 |
'URL': competitor_url,
|
257 |
'Score': score,
|
258 |
'is_our_url': competitor_url == our_url
|
@@ -265,9 +265,11 @@ def analyze_competitors(row, co, country_code):
|
|
265 |
our_content = fetch_content(our_url, query)
|
266 |
our_score = calculate_relevance_score(our_content, query, co)
|
267 |
|
|
|
268 |
if not any(r['is_our_url'] for r in results):
|
|
|
269 |
results.append({
|
270 |
-
'Position':
|
271 |
'URL': our_url,
|
272 |
'Score': our_score,
|
273 |
'is_our_url': True
|
@@ -278,7 +280,6 @@ def analyze_competitors(row, co, country_code):
|
|
278 |
|
279 |
# Create DataFrame
|
280 |
results_df = pd.DataFrame(results)
|
281 |
-
results_df['Position'] = results_df['Position'].astype(int) # Ensure Position is integer
|
282 |
|
283 |
# Mark our URL
|
284 |
results_df['URL'] = results_df.apply(
|
@@ -403,7 +404,23 @@ def fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, d
|
|
403 |
try:
|
404 |
df = query.limit(MAX_ROWS).get().to_dataframe()
|
405 |
#logging.info("GSC data fetched successfully")
|
406 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
407 |
except Exception as e:
|
408 |
#logging.error(f"Error fetching GSC data: {e}")
|
409 |
show_error(e)
|
@@ -663,9 +680,8 @@ def show_tabular_data(df, co, country_code):
|
|
663 |
cols[3].write(row.clicks)
|
664 |
cols[4].write(row.impressions)
|
665 |
cols[5].write(f"{row.ctr:.2%}")
|
666 |
-
cols[6].write(f"{int(row.position)}") #
|
667 |
cols[7].write(f"{row.relevancy_score:.4f}" if not pd.isna(row.relevancy_score) and row.relevancy_score != 0 else "N/A")
|
668 |
-
|
669 |
# Competitors column
|
670 |
if not pd.isna(row.relevancy_score) and row.relevancy_score != 0:
|
671 |
competitor_state_key = f"comp_state_{i}"
|
@@ -684,15 +700,11 @@ def show_tabular_data(df, co, country_code):
|
|
684 |
with st.spinner('Analyzing competitors...'):
|
685 |
results_df = analyze_competitors(row._asdict(), co, country_code=country_code)
|
686 |
|
687 |
-
#
|
688 |
-
results_df = results_df
|
689 |
-
|
690 |
-
# Update the Position for our URL
|
691 |
-
our_url_mask = results_df['URL'].str.contains('Our URL')
|
692 |
-
results_df.loc[our_url_mask, 'Position'] = row.position # Already integer
|
693 |
|
694 |
-
#
|
695 |
-
results_df
|
696 |
|
697 |
# Create a custom style function to highlight only our URL's row
|
698 |
def highlight_our_url(row):
|
@@ -703,8 +715,8 @@ def show_tabular_data(df, co, country_code):
|
|
703 |
# Apply the custom style and hide the index
|
704 |
styled_df = results_df.style.apply(highlight_our_url, axis=1).hide(axis="index")
|
705 |
|
706 |
-
# Display the styled DataFrame
|
707 |
-
st.markdown(styled_df.to_html(), unsafe_allow_html=True)
|
708 |
|
709 |
# Extract our result for additional insights
|
710 |
our_result = results_df[results_df['URL'].str.contains('Our URL')]
|
@@ -714,8 +726,8 @@ def show_tabular_data(df, co, country_code):
|
|
714 |
total_results = len(results_df)
|
715 |
our_score = our_result['Score'].values[0]
|
716 |
|
717 |
-
st.write(f"Our page ranks {our_rank} out of {total_results} in terms of relevancy score.")
|
718 |
-
st.write(f"Our relevancy score: {our_score:.4f}")
|
719 |
|
720 |
if our_rank == 1:
|
721 |
st.success("Your page has the highest relevancy score!")
|
@@ -725,7 +737,7 @@ def show_tabular_data(df, co, country_code):
|
|
725 |
st.warning("Your page's relevancy score is in the lower half of the results. Consider optimizing your content.")
|
726 |
else:
|
727 |
st.error(f"Our page '{row.page}' is not in the results. This indicates an error in fetching or processing the page.")
|
728 |
-
|
729 |
if compare_state_key not in st.session_state:
|
730 |
st.session_state[compare_state_key] = False
|
731 |
|
|
|
252 |
competitor_url = normalize_url(data['url'])
|
253 |
score = calculate_relevance_score(data['content'], query, co)
|
254 |
results.append({
|
255 |
+
'Position': int(data['position']), # Ensure position is an integer
|
256 |
'URL': competitor_url,
|
257 |
'Score': score,
|
258 |
'is_our_url': competitor_url == our_url
|
|
|
265 |
our_content = fetch_content(our_url, query)
|
266 |
our_score = calculate_relevance_score(our_content, query, co)
|
267 |
|
268 |
+
# Add our URL if it's not in the results
|
269 |
if not any(r['is_our_url'] for r in results):
|
270 |
+
our_position = int(row['position']) # Ensure our position is an integer
|
271 |
results.append({
|
272 |
+
'Position': our_position,
|
273 |
'URL': our_url,
|
274 |
'Score': our_score,
|
275 |
'is_our_url': True
|
|
|
280 |
|
281 |
# Create DataFrame
|
282 |
results_df = pd.DataFrame(results)
|
|
|
283 |
|
284 |
# Mark our URL
|
285 |
results_df['URL'] = results_df.apply(
|
|
|
404 |
try:
|
405 |
df = query.limit(MAX_ROWS).get().to_dataframe()
|
406 |
#logging.info("GSC data fetched successfully")
|
407 |
+
|
408 |
+
# **Ensure 'position' is integer in the processed data**
|
409 |
+
df_sorted = df.sort_values(['impressions'], ascending=[False])
|
410 |
+
df_unique = df_sorted.drop_duplicates(subset='page', keep='first')
|
411 |
+
|
412 |
+
if 'relevancy_score' not in df_unique.columns:
|
413 |
+
df_unique['relevancy_score'] = 0
|
414 |
+
else:
|
415 |
+
df_unique['relevancy_score'] = df_sorted.groupby('page')['relevancy_score'].first().values
|
416 |
+
|
417 |
+
result = df_unique[['page', 'query', 'clicks', 'impressions', 'ctr', 'position', 'relevancy_score']]
|
418 |
+
|
419 |
+
# **Cast 'position' to integer**
|
420 |
+
result['position'] = result['position'].astype(int)
|
421 |
+
|
422 |
+
#logging.info("GSC data processed successfully")
|
423 |
+
return result
|
424 |
except Exception as e:
|
425 |
#logging.error(f"Error fetching GSC data: {e}")
|
426 |
show_error(e)
|
|
|
680 |
cols[3].write(row.clicks)
|
681 |
cols[4].write(row.impressions)
|
682 |
cols[5].write(f"{row.ctr:.2%}")
|
683 |
+
cols[6].write(f"{int(row.position)}") # Ensure Position is integer
|
684 |
cols[7].write(f"{row.relevancy_score:.4f}" if not pd.isna(row.relevancy_score) and row.relevancy_score != 0 else "N/A")
|
|
|
685 |
# Competitors column
|
686 |
if not pd.isna(row.relevancy_score) and row.relevancy_score != 0:
|
687 |
competitor_state_key = f"comp_state_{i}"
|
|
|
700 |
with st.spinner('Analyzing competitors...'):
|
701 |
results_df = analyze_competitors(row._asdict(), co, country_code=country_code)
|
702 |
|
703 |
+
# Ensure Position is displayed as integer
|
704 |
+
results_df['Position'] = results_df['Position'].astype(int)
|
|
|
|
|
|
|
|
|
705 |
|
706 |
+
# Format Score to 6 decimal places
|
707 |
+
results_df['Score'] = results_df['Score'].apply(lambda x: f"{x:.6f}")
|
708 |
|
709 |
# Create a custom style function to highlight only our URL's row
|
710 |
def highlight_our_url(row):
|
|
|
715 |
# Apply the custom style and hide the index
|
716 |
styled_df = results_df.style.apply(highlight_our_url, axis=1).hide(axis="index")
|
717 |
|
718 |
+
# Display the styled DataFrame
|
719 |
+
st.markdown(styled_df.to_html(escape=False, index=False), unsafe_allow_html=True)
|
720 |
|
721 |
# Extract our result for additional insights
|
722 |
our_result = results_df[results_df['URL'].str.contains('Our URL')]
|
|
|
726 |
total_results = len(results_df)
|
727 |
our_score = our_result['Score'].values[0]
|
728 |
|
729 |
+
st.write(f"Our page ranks **{our_rank}** out of **{total_results}** in terms of relevancy score.")
|
730 |
+
st.write(f"Our relevancy score: **{our_score:.4f}**")
|
731 |
|
732 |
if our_rank == 1:
|
733 |
st.success("Your page has the highest relevancy score!")
|
|
|
737 |
st.warning("Your page's relevancy score is in the lower half of the results. Consider optimizing your content.")
|
738 |
else:
|
739 |
st.error(f"Our page '{row.page}' is not in the results. This indicates an error in fetching or processing the page.")
|
740 |
+
|
741 |
if compare_state_key not in st.session_state:
|
742 |
st.session_state[compare_state_key] = False
|
743 |
|