# Spaces:
# Runtime error
# Runtime error
# Importing libraries
import html

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from advertools import sitemap_to_df
# Page title shown at the top of the main area.
st.title("Sitemap Analyzer")

# Sidebar instructions: what the tool does, how to fill in the inputs, and
# where to look for a domain's sitemap.
st.sidebar.markdown("### How to use this Sitemap Analyzer")
st.sidebar.markdown("""
This sitemap analyzer shows you how many pages each domain has published over a period of time.
To use it, input the client's sitemap on "Input client sitemap here" and put up to 3 competitor sitemaps below it, pressing enter after every time you put the sitemap URL.
""")
st.sidebar.markdown("You can use this tool to detect or guess where the sitemap of each domain can be: [Free Sitemap Finder & Checker Tool](https://seomator.com/sitemap-finder)")
st.sidebar.markdown("## Tool uploaded and maintained by: [Blazing SEO](http://blazing-seo.com/)")
# Sidebar inputs for up to 4 sitemaps: the client's first, then up to three
# competitors. The position in this list selects the matching entry of
# `colors` below, so the two lists must stay the same length.
sitemap_urls = [
    st.sidebar.text_input("Input client sitemap here:", ""),
    st.sidebar.text_input("Enter the competitor sitemap URL 1:", ""),
    st.sidebar.text_input("Enter the competitor sitemap URL 2:", ""),
    st.sidebar.text_input("Enter the competitor sitemap URL 3:", ""),
]

# Colors for sitemaps (one per input above, in the same order).
colors = ['green', 'blue', 'red', 'brown']
# Displaying legend in the sidebar: one line per filled-in sitemap, followed
# by a square in the color used for that sitemap's charts.
st.sidebar.markdown("### Legend")
for idx, sitemap_url in enumerate(sitemap_urls):
    legend_label = sitemap_url.strip()
    if not legend_label:
        continue
    # The line is rendered with unsafe_allow_html=True so the colored <span>
    # works; escape the user-typed URL so it cannot inject markup of its own.
    st.sidebar.markdown(
        f"{html.escape(legend_label)}: <span style='color:{colors[idx]}'>■</span>",
        unsafe_allow_html=True,
    )
# Weekday labels in the order of pandas' ``dt.dayofweek`` (0 = Monday).
WEEKDAY_LABELS = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# Bar width for the monthly chart. On a date axis bar widths are measured in
# days, so the default of 0.8 would draw bars less than one day wide.
MONTHLY_BAR_WIDTH_DAYS = 20


def _monthly_counts(dated):
    """Return the number of URLs whose ``lastmod`` falls in each month.

    Args:
        dated: DataFrame whose ``lastmod`` column holds datetimes (no NaT).

    Returns:
        Series of row counts indexed by month.
    """
    # NOTE(review): newer pandas releases deprecate the "M" alias in favour
    # of "ME" -- confirm against the pandas version this app is pinned to.
    return dated.resample("M", on='lastmod').size()


def _weekday_counts(dated):
    """Return the number of URLs last modified on each day of the week.

    Args:
        dated: DataFrame whose ``lastmod`` column holds datetimes (no NaT).

    Returns:
        Series of counts indexed by ``WEEKDAY_LABELS``; weekdays on which no
        URL was modified are reported as 0.
    """
    counts = dated['lastmod'].dt.dayofweek.value_counts()
    # value_counts() only lists weekdays that occur in the data. Align to all
    # seven days first: assigning seven labels to a shorter index raises a
    # length-mismatch ValueError.
    counts = counts.reindex(range(len(WEEKDAY_LABELS)), fill_value=0)
    counts.index = WEEKDAY_LABELS
    return counts


def _plot_trend(trend_name, sitemap_url, trends, color, bar_width=0.8):
    """Render one bar chart of ``trends`` under its own subheader.

    Args:
        trend_name: Label used in the subheader and chart title.
        sitemap_url: Sitemap the chart belongs to (shown in the subheader).
        trends: Series whose index gives the x positions and whose values
            give the bar heights.
        color: Matplotlib color for the bars.
        bar_width: Width passed to ``ax.bar``.
    """
    st.subheader(f"{trend_name} Trends for {sitemap_url}")
    fig, ax = plt.subplots(figsize=(10, 6))  # wide figure for readability
    try:
        ax.bar(trends.index, trends.values, color=color, width=bar_width)
        ax.set_ylabel("Count")
        ax.set_title(f"{trend_name} Trends")
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates alive until it is closed; the
        # script reruns on each input change, so close it explicitly.
        plt.close(fig)


def _analyze_sitemap(sitemap_url, color):
    """Fetch one sitemap and render its table, trend charts and URL total.

    Args:
        sitemap_url: URL of the sitemap to download.
        color: Matplotlib color used for this sitemap's charts.

    Raises:
        Exception: Whatever ``sitemap_to_df`` or the rendering calls raise;
            the caller reports it to the user.
    """
    # Fetching the sitemap data
    sitemap_data = sitemap_to_df(sitemap_url)

    has_lastmod = 'lastmod' in sitemap_data.columns
    if has_lastmod:
        # errors='coerce' turns unparsable dates into NaT instead of failing
        # the whole sitemap; utc=True gives values with different UTC
        # offsets one common dtype so `.dt` and resample work.
        sitemap_data['lastmod'] = pd.to_datetime(
            sitemap_data['lastmod'], errors='coerce', utc=True
        )

    # Displaying the DataFrame
    st.subheader(f"{sitemap_url}")
    st.dataframe(sitemap_data)

    # Plotting trends (excluding yearly trends); only rows with a usable
    # modification date can contribute.
    dated = sitemap_data.dropna(subset=['lastmod']) if has_lastmod else None
    if dated is None or dated.empty:
        st.warning(
            f"No usable 'lastmod' dates found for {sitemap_url}; "
            "trend charts are skipped."
        )
    else:
        _plot_trend(
            "Monthly", sitemap_url, _monthly_counts(dated), color,
            bar_width=MONTHLY_BAR_WIDTH_DAYS,
        )
        _plot_trend("Weekly", sitemap_url, _weekday_counts(dated), color)

    # Total number of URLs
    st.subheader(f"Total Number of URLs for {sitemap_url}")
    total_urls = len(sitemap_data)
    st.write(f"The total number of URLs in the sitemap is {total_urls}.")


# Keep each URL paired with its input position so it still picks the right
# color when earlier fields are left empty.
entered_sitemaps = [
    (idx, url.strip())
    for idx, url in enumerate(sitemap_urls)
    if url.strip()
]

# Show the hint once, and only when nothing has been entered at all.
if not entered_sitemaps:
    st.write("Please enter at least one sitemap URL to analyze.")

for idx, sitemap_url in entered_sitemaps:
    try:
        _analyze_sitemap(sitemap_url, colors[idx])
    except Exception as e:
        # Deliberately broad: a failure in one sitemap (network, parsing,
        # plotting) must not prevent the remaining sitemaps from rendering.
        st.write(f"An error occurred for {sitemap_url}:", str(e))
# Sidebar credits: links to the tutorial and repository this analysis is
# based on.
st.sidebar.markdown("Credits")
st.sidebar.markdown("[semrush.com/blog/content-analysis-xml-sitemaps-python](https://www.semrush.com/blog/content-analysis-xml-sitemaps-python/)")
st.sidebar.markdown("[Using XML Sitemaps for Content Analysis with Python](https://github.com/eliasdabbas/semrush_tutorial_sitemap_analysis/)")