# blazingbunny's picture
# Update app.py
# 37c14ee
# Importing libraries
import html

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from advertools import sitemap_to_df
# App title
st.title("Sitemap Analyzer")

# Sidebar help text: each entry is rendered as its own markdown element,
# top to bottom, in the order listed here.
for _sidebar_note in (
    "### How to use this Sitemap Analyzer",
    """
This sitemap analyzer shows you how many pages each domain has published over a period of time.
To use it, input the client's sitemap on "Input client sitemap here" and put up to 3 competitor sitemaps below it, pressing enter after every time you put the sitemap URL.
""",
    "You can use this tool to detect or guess where the sitemap of each domain can be: [Free Sitemap Finder & Checker Tool](https://seomator.com/sitemap-finder)",
    "## Tool uploaded and maintained by: [Blazing SEO](http://blazing-seo.com/)",
):
    st.sidebar.markdown(_sidebar_note)
# Sidebar inputs for up to 4 sitemaps: the client's first, then three
# competitors. Every field starts out empty.
_input_labels = (
    "Input client sitemap here:",
    *(f"Enter the competitor sitemap URL {n}:" for n in (1, 2, 3)),
)
sitemap_urls = [st.sidebar.text_input(label, "") for label in _input_labels]
# Colors for sitemaps: entry i belongs to the i-th sidebar input.
colors = ['green', 'blue', 'red', 'brown']

# Displaying legend in the sidebar: one line per non-empty input, made of the
# URL followed by a square drawn in that sitemap's color.
st.sidebar.markdown("### Legend")
for idx, sitemap_url in enumerate(sitemap_urls):
    if sitemap_url:
        # The URL is free text typed by the user and this markdown is rendered
        # with unsafe_allow_html=True, so escape it to keep any markup in the
        # input from being interpreted as HTML.
        safe_url = html.escape(sitemap_url)
        st.sidebar.markdown(
            f"{safe_url}: <span style='color:{colors[idx]}'>■</span>",
            unsafe_allow_html=True,
        )
# Analyze every sitemap that was entered: show its raw table, chart when its
# pages were last modified, and report how many URLs it lists.
for idx, sitemap_url in enumerate(sitemap_urls):
    if not sitemap_url:
        continue
    try:
        # Fetching the sitemap data
        sitemap_data = sitemap_to_df(sitemap_url)
        sitemap_data['lastmod'] = pd.to_datetime(sitemap_data['lastmod'])

        # Displaying the DataFrame
        st.subheader(f"{sitemap_url}")
        st.dataframe(sitemap_data)

        # Plotting trends (excluding yearly trends)
        for trend_name, resample_rule, ylabel in [("Monthly", "M", "Count"), ("Weekly", "W", "Count")]:
            st.subheader(f"{trend_name} Trends for {sitemap_url}")
            fig, ax = plt.subplots(figsize=(10, 6))  # Adjusting figure size for wider charts
            try:
                if trend_name == "Weekly":
                    # Day-of-week distribution (0 = Monday). Rows without a
                    # date are dropped, and weekdays with no pages are filled
                    # with 0 so the seven labels below always line up; without
                    # the reindex, a sitemap missing any weekday would fail
                    # with a length mismatch when the labels are assigned.
                    weekday_counts = sitemap_data['lastmod'].dropna().dt.dayofweek.value_counts()
                    trends = weekday_counts.reindex(range(7), fill_value=0)
                    trends.index = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
                else:
                    # Number of rows per resampling period of 'lastmod'.
                    trends = sitemap_data.resample(resample_rule, on='lastmod').size()
                ax.bar(trends.index, trends.values, color=colors[idx])
                ax.set_ylabel(ylabel)
                ax.set_title(f"{trend_name} Trends")
                st.pyplot(fig)
            finally:
                # Release the figure even if plotting failed; otherwise open
                # figures pile up every time the script is re-run.
                plt.close(fig)

        # Total number of URLs
        st.subheader(f"Total Number of URLs for {sitemap_url}")
        total_urls = len(sitemap_data)
        st.write(f"The total number of URLs in the sitemap is {total_urls}.")
    except Exception as e:
        # Best-effort per sitemap: report the failure and carry on with the
        # remaining inputs instead of aborting the whole page.
        st.write(f"An error occurred for {sitemap_url}:", str(e))

# Prompt for input only when every field is empty, and only once.
if not any(sitemap_urls):
    st.write("Please enter at least one sitemap URL to analyze.")
# Sidebar footer: a "Credits" line followed by the two source links, each
# rendered as its own markdown element in this order.
for _credit_line in (
    "Credits",
    "[semrush.com/blog/content-analysis-xml-sitemaps-python](https://www.semrush.com/blog/content-analysis-xml-sitemaps-python/)",
    "[Using XML Sitemaps for Content Analysis with Python](https://github.com/eliasdabbas/semrush_tutorial_sitemap_analysis/)",
):
    st.sidebar.markdown(_credit_line)