SEO_Metadata_Analyzer / seo_analyzer.py
import requests
from bs4 import BeautifulSoup
import json
import re
from urllib.parse import urljoin, urlparse


class SEOAnalyzer:
    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        })
    def analyze_website(self, url):
        """Main analysis function"""
        try:
            # Fetch the webpage
            response = self.session.get(url, timeout=10, allow_redirects=True)
            response.raise_for_status()

            # Parse HTML
            soup = BeautifulSoup(response.text, 'html.parser')

            # Extract metadata
            metadata = self.extract_metadata(soup)

            # Extract structured data
            structured_data = self.extract_structured_data(soup)

            # Calculate SEO score
            seo_score = self.calculate_seo_score(metadata)

            # Generate recommendations
            recommendations = self.generate_recommendations(metadata, url)

            return {
                'success': True,
                'metadata': metadata,
                'structured_data': structured_data,
                'seo_score': seo_score,
                'recommendations': recommendations,
                'html_head': str(soup.head) if soup.head else "Head section not found"
            }
        except requests.exceptions.RequestException as e:
            return {
                'success': False,
                'error': f"Failed to fetch website: {str(e)}"
            }
        except Exception as e:
            return {
                'success': False,
                'error': f"Analysis error: {str(e)}"
            }
    def extract_metadata(self, soup):
        """Extract all relevant metadata from the page"""
        metadata = {}

        # Title tag
        if soup.title:
            metadata['title'] = soup.title.string.strip() if soup.title.string else ""

        # Meta tags
        meta_tags = soup.find_all('meta')
        for tag in meta_tags:
            # Standard meta tags
            if tag.get('name'):
                name = tag.get('name').lower()
                content = tag.get('content', '')
                metadata[name] = content
            # Property meta tags (Open Graph, etc.)
            elif tag.get('property'):
                prop = tag.get('property').lower()
                content = tag.get('content', '')
                metadata[prop] = content
            # HTTP-equiv meta tags
            elif tag.get('http-equiv'):
                equiv = tag.get('http-equiv').lower()
                content = tag.get('content', '')
                metadata[f'http-equiv-{equiv}'] = content
            # Charset
            elif tag.get('charset'):
                metadata['charset'] = tag.get('charset')

        # Link tags (canonical, etc.)
        link_tags = soup.find_all('link')
        for tag in link_tags:
            rel = tag.get('rel')
            if rel:
                rel_str = ' '.join(rel) if isinstance(rel, list) else rel
                if rel_str in ['canonical', 'alternate', 'prev', 'next']:
                    metadata[f'link-{rel_str}'] = tag.get('href', '')

        return metadata
    def extract_structured_data(self, soup):
        """Extract JSON-LD structured data"""
        structured_data = []
        scripts = soup.find_all('script', type='application/ld+json')
        for script in scripts:
            try:
                if script.string:
                    data = json.loads(script.string.strip())
                    structured_data.append(data)
            except json.JSONDecodeError:
                # Skip blocks that are not valid JSON
                continue
        return structured_data
    def calculate_seo_score(self, metadata):
        """Calculate SEO score based on best practices"""
        score = 0
        max_score = 100

        # Title tag (20 points)
        title = metadata.get('title', '')
        if title:
            score += 10
            if 30 <= len(title) <= 60:
                score += 10

        # Meta description (20 points)
        description = metadata.get('description', '')
        if description:
            score += 10
            if 120 <= len(description) <= 160:
                score += 10

        # Open Graph tags (20 points)
        og_title = metadata.get('og:title', '')
        og_description = metadata.get('og:description', '')
        og_image = metadata.get('og:image', '')
        if og_title:
            score += 7
        if og_description:
            score += 7
        if og_image:
            score += 6

        # Twitter Card (15 points)
        twitter_card = metadata.get('twitter:card', '')
        twitter_title = metadata.get('twitter:title', '')
        twitter_description = metadata.get('twitter:description', '')
        if twitter_card:
            score += 5
        if twitter_title:
            score += 5
        if twitter_description:
            score += 5

        # Technical SEO (25 points)
        if metadata.get('viewport'):
            score += 5
        if metadata.get('charset'):
            score += 5
        if metadata.get('robots'):
            score += 5
        if metadata.get('link-canonical'):
            score += 5
        if not metadata.get('robots') or 'noindex' not in metadata.get('robots', '').lower():
            score += 5

        return min(score, max_score)
    def generate_recommendations(self, metadata, url):
        """Generate actionable SEO recommendations"""
        recommendations = []

        # Title tag recommendations
        title = metadata.get('title', '')
        if not title:
            recommendations.append({
                'type': 'error',
                'message': 'Missing title tag. Add a descriptive title between 30-60 characters.'
            })
        elif len(title) < 30:
            recommendations.append({
                'type': 'warning',
                'message': f'Title tag is too short ({len(title)} chars). Aim for 30-60 characters.'
            })
        elif len(title) > 60:
            recommendations.append({
                'type': 'warning',
                'message': f'Title tag is too long ({len(title)} chars). Keep it under 60 characters to avoid truncation.'
            })

        # Meta description recommendations
        description = metadata.get('description', '')
        if not description:
            recommendations.append({
                'type': 'error',
                'message': 'Missing meta description. Add a compelling description between 120-160 characters.'
            })
        elif len(description) < 120:
            recommendations.append({
                'type': 'warning',
                'message': f'Meta description is too short ({len(description)} chars). Aim for 120-160 characters.'
            })
        elif len(description) > 160:
            recommendations.append({
                'type': 'warning',
                'message': f'Meta description is too long ({len(description)} chars). Keep it under 160 characters.'
            })

        # Open Graph recommendations
        if not metadata.get('og:title'):
            recommendations.append({
                'type': 'warning',
                'message': 'Missing Open Graph title. Add og:title for better social media sharing.'
            })
        if not metadata.get('og:description'):
            recommendations.append({
                'type': 'warning',
                'message': 'Missing Open Graph description. Add og:description for social media previews.'
            })
        if not metadata.get('og:image'):
            recommendations.append({
                'type': 'warning',
                'message': 'Missing Open Graph image. Add og:image (1200x630px recommended) for social sharing.'
            })

        # Twitter Card recommendations
        if not metadata.get('twitter:card'):
            recommendations.append({
                'type': 'info',
                'message': 'Consider adding Twitter Card meta tags for a better Twitter sharing experience.'
            })

        # Technical SEO recommendations
        if not metadata.get('viewport'):
            recommendations.append({
                'type': 'error',
                'message': 'Missing viewport meta tag. Add <meta name="viewport" content="width=device-width, initial-scale=1"> for mobile optimization.'
            })
        if not metadata.get('charset'):
            recommendations.append({
                'type': 'warning',
                'message': 'Missing charset declaration. Add <meta charset="UTF-8"> in the head section.'
            })
        if not metadata.get('link-canonical'):
            recommendations.append({
                'type': 'info',
                'message': 'Consider adding a canonical URL to prevent duplicate content issues.'
            })

        # Robots meta tag
        robots = metadata.get('robots', '')
        if 'noindex' in robots.lower():
            recommendations.append({
                'type': 'warning',
                'message': 'Page is set to noindex. Remove this if you want the page to be indexed by search engines.'
            })

        return recommendations
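

# --- Example usage (illustrative sketch, not part of the original file) ---
# A minimal demonstration of how SEOAnalyzer might be run from the command
# line. The target URL below is a placeholder assumption; swap in any page
# you want to audit.
if __name__ == "__main__":
    analyzer = SEOAnalyzer()
    result = analyzer.analyze_website("https://example.com")
    if result['success']:
        print(f"SEO score: {result['seo_score']}/100")
        for rec in result['recommendations']:
            print(f"[{rec['type']}] {rec['message']}")
    else:
        print(result['error'])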