# Spaces: Sleeping
import json
import re
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup
class SEOAnalyzer:
    """Fetch a web page and evaluate its on-page SEO metadata.

    The analyzer downloads a page with a shared ``requests`` session, parses
    it with BeautifulSoup, collects ``<title>``, ``<meta>`` and ``<link>``
    information plus JSON-LD blocks, and derives a 0-100 score together with
    a list of recommendations.
    """

    # Inclusive length windows (in characters) used by both the score and
    # the recommendations, so the two can never disagree.
    TITLE_LENGTH_RANGE = (30, 60)
    DESCRIPTION_LENGTH_RANGE = (120, 160)

    MAX_SCORE = 100

    # <link rel="..."> values that extract_metadata records.
    TRACKED_LINK_RELS = ('canonical', 'alternate', 'prev', 'next')

    # (metadata key, points) pairs awarded when the key has a non-empty value.
    _PRESENCE_POINTS = (
        ('og:title', 7),
        ('og:description', 7),
        ('og:image', 6),
        ('twitter:card', 5),
        ('twitter:title', 5),
        ('twitter:description', 5),
        ('viewport', 5),
        ('charset', 5),
        ('robots', 5),
        ('link-canonical', 5),
    )

    # (metadata key, severity, message) emitted when the key is missing/empty.
    _MISSING_TAG_ADVICE = (
        ('og:title', 'warning',
         'Missing Open Graph title. Add og:title for better social media sharing.'),
        ('og:description', 'warning',
         'Missing Open Graph description. Add og:description for social media previews.'),
        ('og:image', 'warning',
         'Missing Open Graph image. Add og:image (1200x630px recommended) for social sharing.'),
        ('twitter:card', 'info',
         'Consider adding Twitter Card meta tags for better Twitter sharing experience.'),
        ('viewport', 'error',
         'Missing viewport meta tag. Add <meta name="viewport" content="width=device-width, initial-scale=1"> for mobile optimization.'),
        ('charset', 'warning',
         'Missing charset declaration. Add <meta charset="UTF-8"> in the head section.'),
        ('link-canonical', 'info',
         'Consider adding a canonical URL to prevent duplicate content issues.'),
    )

    # Leading "scheme://" of an absolute URL.
    _SCHEME_RE = re.compile(r'^[a-z][a-z0-9+.-]*://', re.IGNORECASE)
    # "charset=..." inside a legacy http-equiv Content-Type value.
    _CHARSET_RE = re.compile(r'charset\s*=\s*["\']?([\w.:-]+)', re.IGNORECASE)

    def __init__(self):
        """Create the HTTP session and give it a browser-like User-Agent."""
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/91.0.4472.124 Safari/537.36'
            ),
        })

    @classmethod
    def _normalize_url(cls, url):
        """Return *url* stripped, with ``https://`` prepended if it has no scheme."""
        url = url.strip()
        if not cls._SCHEME_RE.match(url):
            # Also covers protocol-relative input such as "//example.com".
            url = 'https://' + url.lstrip('/')
        return url

    def analyze_website(self, url):
        """Fetch *url* and run the full analysis.

        Args:
            url: Address of the page. A missing scheme defaults to https.

        Returns:
            On success, a dict with ``success`` (True), ``metadata``,
            ``structured_data``, ``seo_score``, ``recommendations`` and
            ``html_head``. On failure, a dict with ``success`` (False) and an
            ``error`` message; no exception propagates to the caller.
        """
        try:
            response = self.session.get(
                self._normalize_url(url), timeout=10, allow_redirects=True
            )
            response.raise_for_status()

            # Parse the raw bytes rather than response.text: when the server
            # sends no charset, requests falls back to ISO-8859-1 for text
            # responses, which garbles UTF-8 pages. BeautifulSoup can instead
            # honour the document's own <meta charset>. A charset that the
            # server did declare is passed along as a hint.
            content_type = response.headers.get('Content-Type', '')
            header_encoding = (
                response.encoding if 'charset' in content_type.lower() else None
            )
            soup = BeautifulSoup(
                response.content, 'html.parser', from_encoding=header_encoding
            )

            metadata = self.extract_metadata(soup)
            return {
                'success': True,
                'metadata': metadata,
                'structured_data': self.extract_structured_data(soup),
                'seo_score': self.calculate_seo_score(metadata),
                'recommendations': self.generate_recommendations(metadata, url),
                'html_head': str(soup.head) if soup.head else "Head section not found",
            }
        except requests.exceptions.RequestException as e:
            return {
                'success': False,
                'error': f"Failed to fetch website: {e}",
            }
        except Exception as e:
            # Boundary handler: callers rely on always getting a result dict.
            return {
                'success': False,
                'error': f"Analysis error: {e}",
            }

    def extract_metadata(self, soup):
        """Collect title, meta-tag and link-tag values from a parsed page.

        Args:
            soup: Parsed document (BeautifulSoup object).

        Returns:
            Dict keyed by lower-cased meta ``name``/``property``,
            ``http-equiv-<value>``, ``charset``, ``title`` and
            ``link-<rel>`` for the rels in ``TRACKED_LINK_RELS``. When a key
            occurs several times, the last occurrence wins.
        """
        metadata = {}

        if soup.title:
            # get_text() also works when <title> contains nested markup,
            # where .string would be None.
            metadata['title'] = soup.title.get_text().strip()

        for tag in soup.find_all('meta'):
            content = tag.get('content', '')
            name = tag.get('name')
            prop = tag.get('property')
            equiv = tag.get('http-equiv')

            # A tag may carry both name= and property=; record both so that
            # e.g. og:description is not reported as missing.
            if name:
                metadata[name.lower()] = content
            if prop:
                metadata[prop.lower()] = content
            if name or prop:
                continue

            if equiv:
                equiv = equiv.lower()
                metadata[f'http-equiv-{equiv}'] = content
                if equiv == 'content-type':
                    # Legacy charset declaration; an explicit
                    # <meta charset> takes precedence if present.
                    found = self._CHARSET_RE.search(content)
                    if found:
                        metadata.setdefault('charset', found.group(1))
            elif tag.get('charset'):
                metadata['charset'] = tag.get('charset')

        for tag in soup.find_all('link'):
            rel = tag.get('rel')
            if not rel:
                continue
            # BeautifulSoup returns rel as a list of tokens; rel values are
            # case-insensitive, so normalise before comparing.
            rel_str = (' '.join(rel) if isinstance(rel, list) else rel).lower()
            if rel_str in self.TRACKED_LINK_RELS:
                metadata[f'link-{rel_str}'] = tag.get('href', '')

        return metadata

    def extract_structured_data(self, soup):
        """Return the decoded JSON-LD blocks found in the page.

        Args:
            soup: Parsed document (BeautifulSoup object).

        Returns:
            List with one decoded JSON value per
            ``<script type="application/ld+json">`` element. Empty or
            malformed blocks are skipped.
        """
        structured_data = []
        for script in soup.find_all('script', type='application/ld+json'):
            if not script.string:
                continue
            try:
                structured_data.append(json.loads(script.string.strip()))
            except json.JSONDecodeError:
                # Best effort: one broken block must not hide the others.
                continue
        return structured_data

    def calculate_seo_score(self, metadata):
        """Score the metadata from 0 to ``MAX_SCORE``.

        Points: title 20 and description 20 (10 for presence, 10 for a
        length inside the recommended window), Open Graph 20, Twitter Card
        15, technical tags 20, and 5 for not being marked ``noindex``.

        Args:
            metadata: Dict as produced by ``extract_metadata``.

        Returns:
            Integer score, capped at ``MAX_SCORE``.
        """
        score = 0

        for key, (shortest, longest) in (
            ('title', self.TITLE_LENGTH_RANGE),
            ('description', self.DESCRIPTION_LENGTH_RANGE),
        ):
            text = metadata.get(key, '')
            if text:
                score += 10
                if shortest <= len(text) <= longest:
                    score += 10

        score += sum(
            points for key, points in self._PRESENCE_POINTS if metadata.get(key)
        )

        # A missing robots tag counts as indexable.
        if 'noindex' not in metadata.get('robots', '').lower():
            score += 5

        return min(score, self.MAX_SCORE)

    def generate_recommendations(self, metadata, url):
        """Build the list of SEO recommendations for a page.

        Args:
            metadata: Dict as produced by ``extract_metadata``.
            url: Address of the analysed page. Currently unused; kept so
                existing callers keep working.

        Returns:
            List of ``{'type': ..., 'message': ...}`` dicts where ``type`` is
            ``'error'``, ``'warning'`` or ``'info'``.
        """
        recommendations = []

        def add(kind, message):
            recommendations.append({'type': kind, 'message': message})

        title = metadata.get('title', '')
        t_min, t_max = self.TITLE_LENGTH_RANGE
        if not title:
            add('error',
                f'Missing title tag. Add a descriptive title between {t_min}-{t_max} characters.')
        elif len(title) < t_min:
            add('warning',
                f'Title tag is too short ({len(title)} chars). Aim for {t_min}-{t_max} characters.')
        elif len(title) > t_max:
            add('warning',
                f'Title tag is too long ({len(title)} chars). Keep it under {t_max} characters to avoid truncation.')

        description = metadata.get('description', '')
        d_min, d_max = self.DESCRIPTION_LENGTH_RANGE
        if not description:
            add('error',
                f'Missing meta description. Add a compelling description between {d_min}-{d_max} characters.')
        elif len(description) < d_min:
            add('warning',
                f'Meta description is too short ({len(description)} chars). Aim for {d_min}-{d_max} characters.')
        elif len(description) > d_max:
            add('warning',
                f'Meta description is too long ({len(description)} chars). Keep it under {d_max} characters.')

        for key, kind, message in self._MISSING_TAG_ADVICE:
            if not metadata.get(key):
                add(kind, message)

        if 'noindex' in metadata.get('robots', '').lower():
            add('warning',
                'Page is set to noindex. Remove this if you want the page to be indexed by search engines.')

        return recommendations