yashgori20 committed on
Commit 7da164e · 1 Parent(s): 5f0cfa7
Files changed (1)
  1. modules/keywords.py +484 -245
modules/keywords.py CHANGED
@@ -1,64 +1,114 @@
"""
Keywords Rankings Module for SEO Report Generator
-Supports Google Search Console API (primary) and SERP API (fallback)
"""

import os
import requests
import json
-from typing import Dict, Any, List, Optional
from urllib.parse import urlparse
from datetime import datetime, timedelta


class ModuleResult:
    """Standard result object for SEO modules"""
-    def __init__(self, success: bool, data: Dict[str, Any], error: str = None):
-        self.success = success
-        self.data = data
-        self.error = error


class KeywordsModule:
    def __init__(self):
-        self.gsc_api_key = os.getenv('GOOGLE_SEARCH_CONSOLE_API_KEY')
-        self.serp_api_key = os.getenv('SERP_API_KEY')  # SerpAPI or similar
-        self.data_for_seo_key = os.getenv('DATAFORSEO_API_KEY')

-    def analyze(self, url: str, quick_scan: bool = False) -> ModuleResult:
        """
-        Analyze keyword rankings for the given URL

        Args:
            url: Target website URL
-            quick_scan: If True, use limited data for competitor analysis

        Returns:
-            ModuleResult with keywords data
        """
        try:
            domain = self._extract_domain(url)

-            # Try Google Search Console first (if credentials available)
-            if self.gsc_api_key:
-                result = self._analyze_with_gsc(domain, quick_scan)
-                if result.success:
-                    return result

-            # Fallback to SERP API
-            if self.serp_api_key:
-                result = self._analyze_with_serp_api(domain, quick_scan)
-                if result.success:
-                    return result

-            # Fallback to DataForSEO
-            if self.data_for_seo_key:
-                result = self._analyze_with_dataforseo(domain, quick_scan)
-                if result.success:
-                    return result

-            # No API keys available - return placeholder data
-            return self._generate_placeholder_data(domain)

        except Exception as e:
            return ModuleResult(
@@ -73,243 +123,432 @@ class KeywordsModule:
            url = 'https://' + url
        return urlparse(url).netloc.replace('www.', '')

-    def _analyze_with_gsc(self, domain: str, quick_scan: bool) -> ModuleResult:
-        """Analyze with Google Search Console API"""
        try:
-            # Note: GSC API requires site verification and proper setup
-            # This is a simplified implementation - real GSC API needs OAuth2
-
-            # GSC API endpoint (simplified)
-            base_url = "https://searchconsole.googleapis.com/webmasters/v3/sites"
-            site_url = f"https://{domain}/"
-
-            # Get search analytics data
-            analytics_url = f"{base_url}/{site_url}/searchAnalytics/query"

-            # Date range (last 90 days)
-            end_date = datetime.now().date()
-            start_date = end_date - timedelta(days=90)

-            payload = {
-                "startDate": start_date.isoformat(),
-                "endDate": end_date.isoformat(),
-                "dimensions": ["query", "page"],
-                "rowLimit": 1000 if not quick_scan else 100
            }

-            headers = {
-                "Authorization": f"Bearer {self.gsc_api_key}",
-                "Content-Type": "application/json"
-            }
-
-            response = requests.post(analytics_url, json=payload, headers=headers, timeout=30)
-
-            if response.status_code != 200:
-                raise Exception(f"GSC API error: {response.status_code}")
-
-            data = response.json()
-            return self._process_gsc_data(data, domain)
-
        except Exception as e:
-            return ModuleResult(success=False, data={}, error=str(e))

-    def _analyze_with_serp_api(self, domain: str, quick_scan: bool) -> ModuleResult:
-        """Analyze with SERP API (SerpAPI, etc.)"""
-        try:
-            # Using SerpAPI as example
-            url = "https://serpapi.com/search"
-
-            params = {
-                "engine": "google",
-                "q": f"site:{domain}",
-                "api_key": self.serp_api_key,
-                "num": 100 if not quick_scan else 20
            }
-
-            response = requests.get(url, params=params, timeout=30)
-
-            if response.status_code != 200:
-                raise Exception(f"SERP API error: {response.status_code}")
-
-            data = response.json()
-            return self._process_serp_data(data, domain)
-
-        except Exception as e:
-            return ModuleResult(success=False, data={}, error=str(e))

-    def _analyze_with_dataforseo(self, domain: str, quick_scan: bool) -> ModuleResult:
-        """Analyze with DataForSEO API"""
-        try:
-            # DataForSEO implementation
-            auth = (self.data_for_seo_key, os.getenv('DATAFORSEO_API_PASSWORD', ''))
-
-            # Get domain keywords
-            url = "https://api.dataforseo.com/v3/dataforseo_labs/google/ranked_keywords/live"
-
-            payload = {
-                "target": domain,
-                "limit": 1000 if not quick_scan else 100,
-                "offset": 0,
-                "filters": [
-                    ["metrics.organic.pos", "<=", 100]
-                ]
            }
-
-            response = requests.post(url, json=[payload], auth=auth, timeout=60)
-
-            if response.status_code != 200:
-                raise Exception(f"DataForSEO API error: {response.status_code}")
-
-            data = response.json()
-            return self._process_dataforseo_data(data, domain)
-
-        except Exception as e:
-            return ModuleResult(success=False, data={}, error=str(e))

-    def _process_gsc_data(self, data: Dict, domain: str) -> ModuleResult:
-        """Process Google Search Console data"""
-        if 'rows' not in data:
-            return ModuleResult(success=False, data={}, error="No GSC data available")

-        rows = data['rows']
-        total_keywords = len(rows)

-        # Position distribution
-        top_3 = sum(1 for row in rows if row.get('position', 100) <= 3)
-        top_10 = sum(1 for row in rows if row.get('position', 100) <= 10)
-        top_50 = sum(1 for row in rows if row.get('position', 100) <= 50)
-
-        # Best and worst performing
-        sorted_by_position = sorted(rows, key=lambda x: x.get('position', 100))
-        best_keywords = sorted_by_position[:10]
-        worst_keywords = sorted_by_position[-10:]
-
-        # High opportunity keywords (high impressions, low clicks)
-        opportunity_keywords = []
-        for row in rows:
-            impressions = row.get('impressions', 0)
-            clicks = row.get('clicks', 0)
-            ctr = (clicks / impressions * 100) if impressions > 0 else 0

-            if impressions > 100 and ctr < 2 and row.get('position', 100) > 10:
-                opportunity_keywords.append({
-                    'keyword': row.get('keys', [''])[0],
-                    'position': row.get('position', 0),
-                    'impressions': impressions,
-                    'clicks': clicks,
-                    'ctr': round(ctr, 2)
                })

-        opportunity_keywords = sorted(opportunity_keywords, key=lambda x: x['impressions'], reverse=True)[:10]
-
-        keywords_data = {
-            'total_keywords': total_keywords,
-            'position_distribution': {
-                'top_3': top_3,
-                'top_10': top_10,
-                'top_50': top_50,
-                'beyond_50': total_keywords - top_50
-            },
-            'best_keywords': [
-                {
-                    'keyword': row.get('keys', [''])[0],
-                    'position': row.get('position', 0),
-                    'clicks': row.get('clicks', 0),
-                    'impressions': row.get('impressions', 0)
-                } for row in best_keywords
-            ],
-            'worst_keywords': [
-                {
-                    'keyword': row.get('keys', [''])[0],
-                    'position': row.get('position', 0),
-                    'clicks': row.get('clicks', 0),
-                    'impressions': row.get('impressions', 0)
-                } for row in worst_keywords
-            ],
-            'opportunity_keywords': opportunity_keywords,
-            'data_source': 'Google Search Console',
-            'last_updated': datetime.now().isoformat()
-        }

-        return ModuleResult(success=True, data=keywords_data)

-    def _process_serp_data(self, data: Dict, domain: str) -> ModuleResult:
-        """Process SERP API data"""
-        # Simplified SERP data processing
-        organic_results = data.get('organic_results', [])
-
-        keywords_data = {
-            'total_keywords': len(organic_results),
-            'position_distribution': {
-                'top_3': len([r for r in organic_results if r.get('position', 100) <= 3]),
-                'top_10': len([r for r in organic_results if r.get('position', 100) <= 10]),
-                'top_50': len([r for r in organic_results if r.get('position', 100) <= 50]),
-                'beyond_50': len([r for r in organic_results if r.get('position', 100) > 50])
-            },
-            'best_keywords': [
-                {
-                    'keyword': r.get('title', ''),
-                    'position': r.get('position', 0),
-                    'url': r.get('link', '')
-                } for r in organic_results[:10]
-            ],
-            'data_source': 'SERP API',
-            'last_updated': datetime.now().isoformat()
-        }

-        return ModuleResult(success=True, data=keywords_data)

-    def _process_dataforseo_data(self, data: Dict, domain: str) -> ModuleResult:
-        """Process DataForSEO data"""
-        if not data.get('tasks') or not data['tasks'][0].get('result'):
-            return ModuleResult(success=False, data={}, error="No DataForSEO data available")

-        results = data['tasks'][0]['result']
-        total_keywords = len(results)

-        # Position distribution
-        top_3 = sum(1 for r in results if r.get('metrics', {}).get('organic', {}).get('pos', 100) <= 3)
-        top_10 = sum(1 for r in results if r.get('metrics', {}).get('organic', {}).get('pos', 100) <= 10)
-        top_50 = sum(1 for r in results if r.get('metrics', {}).get('organic', {}).get('pos', 100) <= 50)
-
-        keywords_data = {
-            'total_keywords': total_keywords,
-            'position_distribution': {
-                'top_3': top_3,
-                'top_10': top_10,
-                'top_50': top_50,
-                'beyond_50': total_keywords - top_50
-            },
-            'best_keywords': [
-                {
-                    'keyword': r.get('keyword', ''),
-                    'position': r.get('metrics', {}).get('organic', {}).get('pos', 0),
-                    'search_volume': r.get('keyword_info', {}).get('search_volume', 0)
-                } for r in sorted(results, key=lambda x: x.get('metrics', {}).get('organic', {}).get('pos', 100))[:10]
-            ],
-            'data_source': 'DataForSEO',
-            'last_updated': datetime.now().isoformat()
-        }

-        return ModuleResult(success=True, data=keywords_data)

-    def _generate_placeholder_data(self, domain: str) -> ModuleResult:
-        """Generate placeholder data when no API keys are available"""
-        keywords_data = {
-            'total_keywords': 0,
-            'position_distribution': {
-                'top_3': 0,
-                'top_10': 0,
-                'top_50': 0,
-                'beyond_50': 0
-            },
-            'best_keywords': [],
-            'worst_keywords': [],
-            'opportunity_keywords': [],
-            'data_source': 'No API credentials',
-            'last_updated': datetime.now().isoformat(),
-            'placeholder': True,
-            'message': 'Connect Google Search Console or SERP API to unlock keyword data'
-        }

-        return ModuleResult(success=True, data=keywords_data)
@@ -1,64 +1,114 @@
"""
Keywords Rankings Module for SEO Report Generator
+Implements PRD requirements with Competitors Ranking Keywords API and Google Keyword Insight API
"""

import os
import requests
import json
+import time
+import hashlib
+from typing import Dict, Any, List, Optional, Tuple
from urllib.parse import urlparse
from datetime import datetime, timedelta
+from dataclasses import dataclass
+from concurrent.futures import ThreadPoolExecutor, as_completed


+@dataclass
class ModuleResult:
    """Standard result object for SEO modules"""
+    success: bool
+    data: Dict[str, Any]
+    error: str = None


class KeywordsModule:
    def __init__(self):
+        # API Configuration
+        self.rapidapi_key = os.getenv('RAPIDAPI_KEY')
+        self.primary_api_host = "seo-get-competitors-ranking-keywords.p.rapidapi.com"
+        self.enrichment_api_host = "google-keyword-insight1.p.rapidapi.com"

+        # Performance Configuration
+        self.timeout = int(os.getenv('KEYWORD_API_TIMEOUT', 30))
+        self.max_retries = int(os.getenv('KEYWORD_MAX_RETRIES', 3))
+        self.pagination_limit = int(os.getenv('KEYWORD_PAGINATION_LIMIT', 1000))
+        self.enrichment_batch_size = int(os.getenv('ENRICHMENT_BATCH_SIZE', 50))
+        self.enrichment_cache_ttl = int(os.getenv('ENRICHMENT_CACHE_TTL', 86400))
+
+        # Rate limiting
+        self.primary_api_calls = 0
+        self.enrichment_api_calls = 0
+        self.last_primary_call = 0
+        self.last_enrichment_call = 0
+
+        # In-memory cache for enrichment data
+        self.enrichment_cache = {}
+        self.cache_timestamps = {}
+
+    def analyze(self, url: str, competitor_domains: List[str] = None, quick_scan: bool = False) -> ModuleResult:
        """
+        Analyze keyword rankings for the given URL and competitors

        Args:
            url: Target website URL
+            competitor_domains: List of competitor domains to analyze
+            quick_scan: If True, limit to 1000 keywords per domain

        Returns:
+            ModuleResult with comprehensive keywords data
        """
+        start_time = time.time()
+
        try:
+            if not self.rapidapi_key:
+                return ModuleResult(
+                    success=False,
+                    data={},
+                    error="RAPIDAPI_KEY environment variable is required"
+                )
+
            domain = self._extract_domain(url)
+            competitor_domains = competitor_domains or []

+            # Limit competitors for demo performance
+            if len(competitor_domains) > 3:
+                competitor_domains = competitor_domains[:3]

+            # Fetch main domain data
+            main_domain_data = self._fetch_domain_keywords(domain, quick_scan)
+            if not main_domain_data['success']:
+                return ModuleResult(
+                    success=False,
+                    data={},
+                    error=f"Failed to fetch data for main domain: {main_domain_data['error']}"
+                )

+            # Fetch competitor data
+            competitor_data = {}
+            for comp_domain in competitor_domains:
+                comp_result = self._fetch_domain_keywords(comp_domain, quick_scan)
+                if comp_result['success']:
+                    competitor_data[comp_domain] = comp_result['data']

+            # Process and enrich data
+            result_data = self._process_keywords_data(
+                main_domain_data['data'],
+                competitor_data,
+                domain,
+                competitor_domains
+            )
+
+            # Add metadata
+            processing_time = time.time() - start_time
+            result_data['meta'] = {
+                'last_updated': datetime.now().isoformat(),
+                'processing_time': round(processing_time, 2),
+                'locale': 'en-US'
+            }
+
+            return ModuleResult(success=True, data=result_data)

        except Exception as e:
            return ModuleResult(
@@ -73,243 +123,432 @@ class KeywordsModule:
            url = 'https://' + url
        return urlparse(url).netloc.replace('www.', '')

+    def _fetch_domain_keywords(self, domain: str, quick_scan: bool) -> Dict[str, Any]:
+        """Fetch keywords data for a domain using Competitors Ranking Keywords API"""
        try:
+            all_keywords = []
+            offset = 0
+            max_keywords = 1000 if quick_scan else 5000

+            while len(all_keywords) < max_keywords:
+                # Rate limiting
+                self._rate_limit_primary_api()
+
+                url = "https://seo-get-competitors-ranking-keywords.p.rapidapi.com/"
+                headers = {
+                    "x-rapidapi-key": self.rapidapi_key,
+                    "x-rapidapi-host": self.primary_api_host
+                }
+                params = {
+                    "domain": domain,
+                    "offset": offset,
+                    "order_by": "position",
+                    "sort_by": "desc",
+                    "limit": min(self.pagination_limit, max_keywords - len(all_keywords))
+                }
+
+                response = requests.get(url, headers=headers, params=params, timeout=self.timeout)
+                self.primary_api_calls += 1
+                self.last_primary_call = time.time()
+
+                if response.status_code != 200:
+                    raise Exception(f"API error {response.status_code}: {response.text}")
+
+                data = response.json()
+
+                # Extract keywords
+                keywords = data.get('keywords', [])
+                if not keywords:
+                    break
+
+                all_keywords.extend(keywords)
+                offset += len(keywords)
+
+                # Check if we have domain statistics (should be in first response)
+                if offset == len(keywords) and 'domain_statistics' in data:
+                    domain_stats = data['domain_statistics']
+                elif 'domain_statistics' not in locals():
+                    domain_stats = self._calculate_domain_statistics(all_keywords)
+
+                # Break if no more data
+                if len(keywords) < self.pagination_limit:
+                    break

+            return {
+                'success': True,
+                'data': {
+                    'domain': domain,
+                    'statistics': domain_stats,
+                    'keywords': all_keywords[:max_keywords]
+                }
            }

        except Exception as e:
+            return {'success': False, 'error': str(e)}

+    def _calculate_domain_statistics(self, keywords: List[Dict]) -> Dict[str, Any]:
+        """Calculate domain statistics from keywords data"""
+        total_keywords = len(keywords)
+
+        # Position distribution
+        pos_1 = sum(1 for k in keywords if k.get('rank', 100) == 1)
+        pos_2_3 = sum(1 for k in keywords if 2 <= k.get('rank', 100) <= 3)
+        pos_4_10 = sum(1 for k in keywords if 4 <= k.get('rank', 100) <= 10)
+        pos_11_20 = sum(1 for k in keywords if 11 <= k.get('rank', 100) <= 20)
+
+        # Movement tracking
+        new_keywords = sum(1 for k in keywords if k.get('previous_rank') is None)
+        up_keywords = sum(1 for k in keywords if k.get('rank', 100) < k.get('previous_rank', 100))
+        down_keywords = sum(1 for k in keywords if k.get('rank', 100) > k.get('previous_rank', 100))
+
+        # Traffic estimation
+        estimated_traffic = sum(k.get('estimated_traffic_volume', 0) for k in keywords)
+
+        return {
+            'organic': {
+                'keywords_in_pos_1': pos_1,
+                'keywords_in_pos_2_3': pos_2_3,
+                'keywords_in_pos_4_10': pos_4_10,
+                'keywords_in_pos_11_20': pos_11_20,
+                'total_keywords_count': total_keywords,
+                'Estimated_traffic_volume': estimated_traffic,
+                'is_new': new_keywords,
+                'is_up': up_keywords,
+                'is_down': down_keywords,
+                'is_lost': 0
            }
+        }

+    def _process_keywords_data(self, main_data: Dict, competitor_data: Dict,
+                               domain: str, competitor_domains: List[str]) -> Dict[str, Any]:
+        """Process and structure the keywords data"""
+        stats = main_data['statistics']['organic']
+        keywords = main_data['keywords']
+
+        # Calculate totals
+        totals = {
+            'keywords': stats['total_keywords_count'],
+            'estimated_traffic': stats['Estimated_traffic_volume']
+        }
+
+        # Calculate position distribution
+        top3 = stats['keywords_in_pos_1'] + stats['keywords_in_pos_2_3']
+        top10 = top3 + stats['keywords_in_pos_4_10']
+        top50 = top10 + stats['keywords_in_pos_11_20']  # Approximate
+
+        distribution = {
+            'top3': top3,
+            'top10': top10,
+            'top50': top50,
+            'percentages': {
+                'top3': round(top3 / stats['total_keywords_count'] * 100, 1) if stats['total_keywords_count'] > 0 else 0,
+                'top10': round(top10 / stats['total_keywords_count'] * 100, 1) if stats['total_keywords_count'] > 0 else 0,
+                'top50': round(top50 / stats['total_keywords_count'] * 100, 1) if stats['total_keywords_count'] > 0 else 0
            }
+        }
+
+        # Movement tracking
+        movement = {
+            'new': stats['is_new'],
+            'up': stats['is_up'],
+            'down': stats['is_down'],
+            'lost': stats['is_lost']
+        }
+
+        # Identify best keywords
+        best_keywords = self._identify_best_keywords(keywords)
+
+        # Identify declining keywords
+        declining_keywords = self._identify_declining_keywords(keywords)
+
+        # Competitor gap analysis
+        opportunities, competitor_summary = self._analyze_competitor_gaps(
+            keywords, competitor_data, domain, competitor_domains
+        )
+
+        # Enrich keywords with volume/CPC data
+        enriched_keywords = self._enrich_keywords_data(keywords)
+
+        # Data sources tracking
+        data_sources = {
+            'positions': 'Competitors Ranking Keywords API',
+            'volume': 'Google Keyword Insight API',
+            'enrichment_rate': self._calculate_enrichment_rate(enriched_keywords)
+        }
+
+        return {
+            'totals': totals,
+            'distribution': distribution,
+            'movement': movement,
+            'best_keywords': best_keywords,
+            'declining_keywords': declining_keywords,
+            'opportunities': opportunities,
+            'competitor_summary': competitor_summary,
+            'data_sources': data_sources
+        }

+    def _identify_best_keywords(self, keywords: List[Dict]) -> List[Dict]:
+        """Identify best performing keywords"""
+        best_candidates = [
+            k for k in keywords
+            if k.get('rank', 100) <= 3 and k.get('estimated_traffic_volume', 0) > 10
+        ]

+        # Sort by estimated traffic volume
+        best_candidates.sort(key=lambda x: x.get('estimated_traffic_volume', 0), reverse=True)

+        return [
+            {
+                'keyword': k.get('keyword', ''),
+                'rank': k.get('rank', 0),
+                'url': k.get('url', ''),
+                'volume': k.get('avg_search_volume', 0),
+                'estimated_traffic': k.get('estimated_traffic_volume', 0),
+                'trend': self._determine_trend(k)
+            }
+            for k in best_candidates[:15]
+        ]
+
+    def _identify_declining_keywords(self, keywords: List[Dict]) -> List[Dict]:
+        """Identify keywords with declining performance"""
+        declining_candidates = []
+
+        for k in keywords:
+            current_rank = k.get('rank', 100)
+            previous_rank = k.get('previous_rank', 100)

+            if current_rank > previous_rank and (current_rank - previous_rank) >= 5:
+                declining_candidates.append({
+                    'keyword': k.get('keyword', ''),
+                    'rank': current_rank,
+                    'previous_rank': previous_rank,
+                    'rank_delta': current_rank - previous_rank,
+                    'volume': k.get('avg_search_volume', 0)
                })

+        # Sort by rank delta (biggest drops first)
+        declining_candidates.sort(key=lambda x: x['rank_delta'], reverse=True)

+        return declining_candidates[:15]

+    def _analyze_competitor_gaps(self, main_keywords: List[Dict], competitor_data: Dict,
+                                 domain: str, competitor_domains: List[str]) -> Tuple[List[Dict], List[Dict]]:
+        """Analyze competitor gaps and opportunities"""
+        opportunities = []
+        competitor_summary = []
+
+        # Normalize main domain keywords
+        main_keyword_set = {k.get('keyword', '').lower().strip() for k in main_keywords}
+
+        for comp_domain, comp_data in competitor_data.items():
+            comp_keywords = comp_data.get('keywords', [])
+            comp_stats = comp_data.get('statistics', {}).get('organic', {})
+
+            # Find gaps
+            gaps = []
+            for k in comp_keywords:
+                keyword = k.get('keyword', '').lower().strip()
+                comp_rank = k.get('rank', 100)
+
+                # Keyword where competitor ranks well but main domain doesn't
+                if keyword not in main_keyword_set and comp_rank <= 20:
+                    gaps.append({
+                        'keyword': k.get('keyword', ''),
+                        'competitor_rank': comp_rank,
+                        'competitor_domain': comp_domain,
+                        'volume': k.get('avg_search_volume', 0),
+                        'difficulty': self._estimate_difficulty(comp_rank, k.get('avg_search_volume', 0))
+                    })
+
+            # Calculate opportunity scores
+            for gap in gaps:
+                score = self._calculate_opportunity_score(
+                    gap['competitor_rank'],
+                    gap['volume'],
+                    gap['difficulty']
+                )
+                gap['priority_score'] = score
+
+            # Sort by priority score
+            gaps.sort(key=lambda x: x['priority_score'], reverse=True)
+            opportunities.extend(gaps[:20])  # Top 20 per competitor
+
+            # Competitor summary
+            overlapping = len([k for k in comp_keywords if k.get('keyword', '').lower().strip() in main_keyword_set])
+            competitor_summary.append({
+                'domain': comp_domain,
+                'total_keywords': comp_stats.get('total_keywords_count', 0),
+                'overlapping_keywords': overlapping,
+                'gaps_identified': len(gaps)
+            })
+
+        # Sort all opportunities by priority score
+        opportunities.sort(key=lambda x: x['priority_score'], reverse=True)

+        return opportunities[:50], competitor_summary  # Top 50 overall

+    def _calculate_opportunity_score(self, competitor_rank: int, search_volume: int, difficulty: int) -> float:
+        """Calculate opportunity score using the PRD algorithm"""
+        position_ctr = {1: 28, 2: 15, 3: 11, 4: 8, 5: 7, 10: 2, 20: 1}

+        # Find closest CTR value
+        ctr_value = 1
+        for pos, ctr in position_ctr.items():
+            if competitor_rank <= pos:
+                ctr_value = ctr
+                break

+        traffic_potential = ctr_value * search_volume / 100
+        competition_factor = max(competitor_rank, 1)
+        difficulty_factor = max(difficulty, 10) / 100

+        score = traffic_potential / (competition_factor * difficulty_factor)
+        return min(round(score, 1), 100)

+    def _estimate_difficulty(self, rank: int, volume: int) -> int:
+        """Estimate keyword difficulty based on rank and volume"""
+        # Simple heuristic - in practice, this would come from a keyword difficulty API
+        if rank <= 3:
+            return 20 + (volume // 1000) * 5
+        elif rank <= 10:
+            return 35 + (volume // 1000) * 3
+        else:
+            return 50 + (volume // 1000) * 2
+
+    def _enrich_keywords_data(self, keywords: List[Dict]) -> List[Dict]:
+        """Enrich keywords with volume and CPC data"""
+        # Identify keywords needing enrichment
+        keywords_to_enrich = [
+            k for k in keywords
+            if not k.get('avg_search_volume') or k.get('avg_search_volume', 0) == 0
+        ]
+
+        if not keywords_to_enrich:
+            return keywords
+
+        # Batch enrichment
+        enriched_data = self._batch_enrich_keywords(
+            [k.get('keyword', '') for k in keywords_to_enrich]
+        )
+
+        # Merge enriched data back
+        enriched_keywords = keywords.copy()
+        for i, keyword_data in enumerate(keywords_to_enrich):
+            keyword = keyword_data.get('keyword', '')
+            if keyword in enriched_data:
+                # Find the keyword in the original list and update it
+                for j, k in enumerate(enriched_keywords):
+                    if k.get('keyword', '') == keyword:
+                        enriched_keywords[j].update(enriched_data[keyword])
+                        break
+
+        return enriched_keywords
+
+    def _batch_enrich_keywords(self, keywords: List[str]) -> Dict[str, Dict]:
+        """Batch enrich keywords using Google Keyword Insight API"""
+        enriched_data = {}

+        # Process in batches
+        for i in range(0, len(keywords), self.enrichment_batch_size):
+            batch = keywords[i:i + self.enrichment_batch_size]
+
+            # Check cache first
+            uncached_keywords = []
+            for keyword in batch:
+                cache_key = self._get_cache_key(keyword)
+                if cache_key in self.enrichment_cache:
+                    cache_age = time.time() - self.cache_timestamps.get(cache_key, 0)
+                    if cache_age < self.enrichment_cache_ttl:
+                        enriched_data[keyword] = self.enrichment_cache[cache_key]
+                    else:
+                        uncached_keywords.append(keyword)
+                else:
+                    uncached_keywords.append(keyword)
+
+            if not uncached_keywords:
+                continue
+
+            # Enrich uncached keywords
+            try:
+                self._rate_limit_enrichment_api()
+
+                url = "https://google-keyword-insight1.p.rapidapi.com/globalkey/"
+                headers = {
+                    "x-rapidapi-key": self.rapidapi_key,
+                    "x-rapidapi-host": self.enrichment_api_host
+                }
+
+                for keyword in uncached_keywords:
+                    params = {
+                        "keyword": keyword,
+                        "lang": "en"
+                    }
+
+                    response = requests.get(url, headers=headers, params=params, timeout=self.timeout)
+                    self.enrichment_api_calls += 1
+                    self.last_enrichment_call = time.time()
+
+                    if response.status_code == 200:
+                        data = response.json()
+                        if data and isinstance(data, list) and len(data) > 0:
+                            insight = data[0]
+                            enriched_info = {
+                                'avg_search_volume': insight.get('volume', 0),
+                                'cpc_low': insight.get('low_bid', 0),
+                                'cpc_high': insight.get('high_bid', 0),
+                                'competition_level': insight.get('competition_level', 'UNKNOWN'),
+                                'trend': insight.get('trend', 0)
+                            }
+
+                            enriched_data[keyword] = enriched_info
+
+                            # Cache the result
+                            cache_key = self._get_cache_key(keyword)
+                            self.enrichment_cache[cache_key] = enriched_info
+                            self.cache_timestamps[cache_key] = time.time()
+
+                    # Small delay to respect rate limits
+                    time.sleep(0.1)
+
+            except Exception as e:
+                # Continue processing even if enrichment fails
+                print(f"Enrichment error: {e}")
+                continue
+
+        return enriched_data
+
+    def _get_cache_key(self, keyword: str) -> str:
+        """Generate cache key for keyword"""
+        return hashlib.md5(keyword.lower().encode()).hexdigest()
+
+    def _calculate_enrichment_rate(self, keywords: List[Dict]) -> float:
+        """Calculate the percentage of keywords with volume data"""
+        enriched = sum(1 for k in keywords if k.get('avg_search_volume', 0) > 0)
+        total = len(keywords)
+        return round(enriched / total * 100, 1) if total > 0 else 0
+
+    def _determine_trend(self, keyword_data: Dict) -> str:
+        """Determine keyword trend based on rank changes"""
+        current_rank = keyword_data.get('rank', 100)
+        previous_rank = keyword_data.get('previous_rank', 100)
+
+        if previous_rank is None:
+            return 'new'
+        elif current_rank < previous_rank:
+            return 'up'
+        elif current_rank > previous_rank:
+            return 'down'
+        else:
+            return 'stable'
+
+    def _rate_limit_primary_api(self):
+        """Rate limiting for primary API (60 requests/minute)"""
+        current_time = time.time()
+        if current_time - self.last_primary_call < 1:  # 1 second between calls
+            time.sleep(1)
+
+    def _rate_limit_enrichment_api(self):
+        """Rate limiting for enrichment API (100 requests/minute)"""
+        current_time = time.time()
+        if current_time - self.last_enrichment_call < 0.6:  # 0.6 seconds between calls
+            time.sleep(0.6)
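A minimal usage sketch of the reworked module, assuming RAPIDAPI_KEY is set in the environment; the domains shown are placeholders, not values from this commit.

# Hypothetical driver for the new KeywordsModule; domains below are placeholders.
import os
from modules.keywords import KeywordsModule

os.environ.setdefault('KEYWORD_PAGINATION_LIMIT', '500')  # optional tuning via env vars

module = KeywordsModule()
result = module.analyze(
    "https://example.com",
    competitor_domains=["competitor-a.com", "competitor-b.com"],
    quick_scan=True,  # caps each domain at 1000 keywords per the docstring
)

if result.success:
    totals = result.data['totals']
    print(f"Keywords: {totals['keywords']}, est. traffic: {totals['estimated_traffic']}")
    for opp in result.data['opportunities'][:5]:
        print(opp['keyword'], opp['priority_score'])
else:
    print("Keyword analysis failed:", result.error)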