Spaces:

MBilal-72
/

GenerativeEngineOptimization

Runtime error

App Files Community

MBilal-72 committed on Jul 27, 2025

Commit

2e6803f

verified ·

1 Parent(s): 0bb1c44

update with rag utils/optimizer.py

Browse files

Files changed (1) hide show

utils/optimizer.py +503 -458

utils/optimizer.py CHANGED Viewed

@@ -1,503 +1,557 @@
 """
-Content Optimization Module
-Enhances content for better AI/LLM performance and GEO scores
 """
 import json
 import re
 from typing import Dict, Any, List, Optional
 from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
 class ContentOptimizer:
-    """Main class for optimizing content for AI search engines"""
-    def __init__(self, llm):
         self.llm = llm
         self.setup_prompts()
-    def setup_prompts(self):
-        """Initialize optimization prompts"""
-        # Main content enhancement prompt
-        self.enhancement_prompt = (
-            "You are an AI Content Enhancement Specialist. Your purpose is to optimize user-provided text to maximize its effectiveness for large language models (LLMs) in search, question-answering, and conversational AI systems.\n\n"
-            "Evaluate the input text based on the following criteria, assigning a score from 1-10 for each:\n"
-            "- Clarity: How easily can the content be understood?\n"
-            "- Structuredness: How well-organized and coherent is the content?\n"
-            "- LLM Answerability: How easily can an LLM extract precise answers from the content?\n\n"
-            "Identify the most salient keywords.\n\n"
-            "Rewrite the text to improve:\n"
-            "- Clarity and precision\n"
-            "- Logical structure and flow\n"
-            "- Suitability for LLM-based information retrieval\n\n"
-            "Present your analysis and optimized text in the following JSON format:\n"
-            "```json\n"
-            "{{\n"
-            "  \"scores\": {{\n"
-            "    \"clarity\": 8.5,\n"
-            "    \"structuredness\": 7.0,\n"
-            "    \"answerability\": 9.0\n"
-            "  }},\n"
-            "  \"keywords\": [\"example\", \"installation\", \"setup\"],\n,"
-            "  \"optimized_text\": \"...\"\n,"
-            "}}\n"
-            "```"
-        )
-        # SEO-style optimization prompt
-        self.seo_style_prompt = (
-            "You are an AI-first SEO specialist. Optimize this content for AI search engines and LLM systems. "
-            "Focus on:\n"
-            "1. Semantic keyword optimization\n"
-            "2. Question-answer format enhancement\n"
-            "3. Factual accuracy and authority signals\n"
-            "4. Conversational readiness\n"
-            "5. Citation-worthy structure\n"
-            "Provide analysis and optimization in JSON:\n"
-            "```json\n"
-            "{{\n"
-            "  \"seo_analysis\": {{\n"
-            "    \"keyword_density\": \"analysis of current keywords\",\n"
-            "    \"semantic_gaps\": [\"missing semantic terms\"],\n"
-            "    \"readability_score\": 8.5,\n"
-            "    \"authority_signals\": [\"credentials\", \"citations\"]\n"
-            "  }},\n"
-            "  \"optimized_content\": {{\n"
-            "    \"title_suggestions\": [\"optimized title 1\", \"optimized title 2\"],\n"
-            "    \"meta_description\": \"AI-optimized meta description\",\n"
-            "    \"enhanced_content\": \"full optimized content...\",\n"
-            "    \"structured_data_suggestions\": [\"schema markup recommendations\"]\n"
-            "  }},\n"
-            "  \"improvement_summary\": {{\n"
-            "    \"changes_made\": [\"change 1\", \"change 2\"],\n"
-            "    \"expected_impact\": \"description of expected improvements\"\n"
-            "  }}\n"
-            "}}\n"
-            "```"
-        )
-        # Competitive content analysis prompt
-        # self.competitive_analysis_prompt = ("Analyze the following content for AI search optimization gaps in entities, questions, clarity, flow, and semantic links. Return JSON with gaps and actionable recommendations.\nContent: {content}")
-        self.competitive_analysis_prompt = (
-            "Analyze the following content for AI search optimization gaps in entities, questions, clarity, flow, and semantic links. "
-            "Return JSON with gaps and actionable recommendations.\n"
-            "Content: {content}\n"
-            "Provide competitive analysis in JSON format:\n"
-            "{{\n"
-            "  \"competitive_analysis\": {{\n"
-            "    \"entity_gaps\": [\"gap1\", \"gap2\"],\n"
-            "    \"question_coverage\": \"summary of coverage\",\n"
-            "    \"factual_clarity\": \"assessment\",\n"
-            "    \"conversational_flow\": \"assessment\",\n"
-            "    \"semantic_relationships\": [\"relationship1\", \"relationship2\"]\n"
-            "  }},\n"
-            "  \"recommendations\": [\"recommendation 1\", \"recommendation 2\"]\n"
-            "}}\n"
-        )
-        self.voice_prompt = (
             """
-                Optimize this content for voice search and conversational AI systems.
-                Focus on:
-                1. Natural language patterns
-                2. Question-based structure
-                3. Conversational tone
-                4. Clear, direct answers
-                5. Featured snippet optimization
-                Original content: {content}
-                Provide optimization in JSON:
-                ```json
-                    {{
-                    "voice_optimized_content": "conversational version...",
-                    "question_answer_pairs": [
-                        {{"question": "What is...", "answer": "Direct answer..."}},
-                        {{"question": "How does...", "answer": "Step by step..."}}
-                    ],
-                    "featured_snippet_candidates": ["snippet 1", "snippet 2"],
-                    "natural_language_improvements": ["improvement 1", "improvement 2"],
-                    "conversational_score": 8.5
-                    }}
-                ```
             """
-        )
-        # Dedicated prompt for rewriting/optimizing content
-        self.optimization_rewrite_prompt = (
-            "You are an expert AI content optimizer. Rewrite the provided text to maximize clarity, logical structure, and suitability for LLM-based search and conversational AI. "
-            "Your rewritten version should be more precise, well-organized, and easier for AI systems to extract answers from. "
-            "Return your output in the following JSON format:\n"
-            "```json\n"
-            "{{\n"
-            "  \"optimized_text\": \"...your rewritten content here...\"\n"
-            "}}\n"
-            "```"
-        )
-    def optimize_content(self, content: str, analyze_only: bool = False,
-                        include_keywords: bool = True, optimization_type: str = "seo") -> Dict[str, Any]:
         """
-            Main content optimization function
-            Args:
-                content (str): Content to optimize
-                analyze_only (bool): If True, only analyze without rewriting
-                include_keywords (bool): Whether to include keyword analysis
-                optimization_type (str): Type of optimization ("standard", "seo", "competitive")
-            Returns:
-                Dict: Optimization results with scores and enhanced content
         """
         try:
             # Choose optimization approach
-            if optimization_type == "seo" and not analyze_only:
-                return self._seo_style_optimization(content, analyze_only)
-            elif optimization_type == "competitive" and not analyze_only:
-                return self._competitive_optimization(content)
             else:
-                return self._standard_optimization(content, analyze_only, include_keywords)
         except Exception as e:
-            return {'error': f"Optimization failed: {str(e)}"}
-    def _standard_optimization(self, content: str, analyze_only: bool, include_keywords: bool) -> Dict[str, Any]:
-        """Standard content optimization using enhancement prompt"""
-        try:
-            # Always assign prompt_text
-            if analyze_only is True:
-                prompt_text = self.enhancement_prompt
-                prompt_text = prompt_text.replace(
-                    "Rewrite the text to improve:",
-                    "Analyze the text for potential improvements in:"
-                ).replace(
-                    '"optimized_text": "..."',
-                    '"optimization_suggestions": ["suggestion 1", "suggestion 2"]'
-                )
-                if not include_keywords:
-                    prompt_text = prompt_text.replace(
-                        '"keywords": ["example", "installation", "setup"],',
-                        ''
-                    )
-            else:
-                # Use dedicated rewrite prompt for optimization
-                prompt_text = self.optimization_rewrite_prompt
-            prompt_template = ChatPromptTemplate.from_messages([
-                SystemMessagePromptTemplate.from_template(prompt_text),
-                HumanMessagePromptTemplate.from_template(content[:6000])
-            ])
-            chain = prompt_template | self.llm
-            result = chain.invoke({})
-            result_content = result.content if hasattr(result, 'content') else str(result)
-            parsed_result = self._parse_optimization_result(result_content)
-            parsed_result.update({
-                'optimization_type': 'standard',
-                'analyze_only': analyze_only,
-                'original_length': len(content),
-                'original_word_count': len(content.split())
-            })
-            return parsed_result
-        except Exception as e:
-            return {'error': f"Standard optimization failed: {str(e)}"}
-    def _seo_style_optimization(self, content: str, analyze_only: bool) -> Dict[str, Any]:
-        """SEO-focused optimization for AI search engines"""
         try:
             prompt_template = ChatPromptTemplate.from_messages([
-                 SystemMessagePromptTemplate.from_template(self.seo_style_prompt),
-                 HumanMessagePromptTemplate.from_template(f"Optimize this content for AI search engines:\n\n{content[:6000]}")
             ])
             chain = prompt_template | self.llm
-            result = chain.invoke({})
             result_content = result.content if hasattr(result, 'content') else str(result)
             parsed_result = self._parse_optimization_result(result_content)
-            # Add SEO-specific metadata
             parsed_result.update({
-                'optimization_type': 'seo',
                 'analyze_only': analyze_only,
-                'seo_focused': True
             })
             return parsed_result
         except Exception as e:
-            return {'error': f"SEO optimization failed: {str(e)}"}
-    def _competitive_optimization(self, content: str) -> Dict[str, Any]:
-        """Competitive analysis-based optimization"""
         try:
-            formatted_prompt = self.competitive_analysis_prompt.format(content=content[:5000])
             prompt_template = ChatPromptTemplate.from_messages([
-                 SystemMessagePromptTemplate.from_template(formatted_prompt),
-                 HumanMessagePromptTemplate.from_template("Perform the competitive analysis and provide optimization recommendations.")
             ])
-                # ("system", formatted_prompt),
-                # ("user", "Perform the competitive analysis and provide optimization recommendations.")
             chain = prompt_template | self.llm
-            result = chain.invoke({})
             result_content = result.content if hasattr(result, 'content') else str(result)
             parsed_result = self._parse_optimization_result(result_content)
             parsed_result.update({
-                'optimization_type': 'competitive',
                 'competitive_analysis': True
             })
             return parsed_result
         except Exception as e:
-            return {'error': f"Competitive optimization failed: {str(e)}"}
-    # def batch_optimize_content(self, content_list: List[str], optimization_type: str = "standard") -> List[Dict[str, Any]]:
-    #     """
-    #     Optimize multiple pieces of content in batch
-    #     Args:
-    #         content_list (List[str]): List of content pieces to optimize
-    #         optimization_type (str): Type of optimization to apply
-    #     Returns:
-    #         List[Dict]: List of optimization results
-    #     """
-    #     results = []
-    #     for i, content in enumerate(content_list):
-    #         try:
-    #             result = self.optimize_content(
-    #                 content,
-    #                 optimization_type=optimization_type
-    #             )
-    #             result['batch_index'] = i
-    #             results.append(result)
-    #         except Exception as e:
-    #             results.append({
-    #                 'batch_index': i,
-    #                 'error': f"Batch optimization failed: {str(e)}"
-    #             })
-    #     return results
-    # def generate_content_variations(self, content: str, num_variations: int = 3) -> List[Dict[str, Any]]:
-    #     """
-    #     Generate multiple optimized variations of the same content
-    #     Args:
-    #         content (str): Original content
-    #         num_variations (int): Number of variations to generate
-    #     Returns:
-    #         List[Dict]: List of content variations with analysis
-    #     """
-    #     variations = []
-    #     variation_prompts = [
-    #         "Create a more conversational version optimized for AI chat responses",
-    #         "Create a more authoritative version optimized for citations",
-    #         "Create a more structured version optimized for question-answering"
-    #     ]
-    #     for i in range(min(num_variations, len(variation_prompts))):
-    #         try:
-    #             custom_prompt = f"""You are optimizing content for AI systems. {variation_prompts[i]}.
-    #                                 Original content: {content[:4000]}
-    #                                 Provide the optimized variation in JSON format:
-    #                                 ```json
-    #                                 {{
-    #                                 "variation_type": "conversational/authoritative/structured",
-    #                                 "optimized_content": "the rewritten content...",
-    #                                 "key_changes": ["change 1", "change 2"],
-    #                                 "target_use_case": "description of ideal use case"
-    #                                 }}
-    #                                 ```
-    #                             """
-    #             prompt_template = ChatPromptTemplate.from_messages([
-    #                 SystemMessagePromptTemplate.from_template(custom_prompt),
-    #                 HumanMessagePromptTemplate.from_template("Generate the variation.")
-    #             ])
-    #                 # ("system", custom_prompt),
-    #                 # ("user", "Generate the variation.")
-    #             chain = prompt_template | self.llm
-    #             result = chain.invoke({})
-    #             result_content = result.content if hasattr(result, 'content') else str(result)
-    #             parsed_result = self._parse_optimization_result(result_content)
-    #             parsed_result.update({
-    #                 'variation_index': i,
-    #                 'variation_prompt': variation_prompts[i]
-    #             })
-    #             variations.append(parsed_result)
-    #         except Exception as e:
-    #             variations.append({
-    #                 'variation_index': i,
-    #                 'error': f"Variation generation failed: {str(e)}"
-    #             })
-    #     return variations
-    def analyze_content_readability(self, content: str) -> Dict[str, Any]:
         """
-        Analyze content readability for AI systems
         Args:
-            content (str): Content to analyze
         Returns:
-            Dict: Readability analysis results
         """
         try:
-            # Basic readability metrics
             words = content.split()
             sentences = re.split(r'[.!?]+', content)
             sentences = [s.strip() for s in sentences if s.strip()]
             paragraphs = [p.strip() for p in content.split('\n\n') if p.strip()]
-            # Calculate metrics
-            avg_words_per_sentence = len(words) / len(sentences) if sentences else 0
-            avg_sentences_per_paragraph = len(sentences) / len(paragraphs) if paragraphs else 0
-            # Character-based metrics
-            avg_word_length = sum(len(word) for word in words) / len(words) if words else 0
-            # Complexity indicators
-            long_sentences = [s for s in sentences if len(s.split()) > 20]
-            complex_words = [w for w in words if len(w) > 6]
             return {
-                'basic_metrics': {
                     'total_words': len(words),
                     'total_sentences': len(sentences),
                     'total_paragraphs': len(paragraphs),
-                    'avg_words_per_sentence': avg_words_per_sentence,
-                    'avg_sentences_per_paragraph': avg_sentences_per_paragraph,
-                    'avg_word_length': avg_word_length
                 },
-                'complexity_indicators': {
-                    'long_sentences_count': len(long_sentences),
-                    'long_sentences_percentage': len(long_sentences) / len(sentences) * 100 if sentences else 0,
-                    'complex_words_count': len(complex_words),
-                    'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0
                 },
-                'ai_readability_score': self._calculate_ai_readability_score({
-                    'avg_words_per_sentence': avg_words_per_sentence,
-                    'avg_word_length': avg_word_length,
-                    'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0
-                }),
-                'recommendations': self._generate_readability_recommendations({
-                    'avg_words_per_sentence': avg_words_per_sentence,
-                    'long_sentences_percentage': len(long_sentences) / len(sentences) * 100 if sentences else 0,
-                    'complex_words_percentage': len(complex_words) / len(words) * 100 if words else 0
                 })
             }
         except Exception as e:
-            return {'error': f"Readability analysis failed: {str(e)}"}
-    # def extract_key_entities(self, content: str) -> Dict[str, Any]:
-    #     """
-    #     Extract key entities and topics for optimization
-    #     Args:
-    #         content (str): Content to analyze
-    #     Returns:
-    #         Dict: Extracted entities and topics
-    #     """
-    #     try:
-    #         entity_prompt = """Extract key entities, topics, and concepts from this content for AI optimization.
-    #                             Content: {content}
-    #                             Identify:
-    #                             1. Named entities (people, places, organizations)
-    #                             2. Key concepts and topics
-    #                             3. Technical terms and jargon
-    #                             4. Potential semantic keywords
-    #                             5. Question-answer opportunities
-    #                             Format as JSON:
-    #                             ```json
-    #                             {{
-    #                             "named_entities": ["entity1", "entity2"],
-    #                             "key_topics": ["topic1", "topic2"],
-    #                             "technical_terms": ["term1", "term2"],
-    #                             "semantic_keywords": ["keyword1", "keyword2"],
-    #                             "question_opportunities": ["What is...", "How does..."],
-    #                             "entity_relationships": ["relationship descriptions"]
-    #                             }}
-    #                             ```
-    #                         """
-    #         prompt_template = ChatPromptTemplate.from_messages([
-    #             SystemMessagePromptTemplate.from_template(entity_prompt.format(content=content[:5000])),
-    #             HumanMessagePromptTemplate.from_template("Extract the entities and topics.")
-    #         ])
-    #             # ("system", entity_prompt.format(content=content[:5000])),
-    #             # ("user", "Extract the entities and topics.")
-    #         chain = prompt_template | self.llm
-    #         result = chain.invoke({})
-    #         result_content = result.content if hasattr(result, 'content') else str(result)
-    #         return self._parse_optimization_result(result_content)
-    #     except Exception as e:
-    #         return {'error': f"Entity extraction failed: {str(e)}"}
-    def optimize_for_voice_search(self, content: str) -> Dict[str, Any]:
         """
-        Optimize content specifically for voice search and conversational AI
-        Args:
-            content (str): Content to optimize
-        Returns:
-            Dict: Voice search optimization results
-        """
         try:
-            # self.voice_prompt = ("Optimize the following content for voice search and conversational AI by improving natural language flow, question-based structure, tone, and featured snippet potential. Return JSON with improved content, Q&A pairs, snippet candidates, and a conversational score.\nContent: {content}")
-            prompt_template = ChatPromptTemplate.from_messages([
-                SystemMessagePromptTemplate.from_template(voice_prompt.format(content=content[:4000])),
-                HumanMessagePromptTemplate.from_template("Optimize for voice search.")
-            ])
-                # ("system", voice_prompt.format(content=content[:4000])),
-                # ("user", "Optimize for voice search.")
-            chain = prompt_template | self.llm
-            result = chain.invoke({})
-            result_content = result.content if hasattr(result, 'content') else str(result)
-            parsed_result = self._parse_optimization_result(result_content)
-            parsed_result.update({
-                'optimization_type': 'voice_search',
-                'voice_optimized': True
-            })
-            return parsed_result
-        except Exception as e:
-            return {'error': f"Voice search optimization failed: {str(e)}"}
     def _parse_optimization_result(self, response_text: str) -> Dict[str, Any]:
         """Parse LLM response and extract structured results"""
         try:
@@ -508,73 +562,64 @@ class ContentOptimizer:
             if json_start != -1 and json_end != -1:
                 json_str = response_text[json_start:json_end]
                 parsed = json.loads(json_str)
-                # Ensure consistent structure
-                if 'scores' not in parsed and 'score' in parsed:
-                    parsed['scores'] = parsed['score']
                 return parsed
             else:
-                # If no JSON found, return raw response with error flag
                 return {
                     'raw_response': response_text,
                     'parsing_error': 'No JSON structure found in response',
-                    'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
                 }
         except json.JSONDecodeError as e:
             return {
                 'raw_response': response_text,
                 'parsing_error': f'JSON decode error: {str(e)}',
-                'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
             }
         except Exception as e:
             return {
                 'raw_response': response_text,
                 'parsing_error': f'Unexpected parsing error: {str(e)}',
-                'scores': {'clarity': 0, 'structuredness': 0, 'answerability': 0}
             }
-    def _calculate_ai_readability_score(self, metrics: Dict[str, float]) -> float:
-        """Calculate AI-specific readability score"""
-        try:
-            # Optimal ranges for AI consumption
-            optimal_words_per_sentence = 15  # Sweet spot for AI processing
-            optimal_word_length = 5  # Balance of complexity and clarity
-            optimal_complex_words_percentage = 15  # Some complexity is good for authority
-            # Calculate deviations from optimal
-            sentence_score = max(0, 10 - abs(metrics['avg_words_per_sentence'] - optimal_words_per_sentence) * 0.5)
-            word_length_score = max(0, 10 - abs(metrics['avg_word_length'] - optimal_word_length) * 2)
-            complexity_score = max(0, 10 - abs(metrics['complex_words_percentage'] - optimal_complex_words_percentage) * 0.3)
-            # Weighted average
-            overall_score = (sentence_score * 0.4 + word_length_score * 0.3 + complexity_score * 0.3)
-            return round(overall_score, 1)
-        except Exception:
-            return 5.0  # Default neutral score
-    def _generate_readability_recommendations(self, metrics: Dict[str, float]) -> List[str]:
-        """Generate specific readability improvement recommendations"""
-        recommendations = []
-        try:
-            if metrics['avg_words_per_sentence'] > 20:
-                recommendations.append("Break down long sentences for better AI processing")
-            elif metrics['avg_words_per_sentence'] < 8:
-                recommendations.append("Consider combining very short sentences for better context")
-            if metrics['long_sentences_percentage'] > 30:
-                recommendations.append("Reduce the number of complex sentences (>20 words)")
-            if metrics['complex_words_percentage'] > 25:
-                recommendations.append("Simplify vocabulary where possible for broader accessibility")
-            elif metrics['complex_words_percentage'] < 5:
-                recommendations.append("Add more specific terminology to establish authority")
-            return recommendations
-        except Exception:
-            return ["Unable to generate specific recommendations"]

 """
+Enhanced Content Optimization Module with RAG for GEO
+Integrates RAG functionality for better Generative Engine Optimization
 """
 import json
 import re
 from typing import Dict, Any, List, Optional
 from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
+from langchain.schema import Document
 class ContentOptimizer:
+    """Enhanced Content Optimizer with RAG capabilities for GEO"""
+    def __init__(self, llm, vector_chunker=None):
         self.llm = llm
+        self.vector_chunker = vector_chunker
         self.setup_prompts()
+        self.setup_geo_knowledge_base()
+    def setup_geo_knowledge_base(self):
+        """Initialize GEO best practices knowledge base"""
+        self.geo_knowledge = [
+            """
+            Generative Engine Optimization (GEO) Best Practices:
+            1. Structure for AI Consumption:
+            - Use clear headings and subheadings
+            - Include bullet points and numbered lists
+            - Provide direct, concise answers to common questions
+            - Use schema markup when possible
+            2. Content Format for LLMs:
+            - Answer questions directly in the first sentence
+            - Use "what, why, how" question patterns
+            - Include relevant entities and proper nouns
+            - Maintain factual accuracy with citations
+            3. Semantic Optimization:
+            - Include related terms and synonyms
+            - Use entity-rich content (people, places, organizations)
+            - Connect concepts with clear relationships
+            - Optimize for topic clusters, not just keywords
+            """,
+            """
+            AI Search Visibility Optimization:
+            1. Query Intent Matching:
+            - Address user intent explicitly
+            - Use natural language patterns
+            - Include question-answer pairs
+            - Optimize for conversational queries
+            2. Citation Worthiness:
+            - Include authoritative sources and data
+            - Use specific facts and statistics
+            - Provide expert opinions and insights
+            - Maintain consistent tone and expertise
+            3. Multi-Query Coverage:
+            - Address related questions in the same content
+            - Use comprehensive topic coverage
+            - Include long-tail and specific queries
+            - Provide context for complex topics
+            """,
             """
+            Content Structure for AI Systems:
+            1. Information Architecture:
+            - Lead with key information
+            - Use inverted pyramid structure
+            - Include table of contents for long content
+            - Break complex topics into digestible sections
+            2. Conversational Readiness:
+            - Write in active voice
+            - Use clear, direct language
+            - Include transitional phrases
+            - Optimize sentence length (12-20 words)
+            3. Context Completeness:
+            - Define technical terms
+            - Provide background information
+            - Include relevant examples
+            - Connect to broader topic context
             """
+        ]
+    def setup_prompts(self):
+        """Initialize optimization prompts with RAG integration"""
+        self.rag_enhancement_prompt = """
+        You are a Generative Engine Optimization (GEO) specialist with access to best practices knowledge.
+        Based on the provided GEO knowledge and the user's content, optimize the content for:
+        1. AI search engines (ChatGPT, Claude, Gemini)
+        2. LLM-based question answering systems
+        3. Conversational AI interfaces
+        4. Citation and reference systems
+        Use the knowledge base to inform your optimization decisions.
+        Knowledge Base Context:
+        {context}
+        Original Content:
+        {content}
+        Provide comprehensive GEO optimization in JSON format:
+        ```json
+        {{
+          "geo_analysis": {{
+            "current_geo_score": 7.5,
+            "ai_search_visibility": 8.0,
+            "query_intent_matching": 7.0,
+            "conversational_readiness": 8.5,
+            "citation_worthiness": 6.5,
+            "context_completeness": 7.5
+          }},
+          "optimization_opportunities": [
+            {{
+              "type": "Structure Enhancement",
+              "description": "Add clear headings and Q&A format",
+              "priority": "high",
+              "expected_impact": "Improve AI parsing by 25%"
+            }}
+          ],
+          "optimized_content": {{
+            "enhanced_text": "Your optimized content here...",
+            "structural_improvements": ["Added FAQ section", "Improved headings"],
+            "semantic_enhancements": ["Added related terms", "Improved entity density"]
+          }},
+          "geo_keywords": {{
+            "primary_entities": ["entity1", "entity2"],
+            "semantic_terms": ["term1", "term2"],
+            "question_patterns": ["What is...", "How does..."],
+            "related_concepts": ["concept1", "concept2"]
+          }},
+          "recommendations": [
+            "Add more specific examples",
+            "Include authoritative citations",
+            "Improve conversational flow"
+          ]
+        }}
+        ```
+        """
+        self.competitive_geo_prompt = """
+        Analyze the content against GEO best practices and identify competitive optimization opportunities.
+        GEO Knowledge Base:
+        {context}
+        Content to Analyze:
+        {content}
+        Provide competitive GEO analysis:
+        ```json
+        {{
+          "competitive_gaps": {{
+            "missing_question_patterns": ["What questions aren't covered"],
+            "entity_gaps": ["Important entities not mentioned"],
+            "semantic_opportunities": ["Related terms to include"],
+            "structural_weaknesses": ["Formatting issues for AI"]
+          }},
+          "benchmark_comparison": {{
+            "current_performance": {{
+              "ai_answerability": 6.5,
+              "semantic_richness": 7.0,
+              "structural_clarity": 8.0
+            }},
+            "optimization_potential": {{
+              "ai_answerability": 9.0,
+              "semantic_richness": 8.5,
+              "structural_clarity": 9.5
+            }}
+          }},
+          "action_plan": [
+            {{
+              "priority": "high",
+              "action": "Add FAQ section",
+              "rationale": "Improves direct question answering"
+            }}
+          ]
+        }}
+        ```
+        """
+    def optimize_content_with_rag(self, content: str, optimization_type: str = "geo_standard",
+                                 analyze_only: bool = False) -> Dict[str, Any]:
         """
+        Main RAG-enhanced content optimization for GEO
+        Args:
+            content (str): Content to optimize
+            optimization_type (str): Type of GEO optimization
+            analyze_only (bool): Whether to only analyze without rewriting
+        Returns:
+            Dict: Comprehensive GEO optimization results
         """
         try:
+            # Create knowledge base documents
+            knowledge_docs = [Document(page_content=knowledge, metadata={"source": "geo_best_practices"})
+                            for knowledge in self.geo_knowledge]
+            if self.vector_chunker:
+                # Use RAG to get relevant knowledge
+                qa_chain = self.vector_chunker.create_qa_chain(knowledge_docs, self.llm)
+                # Query for relevant GEO practices
+                geo_query = f"How to optimize this type of content for AI search engines: {content[:500]}"
+                context_result = qa_chain({"query": geo_query})
+                context = context_result.get("result", "")
+            else:
+                # Fallback to using all knowledge if vector_chunker not available
+                context = "\n\n".join(self.geo_knowledge)
             # Choose optimization approach
+            if optimization_type == "competitive_geo":
+                return self._competitive_geo_optimization(content, context)
             else:
+                return self._standard_geo_optimization(content, context, analyze_only)
         except Exception as e:
+            return {'error': f"RAG-enhanced optimization failed: {str(e)}"}
+    def _standard_geo_optimization(self, content: str, context: str, analyze_only: bool) -> Dict[str, Any]:
+        """Standard GEO optimization with RAG context"""
         try:
             prompt_template = ChatPromptTemplate.from_messages([
+                SystemMessagePromptTemplate.from_template(self.rag_enhancement_prompt),
+                HumanMessagePromptTemplate.from_template("Optimize this content using GEO best practices.")
             ])
             chain = prompt_template | self.llm
+            result = chain.invoke({
+                "context": context,
+                "content": content[:5000]  # Limit content length
+            })
             result_content = result.content if hasattr(result, 'content') else str(result)
             parsed_result = self._parse_optimization_result(result_content)
+            # Add metadata
             parsed_result.update({
+                'optimization_type': 'geo_standard',
+                'rag_enhanced': True,
                 'analyze_only': analyze_only,
+                'original_length': len(content),
+                'knowledge_sources': len(self.geo_knowledge)
             })
             return parsed_result
         except Exception as e:
+            return {'error': f"Standard GEO optimization failed: {str(e)}"}
+    def _competitive_geo_optimization(self, content: str, context: str) -> Dict[str, Any]:
+        """Competitive GEO analysis with RAG context"""
         try:
             prompt_template = ChatPromptTemplate.from_messages([
+                SystemMessagePromptTemplate.from_template(self.competitive_geo_prompt),
+                HumanMessagePromptTemplate.from_template("Perform competitive GEO analysis.")
             ])
             chain = prompt_template | self.llm
+            result = chain.invoke({
+                "context": context,
+                "content": content[:5000]
+            })
             result_content = result.content if hasattr(result, 'content') else str(result)
             parsed_result = self._parse_optimization_result(result_content)
             parsed_result.update({
+                'optimization_type': 'competitive_geo',
+                'rag_enhanced': True,
                 'competitive_analysis': True
             })
             return parsed_result
         except Exception as e:
+            return {'error': f"Competitive GEO optimization failed: {str(e)}"}
+    def batch_optimize_with_rag(self, content_list: List[str], optimization_type: str = "geo_standard") -> List[Dict[str, Any]]:
         """
+        Batch optimize multiple content pieces with RAG
         Args:
+            content_list: List of content to optimize
+            optimization_type: Type of optimization
         Returns:
+            List of optimization results
+        """
+        results = []
+        for i, content in enumerate(content_list):
+            try:
+                result = self.optimize_content_with_rag(
+                    content,
+                    optimization_type=optimization_type
+                )
+                result['batch_index'] = i
+                results.append(result)
+            except Exception as e:
+                results.append({
+                    'batch_index': i,
+                    'error': f"Batch GEO optimization failed: {str(e)}"
+                })
+        return results
+    def analyze_geo_readability(self, content: str) -> Dict[str, Any]:
+        """
+        Analyze content readability specifically for GEO/AI systems
         """
         try:
+            # Basic metrics
             words = content.split()
             sentences = re.split(r'[.!?]+', content)
             sentences = [s.strip() for s in sentences if s.strip()]
             paragraphs = [p.strip() for p in content.split('\n\n') if p.strip()]
+            # GEO-specific analysis
+            questions = len(re.findall(r'\?', content))
+            headings = len(re.findall(r'^#+\s', content, re.MULTILINE))
+            lists = len(re.findall(r'^\s*[-*+]\s', content, re.MULTILINE))
+            numbers = len(re.findall(r'\b\d+\.?\d*\b', content))
+            # Entity-like patterns (proper nouns)
+            entities = len(re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', content))
+            # Calculate GEO readability score
+            geo_score = self._calculate_geo_readability_score({
+                'avg_words_per_sentence': len(words) / len(sentences) if sentences else 0,
+                'questions_ratio': questions / len(sentences) if sentences else 0,
+                'structure_elements': headings + lists,
+                'entity_density': entities / len(words) if words else 0,
+                'numeric_data': numbers / len(words) if words else 0
+            })
             return {
+                'geo_readability_metrics': {
                     'total_words': len(words),
                     'total_sentences': len(sentences),
                     'total_paragraphs': len(paragraphs),
+                    'questions_count': questions,
+                    'headings_count': headings,
+                    'lists_count': lists,
+                    'entity_mentions': entities,
+                    'numeric_data_points': numbers
                 },
+                'geo_readability_score': geo_score,
+                'ai_optimization_indicators': {
+                    'question_ratio': questions / len(sentences) if sentences else 0,
+                    'structure_score': min(10, (headings + lists) * 2),
+                    'entity_density': entities / len(words) if words else 0,
+                    'data_richness': numbers / len(words) if words else 0
                 },
+                'geo_recommendations': self._generate_geo_recommendations({
+                    'questions': questions,
+                    'headings': headings,
+                    'lists': lists,
+                    'entities': entities,
+                    'sentences': len(sentences)
                 })
             }
         except Exception as e:
+            return {'error': f"GEO readability analysis failed: {str(e)}"}
+    def extract_geo_entities(self, content: str) -> Dict[str, Any]:
+        """
+        Extract entities and concepts relevant for GEO optimization
+        """
+        try:
+            if not self.vector_chunker:
+                return {'error': 'Vector chunker not available for entity extraction'}
+            # Create knowledge context about entity extraction
+            entity_knowledge = [Document(
+                page_content="""
+                For GEO optimization, important entities include:
+                1. Named entities: People, organizations, locations, brands
+                2. Technical concepts: Industry terms, methodologies, tools
+                3. Topical entities: Core subjects, themes, categories
+                4. Relational entities: Connected concepts, dependencies
+                5. Question entities: What users commonly ask about
+                """,
+                metadata={"source": "entity_extraction_guide"}
+            )]
+            qa_chain = self.vector_chunker.create_qa_chain(entity_knowledge, self.llm)
+            # Extract different types of entities
+            extraction_queries = [
+                "What are the main named entities (people, places, organizations) in this content?",
+                "What are the key technical concepts and terms?",
+                "What questions might users have about this content?",
+                "What related topics and concepts are mentioned?"
+            ]
+            extracted_data = {}
+            for query in extraction_queries:
+                full_query = f"{query}\n\nContent: {content[:3000]}"
+                result = qa_chain({"query": full_query})
+                query_key = query.split('?')[0].lower().replace(' ', '_').replace('what_are_the_', '')
+                extracted_data[query_key] = result.get("result", "")
+            return {
+                'geo_entities': extracted_data,
+                'extraction_method': 'rag_enhanced',
+                'content_length': len(content),
+                'extraction_success': True
+            }
+        except Exception as e:
+            return {'error': f"GEO entity extraction failed: {str(e)}"}
+    def generate_geo_variations(self, content: str, num_variations: int = 3) -> List[Dict[str, Any]]:
         """
+        Generate GEO-optimized content variations using RAG
+        """
+        variations = []
+        variation_types = [
+            ("faq_focused", "Transform into FAQ format optimized for AI Q&A systems"),
+            ("conversational", "Optimize for conversational AI and voice search"),
+            ("authoritative", "Enhance with authoritative tone for citation systems")
+        ]
+        try:
+            # Get GEO context
+            knowledge_docs = [Document(page_content=knowledge, metadata={"source": "geo_practices"})
+                            for knowledge in self.geo_knowledge]
+            if self.vector_chunker:
+                qa_chain = self.vector_chunker.create_qa_chain(knowledge_docs, self.llm)
+                for i, (variation_type, description) in enumerate(variation_types[:num_variations]):
+                    try:
+                        # Get specific guidance for this variation type
+                        context_query = f"How to optimize content for {variation_type} in AI systems?"
+                        context_result = qa_chain({"query": context_query})
+                        context = context_result.get("result", "")
+                        variation_prompt = f"""
+                        Create a {variation_type} version of the content optimized for GEO.
+                        Context: {context}
+                        Original Content: {content[:4000]}
+                        Variation Goal: {description}
+                        Return JSON:
+                        {{
+                          "variation_type": "{variation_type}",
+                          "optimized_content": "the rewritten content...",
+                          "geo_improvements": ["improvement 1", "improvement 2"],
+                          "target_ai_systems": ["ChatGPT", "Claude", "etc"],
+                          "expected_geo_benefits": ["benefit 1", "benefit 2"]
+                        }}
+                        """
+                        prompt_template = ChatPromptTemplate.from_messages([
+                            SystemMessagePromptTemplate.from_template(variation_prompt),
+                            HumanMessagePromptTemplate.from_template("Generate the GEO-optimized variation.")
+                        ])
+                        chain = prompt_template | self.llm
+                        result = chain.invoke({})
+                        result_content = result.content if hasattr(result, 'content') else str(result)
+                        parsed_result = self._parse_optimization_result(result_content)
+                        parsed_result.update({
+                            'variation_index': i,
+                            'rag_enhanced': True,
+                            'geo_optimized': True
+                        })
+                        variations.append(parsed_result)
+                    except Exception as e:
+                        variations.append({
+                            'variation_index': i,
+                            'variation_type': variation_type,
+                            'error': f"GEO variation generation failed: {str(e)}"
+                        })
+            else:
+                return [{'error': 'Vector chunker not available for variation generation'}]
+        except Exception as e:
+            return [{'error': f"GEO variation generation failed: {str(e)}"}]
+        return variations
+    def _calculate_geo_readability_score(self, metrics: Dict[str, float]) -> float:
+        """Calculate GEO-specific readability score"""
         try:
+            # GEO-optimized scoring
+            sentence_score = max(0, 10 - abs(metrics['avg_words_per_sentence'] - 15) * 0.3)
+            question_score = min(10, metrics['questions_ratio'] * 50)  # Reward questions
+            structure_score = min(10, metrics['structure_elements'] * 1.5)  # Reward headings/lists
+            entity_score = min(10, metrics['entity_density'] * 100)  # Reward entities
+            data_score = min(10, metrics['numeric_data'] * 200)  # Reward data points
+            # Weighted for GEO priorities
+            overall_score = (
+                sentence_score * 0.2 +
+                question_score * 0.25 +
+                structure_score * 0.25 +
+                entity_score * 0.15 +
+                data_score * 0.15
+            )
+            return round(overall_score, 1)
+        except Exception:
+            return 5.0
+    def _generate_geo_recommendations(self, metrics: Dict[str, int]) -> List[str]:
+        """Generate GEO-specific recommendations"""
+        recommendations = []
+        try:
+            if metrics['questions'] == 0:
+                recommendations.append("Add FAQ section or question-based headings for better AI Q&A performance")
+            if metrics['headings'] < 2:
+                recommendations.append("Add more structured headings to improve AI content parsing")
+            if metrics['lists'] == 0:
+                recommendations.append("Include bullet points or numbered lists for better information extraction")
+            if metrics['entities'] < 5:
+                recommendations.append("Include more specific entities (names, places, organizations) for authority")
+            if metrics['questions'] / metrics['sentences'] < 0.1:
+                recommendations.append("Consider transforming statements into question-answer pairs")
+            return recommendations
+        except Exception:
+            return ["Unable to generate specific GEO recommendations"]
     def _parse_optimization_result(self, response_text: str) -> Dict[str, Any]:
         """Parse LLM response and extract structured results"""
         try:
             if json_start != -1 and json_end != -1:
                 json_str = response_text[json_start:json_end]
                 parsed = json.loads(json_str)
                 return parsed
             else:
+                # If no JSON found, return structured error
                 return {
                     'raw_response': response_text,
                     'parsing_error': 'No JSON structure found in response',
+                    'geo_analysis': {
+                        'current_geo_score': 0,
+                        'ai_search_visibility': 0,
+                        'query_intent_matching': 0,
+                        'conversational_readiness': 0,
+                        'citation_worthiness': 0,
+                        'context_completeness': 0
+                    }
                 }
         except json.JSONDecodeError as e:
             return {
                 'raw_response': response_text,
                 'parsing_error': f'JSON decode error: {str(e)}',
+                'geo_analysis': {
+                    'current_geo_score': 0,
+                    'ai_search_visibility': 0,
+                    'query_intent_matching': 0,
+                    'conversational_readiness': 0,
+                    'citation_worthiness': 0,
+                    'context_completeness': 0
+                }
             }
         except Exception as e:
             return {
                 'raw_response': response_text,
                 'parsing_error': f'Unexpected parsing error: {str(e)}',
+                'geo_analysis': {
+                    'current_geo_score': 0,
+                    'ai_search_visibility': 0,
+                    'query_intent_matching': 0,
+                    'conversational_readiness': 0,
+                    'citation_worthiness': 0,
+                    'context_completeness': 0
+                }
             }
+    # Legacy methods for backward compatibility
+    def optimize_content(self, content: str, analyze_only: bool = False,
+                        include_keywords: bool = True, optimization_type: str = "standard") -> Dict[str, Any]:
+        """
+        Legacy method - redirects to RAG-enhanced optimization
+        """
+        if optimization_type == "standard":
+            return self.optimize_content_with_rag(content, "geo_standard", analyze_only)
+        elif optimization_type == "seo":
+            return self.optimize_content_with_rag(content, "geo_standard", analyze_only)
+        elif optimization_type == "competitive":
+            return self.optimize_content_with_rag(content, "competitive_geo", analyze_only)
+        else:
+            return self.optimize_content_with_rag(content, "geo_standard", analyze_only)
+    def analyze_content_readability(self, content: str) -> Dict[str, Any]:
+        """Legacy method - redirects to GEO readability analysis"""
+        return self.analyze_geo_readability(content)