Commit · 08a5a31
Parent(s): 2bf686d

use openrouter only instead of google-genai

Files changed:
- .gitignore +3 -0
- config.py +4 -6
- pipeline/critique_extraction.py +49 -36
- pipeline/disagreement_detection.py +39 -27
- pipeline/disagreement_resolution.py +2 -2
- pipeline/meta_review.py +1 -1
- pipeline/search_retrieval.py +126 -74
- requirements.txt +1 -3
- test_api.py +273 -0
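
Editor's note: the whole migration rests on one fact, namely that OpenRouter exposes its models through an OpenAI-compatible endpoint, so every google-generativeai call can be replaced by the openai SDK pointed at a different base URL. A minimal, self-contained sketch of the pattern this commit applies in each pipeline module (the prompt here is illustrative; the model id is the one the diff uses):

    import os
    from openai import OpenAI

    # OpenRouter speaks the OpenAI wire protocol; only the base_url changes.
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=os.getenv("OPENROUTER_API_KEY"),
    )

    response = client.chat.completions.create(
        model="google/gemini-2.5-flash-lite",  # any OpenRouter model id works here
        messages=[{"role": "user", "content": "Say hello in one word."}],
        max_tokens=16,
    )
    print(response.choices[0].message.content)
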
.gitignore ADDED
@@ -0,0 +1,3 @@
+venv/
+.env
+__pycache__/

config.py CHANGED
@@ -11,10 +11,10 @@ API_DESCRIPTION = """
 ## Automated Consensus Analysis for Peer Reviews
 
 This API provides comprehensive analysis of peer review disagreements using:
-- **LLM-based critique extraction** (Gemini 2.…)
+- **LLM-based critique extraction** (Gemini 2.5 Flash Lite via OpenRouter)
 - **Disagreement detection** between reviewers
 - **Search-augmented evidence retrieval** (Semantic Scholar, arXiv, Google Scholar, Tavily)
-- **AI-powered disagreement resolution** (DeepSeek-R1)
+- **AI-powered disagreement resolution** (DeepSeek-R1 via OpenRouter)
 - **Meta-review generation**
 
 ### Features:
@@ -29,12 +29,11 @@ MAX_REQUESTS_PER_MINUTE = int(os.getenv("MAX_REQUESTS_PER_MINUTE", "10"))
 MAX_CONCURRENT_TASKS = int(os.getenv("MAX_CONCURRENT_TASKS", "3"))
 QUEUE_MAX_SIZE = int(os.getenv("QUEUE_MAX_SIZE", "20"))
 
-# Model Configuration
-GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-2.…")
+# Model Configuration (all via OpenRouter)
+GEMINI_MODEL = os.getenv("GEMINI_MODEL", "google/gemini-2.5-flash-lite")
 DEEPSEEK_MODEL = os.getenv("DEEPSEEK_MODEL", "deepseek/deepseek-r1")
 
 # API Keys (from HF Spaces secrets)
-GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
 TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
 SERPAPI_API_KEY = os.getenv("SERPAPI_API_KEY")
@@ -58,7 +57,6 @@ def validate_environment():
         ValueError: If required variables are missing
     """
     required_vars = {
-        "GEMINI_API_KEY": GEMINI_API_KEY,
         "OPENROUTER_API_KEY": OPENROUTER_API_KEY,
         "TAVILY_API_KEY": TAVILY_API_KEY,
     }

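Editor's note: the hunk drops GEMINI_API_KEY from required_vars but does not show the body of validate_environment. A plausible sketch, assuming the function simply raises on unset keys as its docstring promises; the body below is hypothetical:

    import os

    OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
    TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")

    def validate_environment():
        # Hypothetical body (not shown in the diff): collect required secrets
        # and raise a readable ValueError listing whichever are unset.
        required_vars = {
            "OPENROUTER_API_KEY": OPENROUTER_API_KEY,
            "TAVILY_API_KEY": TAVILY_API_KEY,
        }
        missing = [name for name, value in required_vars.items() if not value]
        if missing:
            raise ValueError(f"Missing required environment variables: {', '.join(missing)}")
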
pipeline/critique_extraction.py CHANGED
@@ -1,16 +1,21 @@
 import json
 import os
 from typing import List, Dict
-
+from openai import OpenAI
 from pydantic import BaseModel
 import asyncio
-import time
 
 from dotenv import load_dotenv
 load_dotenv()
 
-# …
-…
+# Initialize OpenRouter client
+client = OpenAI(
+    base_url="https://openrouter.ai/api/v1",
+    api_key=os.getenv("OPENROUTER_API_KEY"),
+)
+
+# Model to use for critique extraction
+CRITIQUE_MODEL = "google/gemini-2.5-flash-lite"
 
 class CritiquePoint(BaseModel):
     Methodology: List[str] = []
@@ -21,7 +26,7 @@ class CritiquePoint(BaseModel):
 
 async def extract_single_critique(review_text: str, retries: int = 5) -> Dict:
     """
-    Extract critique points from a single review using Gemini
+    Extract critique points from a single review using OpenRouter (Gemini)
 
     Args:
         review_text: The review text to analyze
@@ -30,59 +35,67 @@ async def extract_single_critique(review_text: str, retries: int = 5) -> Dict:
     Returns:
         Dictionary with categorized critique points
     """
-    prompt = f"""
-    Extract key critique points from the following research paper review.
-    Categorize them into aspects: Methodology, Experiments, Clarity, Significance, Novelty.
-    Return a structured JSON with these categories as keys and lists of critique points as values.
-
-    Review:
-    {review_text}
+    system_prompt = """
+    You are an expert at analyzing academic peer reviews.
+    Extract key critique points from the review and categorize them.
 
     Respond with ONLY valid JSON in this format:
-    {
+    {
     "Methodology": ["point1", "point2"],
     "Experiments": ["point1"],
     "Clarity": ["point1", "point2"],
     "Significance": ["point1"],
     "Novelty": ["point1"]
-    }
+    }
+    """
+
+    user_prompt = f"""
+    Extract key critique points from the following research paper review.
+    Categorize them into aspects: Methodology, Experiments, Clarity, Significance, Novelty.
+
+    Review:
+    {review_text}
     """
 
-    …
-    …
-    }
-    )
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_prompt},
+    ]
 
     for attempt in range(retries):
         try:
             response = await asyncio.to_thread(
-                …
-                …
+                client.chat.completions.create,
+                model=CRITIQUE_MODEL,
+                messages=messages,
+                max_tokens=2048,
+                response_format={"type": "json_object"},
             )
 
-            if not response.…
-                raise ValueError("Empty response from …")
+            if not response.choices or not response.choices[0].message.content.strip():
+                raise ValueError("Empty response from API")
 
-            result = json.loads(response.…)
+            result = json.loads(response.choices[0].message.content.strip())
 
             # Validate structure
             critique = CritiquePoint(**result)
             return critique.model_dump()
 
-        except genai.types.generation_types.BlockedPromptException as e:
-            print(f"Content blocked by safety filters: {e}")
-            return {
-                "Methodology": [],
-                "Experiments": [],
-                "Clarity": [],
-                "Significance": [],
-                "Novelty": [],
-                "error": "Content blocked by safety filters"
-            }
-
         except Exception as e:
+            error_msg = str(e)
+
+            # Check for content safety blocks
+            if "safety" in error_msg.lower() or "blocked" in error_msg.lower():
+                print(f"Content blocked by safety filters: {e}")
+                return {
+                    "Methodology": [],
+                    "Experiments": [],
+                    "Clarity": [],
+                    "Significance": [],
+                    "Novelty": [],
+                    "error": "Content blocked by safety filters"
+                }
+
             wait_time = 2 ** attempt
             print(f"Attempt {attempt + 1} failed: {e}. Retrying in {wait_time}s...")

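Editor's note: two details of the new version are worth isolating. response_format={"type": "json_object"} asks OpenRouter for JSON mode, and the pydantic model then enforces the schema the prompt describes. A small sketch of that validation step (the hunk only shows the Methodology field; the other four are assumed to mirror the prompt's categories):

    from typing import List
    from pydantic import BaseModel

    class CritiquePoint(BaseModel):
        # One list per category the prompt asks for; missing keys default to [].
        Methodology: List[str] = []
        Experiments: List[str] = []
        Clarity: List[str] = []
        Significance: List[str] = []
        Novelty: List[str] = []

    parsed = CritiquePoint(**{"Methodology": ["weak baselines"]})
    print(parsed.model_dump())
    # A malformed value (e.g. a string where a list is expected) raises
    # ValidationError, which the retry loop treats like any other failure.
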
pipeline/disagreement_detection.py CHANGED
@@ -2,15 +2,21 @@ import json
 import os
 from typing import List, Dict
 from itertools import combinations
-
+from openai import OpenAI
 from pydantic import BaseModel, Field
 import asyncio
 
 from dotenv import load_dotenv
 load_dotenv()
 
-# …
-…
+# Initialize OpenRouter client
+client = OpenAI(
+    base_url="https://openrouter.ai/api/v1",
+    api_key=os.getenv("OPENROUTER_API_KEY"),
+)
+
+# Model to use for disagreement detection
+DISAGREEMENT_MODEL = "google/gemini-2.5-flash-lite"
 
 class DisagreementDetails(BaseModel):
     Methodology: List[str] = Field(default_factory=list)
@@ -48,7 +54,24 @@ async def compare_review_pair(
     Returns:
         Disagreement analysis results
     """
-    prompt = f"""
+    system_prompt = """
+    You are an expert at analyzing academic peer review disagreements.
+    Compare reviews and identify disagreements across different aspects.
+
+    Respond with ONLY valid JSON in this exact format:
+    {
+    "disagreement_score": 0.5,
+    "disagreement_details": {
+        "Methodology": ["specific disagreement point 1"],
+        "Experiments": ["specific disagreement point 1"],
+        "Clarity": [],
+        "Significance": ["specific disagreement point 1"],
+        "Novelty": []
+    }
+    }
+    """
+
+    user_prompt = f"""
     Compare the following two reviews and identify disagreements across different aspects.
     Assess disagreement level (0.0 = perfect agreement, 1.0 = complete disagreement) and
     list specific points of disagreement for each category.
@@ -66,38 +89,27 @@ async def compare_review_pair(
     Clarity: {list_to_string(review2.get('Clarity', []))}
     Significance: {list_to_string(review2.get('Significance', []))}
     Novelty: {list_to_string(review2.get('Novelty', []))}
-
-    Respond with ONLY valid JSON in this exact format:
-    {{
-    "disagreement_score": 0.5,
-    "disagreement_details": {{
-        "Methodology": ["specific disagreement point 1"],
-        "Experiments": ["specific disagreement point 1"],
-        "Clarity": [],
-        "Significance": ["specific disagreement point 1"],
-        "Novelty": []
-    }}
-    }}
     """
 
-    …
-    …
-    }
-    )
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_prompt},
+    ]
 
     for attempt in range(retries):
         try:
             response = await asyncio.to_thread(
-                …
-                …
+                client.chat.completions.create,
+                model=DISAGREEMENT_MODEL,
+                messages=messages,
+                max_tokens=2048,
+                response_format={"type": "json_object"},
             )
 
-            if not response.…
-                raise ValueError("Empty response from …")
+            if not response.choices or not response.choices[0].message.content.strip():
+                raise ValueError("Empty response from API")
 
-            result = json.loads(response.…)
+            result = json.loads(response.choices[0].message.content.strip())
 
             # Validate structure
             disagreement = DisagreementResult(

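Editor's note: the hunks never show where combinations is used, but with N reviews the module presumably compares every unordered pair. A sketch of that enumeration (the wiring to compare_review_pair is assumed, not shown in the diff):

    from itertools import combinations

    critiques = ["critique of review 1", "critique of review 2", "critique of review 3"]

    # All C(N, 2) index pairs: (0, 1), (0, 2), (1, 2). Each pair would be
    # handed to compare_review_pair in the real module.
    for i, j in combinations(range(len(critiques)), 2):
        print(f"compare reviews {i} and {j}")
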
pipeline/disagreement_resolution.py CHANGED
@@ -18,11 +18,11 @@ client = OpenAI(
 # Priority list of models to try
 # 1. DeepSeek R1 (Best reasoning, most expensive)
 # 2. DeepSeek R1 Distill (Good reasoning, cheaper)
-# 3. Gemini 2.…
+# 3. Gemini 2.5 Flash Lite (Cheap, fast fallback)
 MODELS = [
     "deepseek/deepseek-r1",
     "deepseek/deepseek-r1-distill-llama-70b",
-    "google/gemini-2.…
+    "google/gemini-2.5-flash-lite"
 ]
 
 class ResolutionDetails(BaseModel):

pipeline/meta_review.py CHANGED
@@ -18,7 +18,7 @@ client = OpenAI(
 MODELS = [
     "deepseek/deepseek-r1",
     "deepseek/deepseek-r1-distill-llama-70b",
-    "google/gemini-2.…
+    "google/gemini-2.5-flash-lite"
 ]
 
 class MetaReviewResult(BaseModel):

pipeline/search_retrieval.py CHANGED
@@ -1,63 +1,29 @@
 import os
 from typing import Dict, List
 import asyncio
-from …
+from openai import OpenAI
 from langchain_community.utilities import ArxivAPIWrapper, SerpAPIWrapper
 from langchain_community.tools.semanticscholar.tool import SemanticScholarQueryRun
 from langchain_community.tools.tavily_search import TavilySearchResults
-from langchain.agents import AgentType, initialize_agent, AgentExecutor
-from langchain.tools import Tool
 
 from dotenv import load_dotenv
 load_dotenv()
 
-# Initialize LLM
-llm = …
-    …
-    max_retries=2,
-)
+# Initialize OpenRouter client for LLM calls
+client = OpenAI(
+    base_url="https://openrouter.ai/api/v1",
+    api_key=os.getenv("OPENROUTER_API_KEY"),
+)
+
+# Model for search/retrieval tasks
+SEARCH_MODEL = "google/gemini-2.5-flash-lite"
 
 # Initialize search tools
 semantic_scholar = SemanticScholarQueryRun()
 google_scholar = SerpAPIWrapper(params={"engine": "google_scholar"})
 arxiv_search = ArxivAPIWrapper()
 tavily_search = TavilySearchResults(max_results=5)
 
-# Define tools
-tools = [
-    Tool(
-        name="TavilySearch",
-        func=tavily_search.run,
-        description="Retrieves the latest State-of-the-Art (SoTA) research and current academic information"
-    ),
-    Tool(
-        name="SemanticScholar",
-        func=semantic_scholar.run,
-        description="Find academic papers from Semantic Scholar database"
-    ),
-    Tool(
-        name="GoogleScholar",
-        func=google_scholar.run,
-        description="Search for scholarly articles and citations"
-    ),
-    Tool(
-        name="ArxivSearch",
-        func=arxiv_search.run,
-        description="Find research papers from ArXiv preprint repository"
-    ),
-]
-
-# Initialize agent
-agent = initialize_agent(
-    tools=tools,
-    llm=llm,
-    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
-    verbose=False,
-    handle_parsing_errors=True,
-    max_iterations=10
-)
-
 def combine_critiques(critique_points: List[Dict]) -> Dict[str, str]:
     """
     Combine critique points from multiple reviews into categories
@@ -82,6 +48,16 @@ def combine_critiques(critique_points: List[Dict]) -> Dict[str, str]:
 
     return combined
 
+async def run_search_tool(tool_name: str, tool_func, query: str) -> str:
+    """Run a search tool with error handling"""
+    try:
+        result = await asyncio.to_thread(tool_func, query)
+        return str(result) if result else ""
+    except Exception as e:
+        print(f"{tool_name} search failed: {e}")
+        return ""
+
+
 async def search_sota(paper_title: str, paper_abstract: str, retries: int = 3) -> str:
     """
     Search for state-of-the-art research related to the paper
@@ -94,29 +70,73 @@ async def search_sota(paper_title: str, paper_abstract: str, retries: int = 3) -> str:
     Returns:
         Summary of SoTA findings
     """
-    …
-    …
+    # Create search query
+    search_query = f"{paper_title} recent advances methodology"
+
+    # Run multiple searches in parallel
+    search_tasks = [
+        run_search_tool("Tavily", tavily_search.run, search_query),
+        run_search_tool("ArXiv", arxiv_search.run, search_query[:300]),
+        run_search_tool("SemanticScholar", semantic_scholar.run, paper_title),
+    ]
+
+    search_results = await asyncio.gather(*search_tasks)
+
+    # Combine all search results
+    combined_results = "\n\n".join([
+        f"=== Tavily Results ===\n{search_results[0]}" if search_results[0] else "",
+        f"=== ArXiv Results ===\n{search_results[1]}" if search_results[1] else "",
+        f"=== Semantic Scholar Results ===\n{search_results[2]}" if search_results[2] else "",
+    ])
+
+    if not combined_results.strip():
+        return "No SoTA research found from available sources."
+
+    # Use LLM to synthesize the results
+    system_prompt = """
+    You are an expert at synthesizing academic research findings.
+    Summarize the search results to identify state-of-the-art approaches and recent advances.
+    Focus on methodologies, key findings, and how they relate to the paper being reviewed.
+    """
+
+    user_prompt = f"""
+    Paper Title: {paper_title}
+    Paper Abstract: {paper_abstract[:500]}
+
+    Search Results:
+    {combined_results[:4000]}
+
+    Provide a concise summary of the state-of-the-art research relevant to this paper.
+    """
+
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_prompt},
+    ]
 
     for attempt in range(retries):
         try:
-            …
+            response = await asyncio.to_thread(
+                client.chat.completions.create,
+                model=SEARCH_MODEL,
+                messages=messages,
+                max_tokens=2048,
+            )
 
-            if not …
-                raise ValueError("Empty …")
+            if not response.choices or not response.choices[0].message.content.strip():
+                raise ValueError("Empty response from API")
 
-            return …
+            return response.choices[0].message.content.strip()
 
         except Exception as e:
             wait_time = 2 ** attempt
-            print(f"SoTA …")
+            print(f"SoTA synthesis attempt {attempt + 1} failed: {e}")
 
             if attempt < retries - 1:
                 await asyncio.sleep(wait_time)
             else:
-                …
+                # Return raw results if synthesis fails
+                return f"Raw search results (synthesis failed):\n{combined_results[:2000]}"
 
 async def retrieve_evidence_for_category(
     category: str,
@@ -137,28 +157,60 @@ async def retrieve_evidence_for_category(
     if critiques == "No critiques" or not critiques.strip():
         return f"No critiques to validate for {category}"
 
-    …
-        f"related to {category}: {critiques[:500]}"
-    )
+    # Create targeted search query
+    search_query = f"{category} research validation {critiques[:200]}"
 
-    …
+    # Run search
+    try:
+        tavily_result = await run_search_tool("Tavily", tavily_search.run, search_query)
+        arxiv_result = await run_search_tool("ArXiv", arxiv_search.run, search_query[:200])
+
+        combined = f"{tavily_result}\n{arxiv_result}".strip()
+
+        if not combined:
+            return f"No evidence found for {category} critiques"
+
+        # Use LLM to analyze relevance
+        system_prompt = f"""
+        You are an expert at evaluating academic critiques.
+        Analyze the search results to find evidence that supports or contradicts the critiques.
+        Focus on the {category} aspect.
+        """
+
+        user_prompt = f"""
+        Critiques for {category}: {critiques}
+
+        Search Results:
+        {combined[:2000]}
+
+        Summarize the evidence found that relates to these critiques.
+        """
+
+        messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ]
+
+        for attempt in range(retries):
+            try:
+                response = await asyncio.to_thread(
+                    client.chat.completions.create,
+                    model=SEARCH_MODEL,
+                    messages=messages,
+                    max_tokens=1024,
+                )
+
+                if response.choices and response.choices[0].message.content.strip():
+                    return response.choices[0].message.content.strip()
+
+            except Exception as e:
+                if attempt < retries - 1:
+                    await asyncio.sleep(2 ** attempt)
+
+        return f"Evidence retrieval completed for {category}"
+
+    except Exception as e:
+        return f"Error retrieving evidence for {category}: {str(e)}"
 
 async def retrieve_evidence(combined_critiques: Dict[str, str]) -> Dict[str, str]:
     """

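Editor's note: this file sees the deepest change. The LangChain ReAct agent (up to max_iterations=10 tool-calling rounds per query) is replaced by a fixed fan-out over the search tools plus a single synthesis call, which makes latency and cost per request predictable. The concurrency shape, reduced to a runnable toy (the fetch stub stands in for the blocking tool calls that the real module wraps with asyncio.to_thread):

    import asyncio

    async def fetch(source: str, query: str) -> str:
        # Stand-in for a blocking search tool; the real module wraps
        # tool.run with asyncio.to_thread inside run_search_tool.
        await asyncio.sleep(0.1)
        return f"{source} results for {query!r}"

    async def main():
        # Same shape as search_sota: launch every source at once,
        # then join the non-empty results for one synthesis prompt.
        results = await asyncio.gather(
            fetch("Tavily", "transformer sota"),
            fetch("ArXiv", "transformer sota"),
            fetch("SemanticScholar", "transformer sota"),
        )
        print("\n\n".join(r for r in results if r))

    asyncio.run(main())
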
requirements.txt CHANGED
@@ -1,14 +1,12 @@
 # Web Framework
 gradio==5.9.1
 
-# LLM Libraries
+# LLM Libraries (OpenRouter uses OpenAI SDK)
 openai==1.59.5
-google-generativeai==0.8.3
 
 # LangChain and Tools
 langchain==0.3.13
 langchain-community==0.3.13
-langchain-google-genai==2.0.8
 langgraph==0.2.59
 langgraph-checkpoint-sqlite==2.0.5

test_api.py ADDED
@@ -0,0 +1,273 @@
+"""
+Local test script for the MetaSearch API
+Tests individual pipeline components with sample data
+"""
+
+import asyncio
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Sample test data
+SAMPLE_PAPER_TITLE = "Attention Is All You Need"
+SAMPLE_PAPER_ABSTRACT = """
+We propose a new simple network architecture, the Transformer, based solely on
+attention mechanisms, dispensing with recurrence and convolutions entirely.
+Experiments on two machine translation tasks show these models to be superior
+in quality while being more parallelizable and requiring significantly less time to train.
+"""
+
+SAMPLE_REVIEWS = [
+    """
+    This paper introduces a novel architecture that replaces recurrence with self-attention.
+
+    Strengths:
+    - The model achieves state-of-the-art results on translation benchmarks
+    - Training is significantly faster due to parallelization
+    - The attention visualization provides interpretability
+
+    Weaknesses:
+    - Limited evaluation on other NLP tasks beyond translation
+    - The computational complexity of self-attention scales quadratically with sequence length
+    - Missing comparison with some recent RNN variants
+
+    The methodology is sound but could benefit from more diverse experiments.
+    Overall, this is a strong contribution to the field.
+    """,
+    """
+    The Transformer architecture is an interesting departure from RNN-based models.
+
+    Strengths:
+    - Clean and elegant architecture design
+    - Strong empirical results on WMT benchmarks
+    - Good ablation studies
+
+    Weaknesses:
+    - The paper overclaims novelty - attention mechanisms existed before
+    - Experiments are limited to machine translation only
+    - No theoretical analysis of why this works better
+    - Memory requirements are high for long sequences
+
+    The significance of this work is questionable given the narrow evaluation scope.
+    """,
+    """
+    This is a well-written paper with clear presentation of a new architecture.
+
+    Strengths:
+    - Excellent results, setting new SOTA on translation
+    - The multi-head attention is a clever innovation
+    - Reproducibility details are provided
+
+    Weaknesses:
+    - Claims of "attention is all you need" are overstated
+    - Limited to sequence-to-sequence tasks
+    - Positional encoding seems like a hack
+
+    Overall a solid paper with important contributions despite some limitations.
+    """
+]
+
+
+async def test_critique_extraction():
+    """Test the critique extraction module"""
+    print("\n" + "="*60)
+    print("Testing Critique Extraction")
+    print("="*60)
+
+    from pipeline.critique_extraction import extract_critiques
+
+    print(f"Processing {len(SAMPLE_REVIEWS)} reviews...")
+    critiques = await extract_critiques(SAMPLE_REVIEWS)
+
+    for i, critique in enumerate(critiques):
+        print(f"\n--- Review {i+1} Critiques ---")
+        for category, points in critique.items():
+            if category != "error" and points:
+                print(f" {category}: {len(points)} points")
+                for point in points[:2]:  # Show first 2 points
+                    print(f" - {point[:80]}...")
+
+    return critiques
+
+
+async def test_disagreement_detection(critiques):
+    """Test the disagreement detection module"""
+    print("\n" + "="*60)
+    print("Testing Disagreement Detection")
+    print("="*60)
+
+    from pipeline.disagreement_detection import detect_disagreements
+
+    print(f"Detecting disagreements across {len(critiques)} reviews...")
+    disagreements = await detect_disagreements(critiques)
+
+    for d in disagreements:
+        pair = d.get('review_pair', [])
+        score = d.get('disagreement_score', 0)
+        print(f"\n--- Reviews {pair[0]+1} vs {pair[1]+1} ---")
+        print(f" Disagreement Score: {score:.2f}")
+
+        details = d.get('disagreement_details', {})
+        for category, points in details.items():
+            if points:
+                print(f" {category}: {len(points)} disagreements")
+
+    return disagreements
+
+
+async def test_search_retrieval(critiques):
+    """Test the search and retrieval module"""
+    print("\n" + "="*60)
+    print("Testing Search & Retrieval")
+    print("="*60)
+
+    from pipeline.search_retrieval import search_and_retrieve
+
+    print("Searching for SoTA research and evidence...")
+    results = await search_and_retrieve(
+        SAMPLE_PAPER_TITLE,
+        SAMPLE_PAPER_ABSTRACT,
+        critiques
+    )
+
+    print(f"\n--- SoTA Results (first 500 chars) ---")
+    print(results.get('SoTA_Results', 'N/A')[:500])
+
+    print(f"\n--- Combined Critiques ---")
+    for cat, text in results.get('Combined_Critiques', {}).items():
+        print(f" {cat}: {len(text)} chars")
+
+    print(f"\n--- Retrieved Evidence ---")
+    for cat, evidence in results.get('Retrieved_Evidence', {}).items():
+        print(f" {cat}: {len(evidence)} chars")
+
+    return results
+
+
+async def test_disagreement_resolution(critiques, disagreements, search_results):
+    """Test the disagreement resolution module"""
+    print("\n" + "="*60)
+    print("Testing Disagreement Resolution")
+    print("="*60)
+
+    from pipeline.disagreement_resolution import resolve_disagreements
+
+    print(f"Resolving {len(disagreements)} disagreements...")
+    resolutions = await resolve_disagreements(
+        SAMPLE_PAPER_TITLE,
+        SAMPLE_PAPER_ABSTRACT,
+        disagreements,
+        critiques,
+        search_results
+    )
+
+    for i, resolution in enumerate(resolutions):
+        print(f"\n--- Resolution {i+1} ---")
+        details = resolution.get('resolution_details', {})
+
+        accepted = details.get('accepted_critique_points', {})
+        rejected = details.get('rejected_critique_points', {})
+
+        print(f" Accepted categories: {list(accepted.keys())}")
+        print(f" Rejected categories: {list(rejected.keys())}")
+
+        summary = details.get('final_resolution_summary', '')
+        print(f" Summary: {summary[:200]}...")
+
+    return resolutions
+
+
+async def test_meta_review(resolutions, search_results):
+    """Test the meta-review generation module"""
+    print("\n" + "="*60)
+    print("Testing Meta-Review Generation")
+    print("="*60)
+
+    from pipeline.meta_review import generate_meta_review
+
+    print("Generating meta-review...")
+    meta_review = await generate_meta_review(
+        SAMPLE_PAPER_TITLE,
+        SAMPLE_PAPER_ABSTRACT,
+        resolutions,
+        search_results
+    )
+
+    print(f"\n--- Meta-Review (first 1000 chars) ---")
+    print(meta_review[:1000])
+    print("...")
+
+    return meta_review
+
+
+async def run_full_pipeline():
+    """Run the complete pipeline test"""
+    print("\n" + "#"*60)
+    print("# MetaSearch API - Full Pipeline Test")
+    print("#"*60)
+
+    # Check environment
+    if not os.getenv("OPENROUTER_API_KEY"):
+        print("\n❌ ERROR: OPENROUTER_API_KEY not set!")
+        print("Please set it in your .env file")
+        return
+
+    print("\n✅ OPENROUTER_API_KEY is set")
+
+    try:
+        # Step 1: Extract critiques
+        critiques = await test_critique_extraction()
+
+        # Step 2: Detect disagreements
+        disagreements = await test_disagreement_detection(critiques)
+
+        # Step 3: Search and retrieve (optional - can be slow)
+        search_results = await test_search_retrieval(critiques)
+
+        # Step 4: Resolve disagreements
+        resolutions = await test_disagreement_resolution(
+            critiques, disagreements, search_results
+        )
+
+        # Step 5: Generate meta-review
+        meta_review = await test_meta_review(resolutions, search_results)
+
+        print("\n" + "#"*60)
+        print("# ✅ Full Pipeline Test Complete!")
+        print("#"*60)
+
+    except Exception as e:
+        print(f"\n❌ Pipeline failed with error: {e}")
+        import traceback
+        traceback.print_exc()
+
+
+async def run_quick_test():
+    """Run a quick test of just critique extraction"""
+    print("\n" + "#"*60)
+    print("# MetaSearch API - Quick Test (Critique Extraction Only)")
+    print("#"*60)
+
+    if not os.getenv("OPENROUTER_API_KEY"):
+        print("\n❌ ERROR: OPENROUTER_API_KEY not set!")
+        return
+
+    print("\n✅ OPENROUTER_API_KEY is set")
+
+    try:
+        critiques = await test_critique_extraction()
+        print("\n✅ Quick test passed!")
+    except Exception as e:
+        print(f"\n❌ Test failed: {e}")
+        import traceback
+        traceback.print_exc()
+
+
+if __name__ == "__main__":
+    import sys
+
+    if len(sys.argv) > 1 and sys.argv[1] == "--quick":
+        asyncio.run(run_quick_test())
+    else:
+        asyncio.run(run_full_pipeline())
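
Editor's note: per the __main__ block, python test_api.py runs the five pipeline stages end to end against the bundled sample reviews, while python test_api.py --quick exercises critique extraction only; both require OPENROUTER_API_KEY in the environment or a local .env file.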