Spaces:
Sleeping
Sleeping
| import os | |
| import requests | |
| import google.generativeai as genai | |
| import logging | |
| import asyncio | |
| from fastapi import FastAPI, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel | |
| from typing import List, Dict, Any | |
# --- Configuration ---
# Module-wide logging at INFO level, plus this module's own logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The Gemini API key and the search backend's base URL come from the
# environment. The key is None when unset; the URL defaults to "" and has
# surrounding whitespace removed.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
SEARCH_API_BASE_URL = os.environ.get("SEARCH_API_BASE_URL", "").strip()

# Configure the Google Gemini client and create the shared model handle.
genai.configure(api_key=GEMINI_API_KEY)
gemini_model = genai.GenerativeModel("gemini-2.5-flash")
# --- FastAPI application setup ---
app = FastAPI(
    title="AI Search Agent",
    description="一个使用 Gemini-2.5-Flash 进行查询优化和结果摘要的智能中间层。",
    version="2.0.0",  # Version bump!
)

# Permissive CORS: every origin, method and header is accepted.
# NOTE(review): wildcard origins are combined with allow_credentials=True;
# confirm against the CORS middleware documentation that this combination is
# intended before exposing the service to browsers that send credentials.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# --- Data models ---
class SearchRequest(BaseModel):
    """Request body accepted by the intelligent-search endpoint."""

    # Forwarded unchanged to the search backend as "platform".
    platform: str
    # The user's natural-language question; refined into keywords before searching.
    query: str
    # Forwarded unchanged to the search backend as "max_results".
    max_results: int = 10
# --- Core AI functions ---
async def get_ai_keywords(natural_language_query: str) -> str:
    """Turn a natural-language question into a boolean keyword string via Gemini.

    This step is best-effort: whenever a keyword string cannot be produced,
    the caller's query is returned unchanged so the search can still proceed.

    Args:
        natural_language_query: The user's question as typed.

    Returns:
        The stripped keyword string produced by Gemini, or
        ``natural_language_query`` itself when the query is blank,
        ``GEMINI_API_KEY`` is unset, Gemini returns an empty string, or the
        Gemini call raises.
    """
    # A blank query gives the model nothing to work with; skip the API call
    # instead of letting it invent keywords.
    if not natural_language_query.strip():
        return natural_language_query

    if not GEMINI_API_KEY:
        logger.warning("GEMINI_API_KEY 未设置,将使用原始查询。")
        return natural_language_query

    # Built from explicit pieces so the prompt text does not depend on how
    # this function body is indented.
    prompt = (
        "\n"
        "You are an expert academic researcher. Your task is to convert a user's "
        "natural language query into a highly effective, concise, boolean-logic "
        "keyword string for searching academic databases like PubMed.\n"
        "- Use boolean operators like AND, OR.\n"
        "- Use parentheses for grouping.\n"
        "- Focus on core concepts.\n"
        "- Keep the string concise and in English.\n"
        "- Do not add any explanation, markdown, or quotation marks. "
        "Just return the pure keyword string.\n"
        f"User Query: \"{natural_language_query}\"\n"
        "Keyword String:\n"
    )

    try:
        logger.info(
            "向 Gemini 发送请求 [关键词提炼],查询: '%s'", natural_language_query
        )
        response = await gemini_model.generate_content_async(prompt)
        optimized_query = response.text.strip()
        logger.info(
            "原始查询: '%s' -> Gemini 优化关键词: '%s'",
            natural_language_query,
            optimized_query,
        )
        if not optimized_query:
            logger.warning("Gemini 返回空关键词,回退到原始查询。")
            return natural_language_query
        return optimized_query
    except Exception as e:
        # Deliberately broad: any Gemini failure must degrade to the raw query
        # rather than fail the whole search.
        logger.error("调用 Gemini API [关键词提炼] 失败: %s", e)
        return natural_language_query
| # ================================================================= | |
| # BEGIN: 新增功能 - AI 摘要 | |
| # ================================================================= | |
| async def summarize_results_with_ai(papers: List[Dict[str, Any]], original_query: str) -> str | None: | |
| """ | |
| 使用 Gemini 根据搜索结果的摘要生成一个综合性总结。 | |
| """ | |
| if not GEMINI_API_KEY or not papers: | |
| return None | |
| # 只选择前 5 篇或更少的论文进行摘要,以提高效率和相关性 | |
| papers_for_summary = papers[:5] | |
| # 构建用于摘要的上下文 | |
| context = "" | |
| for i, paper in enumerate(papers_for_summary): | |
| title = paper.get('title', 'No Title') | |
| abstract = paper.get('abstract') or paper.get('summary', 'No Abstract Available.') | |
| context += f"### Paper {i+1}: {title}\nAbstract: {abstract}\n\n" | |
| # 精心设计的摘要 Prompt | |
| prompt = f""" | |
| You are a helpful medical research assistant. Based on the abstracts of the scientific papers provided below, write a concise and easy-to-understand summary that directly answers the user's original research question. | |
| - Start with a direct introductory sentence. | |
| - Use bullet points to list the key findings. | |
| - Base your summary STRICTLY on the information given in the abstracts. Do not add any outside knowledge. | |
| - The summary should be in clear, accessible language. | |
| USER'S ORIGINAL QUESTION: "{original_query}" | |
| PROVIDED ABSTRACTS: | |
| {context} | |
| CONCISE SUMMARY: | |
| """ | |
| try: | |
| logger.info(f"向 Gemini 发送请求 [结果摘要],基于 {len(papers_for_summary)} 篇论文。") | |
| response = await gemini_model.generate_content_async(prompt) | |
| summary = response.text.strip() | |
| logger.info("Gemini 摘要生成成功。") | |
| return summary | |
| except Exception as e: | |
| logger.error(f"调用 Gemini API [结果摘要] 失败: {e}") | |
| return None # 如果摘要失败,不影响主流程 | |
| # ================================================================= | |
| # END: 新增功能 - AI 摘要 | |
| # ================================================================= | |
# --- API endpoints ---
# NOTE(review): no route decorator is visible on this function in this view;
# confirm that it is registered with the application.
def read_root() -> dict:
    """Return a fixed status payload indicating the service is running."""
    return dict(status="AI Search Agent is running")
async def intelligent_search(request: SearchRequest):
    """Run the full pipeline: keyword refinement, backend search, AI summary.

    Args:
        request: Platform, natural-language query and result limit.

    Returns:
        A dict with ``original_query``, ``optimized_query``, ``ai_summary``
        (``None`` when no summary was produced) and ``results``.

    Raises:
        HTTPException: 500 when ``SEARCH_API_BASE_URL`` is not configured;
            503 when the search backend cannot be reached, answers with an
            error status, or returns a body that is not valid JSON; 502 when
            the backend's JSON payload is not an object.
    """
    # NOTE(review): no route decorator is visible on this function in this
    # view; confirm that it is registered with the application.
    if not SEARCH_API_BASE_URL:
        raise HTTPException(status_code=500, detail="SEARCH_API_BASE_URL 未配置")

    # 1. Keyword refinement
    optimized_query = await get_ai_keywords(request.query)
    search_payload = {
        "platform": request.platform,
        "query": optimized_query,
        "max_results": request.max_results,
    }

    # 2. Call the search backend
    # Tolerate a trailing slash in the configured base URL.
    search_url = f"{SEARCH_API_BASE_URL.rstrip('/')}/search"
    logger.info("向搜索后端发送请求: %s", search_payload)
    try:
        # requests is synchronous; run it in a worker thread so the event
        # loop is not blocked while waiting on the backend.
        response = await asyncio.to_thread(
            requests.post, search_url, json=search_payload, timeout=30
        )
        response.raise_for_status()
        search_results_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding errors on requests versions where
        # the decode error is not a RequestException subclass.
        # NOTE(review): the detail echoes the underlying exception text back
        # to the client; consider whether that exposes backend details.
        logger.error("调用搜索后端失败: %s", e)
        raise HTTPException(
            status_code=503, detail=f"无法连接到搜索服务: {str(e)}"
        ) from e

    # The code below reads the payload with .get(); reject anything that is
    # not a JSON object instead of failing with an AttributeError.
    if not isinstance(search_results_data, dict):
        logger.error(
            "搜索后端返回了非对象的 JSON: %s", type(search_results_data).__name__
        )
        raise HTTPException(status_code=502, detail="搜索服务返回了无效的响应格式")

    # 3. Generate the AI summary (skipped when there are no results)
    results = search_results_data.get("results", [])
    ai_summary = None
    if results:
        ai_summary = await summarize_results_with_ai(results, request.query)

    # 4. Assemble the final response
    return {
        "original_query": request.query,
        "optimized_query": optimized_query,
        "ai_summary": ai_summary,
        "results": results,
    }