Spaces:
Sleeping
Sleeping
| """ | |
| RAG引擎:实现传统RAG和GraphRAG的检索逻辑 | |
| """ | |
| from typing import List, Dict, Tuple | |
| # 优先使用轻量级版本(避免超过 Vercel 250MB 限制) | |
| try: | |
| from database_setup_lite import SimpleGraphDB, VectorDB | |
| except ImportError: | |
| from database_setup import SimpleGraphDB, VectorDB | |
| import json | |
| import requests | |
| # LLM配置(从环境变量读取,确保安全) | |
| import os | |
# LLM endpoint settings. Each value is read from an environment variable so
# that no credential is hard-coded; the base URL and model name fall back to
# the defaults given here, the API key falls back to an empty string.
LLM_API_BASE = os.getenv("LLM_API_BASE", "https://api.ai-gaochao.cn/v1")
LLM_API_KEY = os.getenv("LLM_API_KEY", "")
LLM_MODEL = os.getenv("LLM_MODEL", "gemini-2.5-flash")
# Fail fast: importing this module raises ValueError when no API key is set.
if not LLM_API_KEY:
    raise ValueError("LLM_API_KEY 环境变量未设置!请在 .env 文件中设置 LLM_API_KEY")
class TraditionalRAG:
    """Baseline semantic RAG: plain vector search that returns short snippets.

    The graph structure is not consulted. Each hit from the vector store is
    reduced to the sentences that share whitespace-separated keywords with
    the query, so callers get a fragment rather than the full copywriting.
    """

    def __init__(self, vector_db: "VectorDB", graph_db: "SimpleGraphDB" = None):
        # Project types are quoted so the annotations are not evaluated when
        # the class is defined.
        self.vector_db = vector_db
        # Stored for callers; retrieve() in this class never reads it.
        self.graph_db = graph_db

    @staticmethod
    def _clip(text: str) -> str:
        """Cut ``text`` to 150 characters, appending "..." when it was longer."""
        return text[:150] + "..." if len(text) > 150 else text

    @staticmethod
    def _extract_snippet(full_content: str, query_keywords: set) -> str:
        """Pick the sentences of ``full_content`` most related to the query.

        Returns an empty string when the content holds no sentence text
        (for example when it consists only of punctuation).
        """
        import re

        # Split on Chinese and ASCII sentence terminators.
        sentences = [
            piece.strip()
            for piece in re.split(r'[。!?.!?]', full_content)
            if piece.strip()
        ]
        if not sentences:
            return ""

        # Relevance = number of query keywords contained in the sentence.
        scored = []
        for sentence in sentences:
            lowered = sentence.lower()
            hits = sum(1 for keyword in query_keywords if keyword in lowered)
            if hits > 0:
                scored.append((sentence, hits))
        # list.sort is stable, so ties keep their document order.
        scored.sort(key=lambda pair: pair[1], reverse=True)

        # No keyword hit at all: fall back to the leading sentences.
        chosen = [sentence for sentence, _ in scored[:3]] or sentences[:3]
        snippet = "。".join(chosen)
        if len(snippet) > 150:
            return snippet[:150] + "..."
        if len(snippet) < 30:
            # Too short to be useful: use the leading sentences instead.
            return TraditionalRAG._clip("。".join(sentences[:3]))
        return snippet

    @staticmethod
    def _build_result(result: Dict, snippet: str, full_content: str) -> Dict:
        """Assemble one result entry, deriving similarity from the distance."""
        # similarity = 1 - distance, clamped into the 0-1 range.
        distance = result.get("distance", 1.0)
        similarity_score = max(0, min(1, 1 - distance))
        return {
            "content": snippet,
            "full_content": full_content,
            "metadata": result.get("metadata", {}),
            "distance": distance,
            "similarity_score": similarity_score,
            "similarity_percentage": round(similarity_score * 100, 2),
            "is_snippet": True,
        }

    def retrieve(self, query: str, product_name: str = None, style_name: str = None, n_results: int = 5) -> Dict:
        """Run a vector search and return at most three snippet results.

        Args:
            query: Free-text query passed to ``vector_db.search``.
            product_name: Echoed back under ``"product"``; not used to filter.
            style_name: Echoed back under ``"style"``; not used to filter.
            n_results: Upper bound requested by the caller; the effective
                limit is ``min(3, n_results)``.

        Returns:
            A dict with the keys ``method``, ``query``, ``product``,
            ``style``, ``results``, ``retrieval_path``, ``explanation`` and
            ``similarity_explanation``.
        """
        limited_results = min(3, n_results)
        # Fetch twice the limit so hits that yield no snippet can be skipped.
        all_results = self.vector_db.search(query, n_results=limited_results * 2)
        query_keywords = set(query.lower().split())

        processed_results = []
        for result in all_results[:limited_results * 2]:
            full_content = result.get("content", "")
            if not full_content:
                continue
            snippet = self._extract_snippet(full_content, query_keywords)
            if snippet:
                processed_results.append(self._build_result(result, snippet, full_content))
            if len(processed_results) >= limited_results:
                break

        # Snippet extraction produced nothing: fall back to plain truncation.
        if len(processed_results) < 1:
            for result in all_results[:max(1, limited_results)]:
                content = result.get("content", "")
                if content:
                    processed_results.append(
                        self._build_result(result, self._clip(content), content)
                    )
                if len(processed_results) >= limited_results:
                    break

        return {
            "method": "语义检索",
            "query": query,
            "product": product_name,
            "style": style_name,
            "results": processed_results[:limited_results],
            "retrieval_path": [
                "向量相似度搜索(传统RAG:不利用图结构)",
                f"找到 {len(processed_results)} 个语义相似的片段",
                "⚠️ 局限性:只返回片段句子,没有图结构,无法找到跨品类的风格相关文案"
            ],
            "explanation": "传统RAG直接通过语义相似度搜索相关文案,使用相同的文案数据库,但只返回与查询最相关的片段句子(而不是完整文案)。没有图结构,无法找到跨品类的风格相关文案。",
            "similarity_explanation": """相似度计算方法 / Similarity Calculation Method:
1. **文本向量化** / Text Vectorization:
- 使用 OpenAI 的 text-embedding-3-small 模型将查询和文案转换为向量
- Uses OpenAI's text-embedding-3-small model to convert query and copywriting into vectors
2. **相似度计算** / Similarity Calculation:
- 计算查询向量与文案向量的余弦相似度(范围 0-1)
- Calculates cosine similarity between query vector and copywriting vector (range 0-1)"""
        }
class GraphRAG:
    """Graph-enhanced RAG: style-node traversal topped up by vector search."""

    def __init__(self, graph_db: "SimpleGraphDB", vector_db: "VectorDB"):
        # Project types are quoted so the annotations are not evaluated when
        # the class is defined.
        self.graph_db = graph_db
        self.vector_db = vector_db

    def retrieve(self, query: str, product_name: str = None, style_name: str = None, n_results: int = 5) -> Dict:
        """Collect copywriting via the style node, then fill up by vector search.

        Args:
            query: Free-text query, used only for the vector-search top-up.
            product_name: Optional product whose ``features`` and
                ``keywords`` properties are returned as ``product_features``.
            style_name: Optional style; copywriting nodes linked to it by a
                ``HAS_STYLE`` edge are collected regardless of product.
            n_results: Maximum number of documents returned.

        Returns:
            A dict with the keys ``method``, ``query``, ``product``,
            ``style``, ``product_features``, ``results``, ``retrieval_path``
            and ``explanation``.
        """
        retrieval_path = []
        retrieved_docs = []

        # Step 1: locate the style node.
        style_node = None
        if style_name:
            style_node = self.graph_db.find_node_by_property("Style", "name", style_name)
        if style_node:
            retrieval_path.append(f"定位风格节点: {style_node['properties']['name']}")

        # Step 2: walk HAS_STYLE edges backwards to the copywriting nodes.
        if style_node:
            edges = self.graph_db.edges
            # Map each copywriting id to its owning product in one pass
            # (Product -HAS_COPY-> Copywriting). setdefault keeps the first
            # edge seen, the same one a front-to-back scan would pick.
            product_of_copy = {}
            for edge in edges:
                if edge["relationship"] == "HAS_COPY":
                    product_of_copy.setdefault(edge["target"], edge["source"])

            for edge in edges:
                if edge["target"] != style_node["id"] or edge["relationship"] != "HAS_STYLE":
                    continue
                copy_node = self.graph_db.nodes.get(edge["source"])
                if not copy_node or copy_node["type"] != "Copywriting":
                    continue
                product_id = product_of_copy.get(edge["source"])
                product_info = self.graph_db.nodes.get(product_id, {}).get("properties", {})
                retrieved_docs.append({
                    "content": copy_node["properties"]["content"],
                    "source": "图遍历",
                    "product": product_info.get("name", "未知"),
                    "style": style_name,
                    "tag": copy_node["properties"].get("tag", ""),
                    "retrieval_reason": f"通过风格节点'{style_name}'找到的跨品类文案(来自产品:{product_info.get('name', '未知')})"
                })

            if retrieved_docs:
                retrieval_path.append(f"通过风格节点遍历找到 {len(retrieved_docs)} 个相关文案")
            else:
                retrieval_path.append("未找到该风格的相关文案")

        # Step 3: look up the product's features and keywords.
        product_features = []
        if product_name:
            product_node = self.graph_db.find_node_by_property("Product", "name", product_name)
            if product_node:
                retrieval_path.append(f"定位产品节点: {product_name}")
                features = product_node["properties"].get("features", [])
                keywords = product_node["properties"].get("keywords", [])
                product_features = features + keywords
                retrieval_path.append(f"提取产品特征: {', '.join(product_features[:5])}")

        # Step 4: top up with vector search when the graph gave too little.
        if len(retrieved_docs) < n_results:
            vector_results = self.vector_db.search(query, n_results=n_results - len(retrieved_docs))
            seen_contents = {doc["content"] for doc in retrieved_docs}
            added = 0
            for result in vector_results:
                content = result.get("content", "")
                # Skip empty hits and anything already collected.
                if not content or content in seen_contents:
                    continue
                metadata = result.get("metadata") or {}
                retrieved_docs.append({
                    "content": content,
                    "source": "向量检索补充",
                    "product": metadata.get("product_id", "未知"),
                    "style": metadata.get("style_id", "未知"),
                    "tag": metadata.get("tag", ""),
                    "retrieval_reason": "语义相似度补充检索"
                })
                seen_contents.add(content)
                added += 1
            # Report what was actually added, not what the search returned.
            if added:
                retrieval_path.append(f"向量检索补充 {added} 个结果")

        return {
            "method": "图增强检索",
            "query": query,
            "product": product_name,
            "style": style_name,
            "product_features": product_features,
            "results": retrieved_docs[:n_results],
            "retrieval_path": retrieval_path,
            "explanation": "通过图结构找到跨品类的风格相关文案,即使产品不同,但风格相通,可以借鉴文案模板。"
        }
class RAGEngine:
    """Facade over both retrievers plus LLM-based copywriting generation."""

    def __init__(self, graph_db: "SimpleGraphDB", vector_db: "VectorDB"):
        # Project types are quoted so the annotations are not evaluated when
        # the class is defined.
        self.graph_db = graph_db
        self.traditional_rag = TraditionalRAG(vector_db, graph_db)
        self.graph_rag = GraphRAG(graph_db, vector_db)

    def compare_retrieval(self, query: str, product_name: str = None, style_name: str = None) -> Dict:
        """Run both retrievers on the same input and return them side by side.

        Returns:
            A dict with ``traditional_rag`` and ``graph_rag`` (the raw
            retrieval results) and ``comparison`` (result counts, including
            how many graph results came from graph traversal).
        """
        traditional_result = self.traditional_rag.retrieve(query, product_name, style_name)
        graph_result = self.graph_rag.retrieve(query, product_name, style_name)
        traversal_hits = [r for r in graph_result["results"] if r.get("source") == "图遍历"]
        return {
            "traditional_rag": traditional_result,
            "graph_rag": graph_result,
            "comparison": {
                "traditional_count": len(traditional_result["results"]),
                "graph_count": len(graph_result["results"]),
                "graph_cross_category": len(traversal_hits)
            }
        }

    def generate_copywriting(self, query: str, product_name: str, style_name: str, use_graph: bool = True) -> Dict:
        """Retrieve references, ask the LLM for copywriting and wrap the result.

        Args:
            query: Query forwarded to the selected retriever.
            product_name: Product the copywriting is written for.
            style_name: Target writing style.
            use_graph: ``True`` selects GraphRAG, ``False`` traditional RAG.

        Returns:
            A dict with ``generated_text`` (report plus generated copy),
            ``retrieval_result``, ``method`` and ``analysis``.
        """
        retriever = self.graph_rag if use_graph else self.traditional_rag
        retrieval_result = retriever.retrieve(query, product_name, style_name)
        results = retrieval_result["results"]

        # At most five retrieved texts are used as references.
        retrieved_texts = [r["content"] for r in results[:5]]
        # Graph-traversal hits are the cross-category references.
        cross_category_count = (
            len([r for r in results if r.get("source") == "图遍历"]) if use_graph else 0
        )
        # Product features are only reported by the graph retriever.
        product_features = []
        if use_graph and retrieval_result.get("product_features"):
            product_features = retrieval_result["product_features"]

        try:
            llm_generated = self._call_llm_generate(
                product_name=product_name,
                style_name=style_name,
                reference_texts=retrieved_texts,
                product_features=product_features,
                use_graph=use_graph,
                cross_category_count=cross_category_count
            )
        except Exception as e:
            # Deliberate best effort: any LLM failure falls back to the
            # built-in template so the caller still receives a text.
            print(f"LLM生成失败: {e}")
            llm_generated = self._generate_template(retrieved_texts, product_name, style_name)

        # The report wording follows the retrieval method that was actually
        # used, so it always agrees with "method" and "analysis" below even
        # when the product has no known features.
        if use_graph:
            features = ", ".join(product_features[:3]) or "未知"
            reference_sources = ', '.join([r.get('product', '未知') for r in results[:3]])
            generated_text = f"""基于图增强检索生成的文案:
✨ 检索策略:通过图结构找到跨品类的风格相关文案
📊 检索结果:找到 {len(retrieved_texts)} 个相关文案,其中 {cross_category_count} 个来自跨品类(通过风格节点关联)
🎯 产品特征:{features}
📝 参考文案来源:{reference_sources}
【{style_name}风格】{product_name}文案:
{llm_generated}
💡 说明:GraphRAG 通过风格节点找到了跨品类的参考文案(如香薰蜡烛的清冷避世风文案),即使产品不同,但风格相通,可以借鉴文案模板。
💡 Note: GraphRAG found cross-category reference copywriting through style nodes (such as the cold and reclusive style copywriting of scented candles). Even though the products are different, the styles are connected, allowing us to learn from the copywriting templates."""
        else:
            generated_text = f"""基于传统语义检索生成的文案:
🔍 检索策略:直接通过语义相似度搜索
📊 检索结果:找到 {len(retrieved_texts)} 个语义相似的文案
⚠️ 局限性:如果数据库中没有相似内容,可能返回不相关的结果
【{style_name}风格】{product_name}文案:
{llm_generated}
💡 说明:传统 RAG 只能找到语义相似的文案,如果数据库中没有该产品的该风格文案,可能无法生成合适的文案。
💡 Note: Traditional RAG can only find semantically similar copywriting. If there is no copywriting for this product in this style in the database, it may not be able to generate suitable copywriting."""

        analysis = self._generate_comparison_analysis(
            use_graph=use_graph,
            cross_category_count=cross_category_count,
            retrieved_count=len(retrieved_texts),
            product_name=product_name,
            style_name=style_name,
            has_references=len(retrieved_texts) > 0
        )
        return {
            "generated_text": generated_text,
            "retrieval_result": retrieval_result,
            "method": "GraphRAG" if use_graph else "Traditional RAG",
            "analysis": analysis
        }

    def _call_llm_generate(self, product_name: str, style_name: str, reference_texts: List[str],
                           product_features: List[str] = None, use_graph: bool = True,
                           cross_category_count: int = 0) -> str:
        """Ask the chat-completions endpoint for bilingual copywriting.

        Args:
            product_name: Product the copywriting is written for.
            style_name: Target writing style.
            reference_texts: Retrieved texts; the first three go into the prompt.
            product_features: Optional features; the first five go into the prompt.
            use_graph: Together with ``cross_category_count`` selects the
                detailed GraphRAG prompt over the short one.
            cross_category_count: Number of graph-traversal references.

        Returns:
            The generated text with chat-style prefixes and ``*`` removed.

        Raises:
            requests.HTTPError: When the endpoint answers with an error status.
            KeyError, IndexError: When the response lacks the expected fields.
        """
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {LLM_API_KEY}"
        }
        url = f"{LLM_API_BASE}/chat/completions"

        # Reference section of the prompt.
        if reference_texts:
            numbered = "".join(
                f"{i}. {text}\n" for i, text in enumerate(reference_texts[:3], 1)
            )
            reference_context = "\n\n参考文案(用于学习风格和句式):\n" + numbered
        else:
            reference_context = "\n\n⚠️ 注意:没有找到相关参考文案,请根据产品特征和风格要求创作。"

        # Product-feature section of the prompt.
        features_context = ""
        if product_features:
            features_context = f"\n产品特征:{', '.join(product_features[:5])}"

        if use_graph and cross_category_count > 0:
            # GraphRAG gets the detailed prompt that explains style transfer.
            prompt = f"""你是一名擅长小红书文案写作的创意编辑,拥有丰富的跨品类文案创作经验。请根据以下信息,生成一篇适合在小红书发布的优质文案(200-300字,要求内容丰富、有细节感、有感染力)。
产品名称:{product_name}
目标风格:{style_name}
{features_context}
{reference_context}
创作指导(这些参考文案来自跨品类产品,但风格相通):
1. **风格学习**:仔细分析参考文案的句式结构、语气特点、情感表达方式和修辞手法
2. **风格迁移**:将参考文案中体现的{style_name}风格特征(如用词、节奏、情感色彩)巧妙地应用到{product_name}上
3. **内容创新**:结合{product_name}的产品特征,创造性地表达产品价值和用户情感
4. **细节丰富**:文案要有具体的使用场景、真实的感受描述、生动的细节刻画
5. **情感共鸣**:通过细腻的情感表达,让读者产生共鸣和购买欲望
6. **风格一致**:确保整篇文案都保持{style_name}的风格特征,风格统一且鲜明
文案要求:
- 长度:200-300字,内容丰富充实
- 结构:可以包含产品介绍、使用场景、情感体验、产品特色等多个维度
- 语言:符合小红书用户的阅读习惯,自然流畅,有感染力
- 完整性:确保文案完整,不要被截断,以完整的句子结尾
**必须遵守的输出格式要求:**
- 你必须使用中英对照格式输出文案,按段落进行中英对照
- 格式:中文段落(换行)English paragraph(再换行)
- 每个中文段落后面必须换行,然后添加对应的英文段落翻译,英文段落后再换行
- 示例格式:
这款真丝眼罩真的太舒服了,遮光效果特别好,戴上之后整个世界都安静了。
This silk eye mask is really comfortable, with excellent light-blocking effect. After putting it on, the whole world becomes quiet.
每天晚上睡前戴上它,就像给自己创造了一个专属的避风港。
Every night before sleep, putting it on is like creating a personal sanctuary for yourself.
材质柔软亲肤,完全不会压迫眼睛,真的爱了。
The material is soft and skin-friendly, completely non-pressuring on the eyes, I really love it.
- 不要只输出中文,必须每个段落都包含对应的英文翻译
- 可以一个段落包含多句话,然后整体翻译成英文
- 每个中文段落和英文段落之间必须换行,段落之间用空行分隔
请直接输出文案内容,不要包含"好的"、"没问题"等前缀,也不要使用markdown格式。只输出文案正文,确保内容完整,并且严格按照以下格式输出:中文段落(换行)English paragraph(换行)。"""
        else:
            # Traditional RAG (or GraphRAG without graph hits) gets the short prompt.
            prompt = f"""请根据以下信息生成一篇小红书风格的文案(200-300字)。
产品名称:{product_name}
目标风格:{style_name}
{features_context}
{reference_context}
要求:
1. 保持{style_name}风格
2. 200-300字
3. 中英对照格式,每段中文后换行添加英文翻译
**输出格式:**
中文段落
English paragraph
请直接输出文案,不要前缀。"""

        body = {
            "model": LLM_MODEL,
            "messages": [
                {
                    "role": "system",
                    "content": "你是一名擅长文案写作的创意编辑,擅长创作小红书风格的文案。你必须使用中英对照格式输出所有文案内容,按段落进行中英对照,每个中文段落后面换行添加对应的英文翻译。格式:中文段落(换行)English paragraph"
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            # Generous limit so the bilingual text is not cut off.
            "max_tokens": 4000,
            "temperature": 0.9
        }
        resp = requests.post(url, headers=headers, json=body, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        generated = data["choices"][0]["message"]["content"].strip()

        # Drop chat-style lead-ins, but only at the very start of the text.
        prefixes_to_remove = [
            "好的,没问题!",
            "好的,",
            "没问题!",
            "好的!",
        ]
        for prefix in prefixes_to_remove:
            if generated.startswith(prefix):
                generated = generated[len(prefix):].strip()

        # Strip markdown emphasis markers while keeping the text itself.
        generated = generated.replace("**", "").replace("*", "").strip()
        return generated

    def _generate_comparison_analysis(self, use_graph: bool, cross_category_count: int,
                                      retrieved_count: int, product_name: str, style_name: str,
                                      has_references: bool) -> str:
        """Return the bilingual explanation text for the retrieval method used.

        ``retrieved_count`` and ``has_references`` are accepted for the
        caller's convenience but do not influence the text.
        """
        if not use_graph:
            return f"""📊 传统 RAG 局限性分析 / Traditional RAG Limitations Analysis:
⚠️ **依赖数据库内容** / Database Dependency:
- 传统 RAG 只能找到语义相似的文案,完全依赖数据库中的现有内容
- Traditional RAG can only find semantically similar copywriting, completely dependent on existing content in the database
- 如果数据库中没有"{product_name}"的"{style_name}"风格文案,可能无法生成合适的文案
- If there is no "{style_name}" style copywriting for "{product_name}" in the database, it may not be able to generate suitable copywriting
🔍 **检索范围有限** / Limited Retrieval Scope:
- 只能通过向量相似度搜索,无法利用产品之间的关联关系
- Can only search through vector similarity, unable to utilize relationships between products
- 对于新产品或冷门产品,可能找不到任何相关参考文案
- For new products or niche products, may not find any relevant reference copywriting
💡 **冷启动问题** / Cold Start Problem:
- 当产品不在数据库中时,传统 RAG 无法找到任何参考
- When products are not in the database, traditional RAG cannot find any references
- 生成的文案质量可能较差或不够相关
- Generated copywriting quality may be poor or irrelevant"""

        if cross_category_count > 0:
            return f"""📊 GraphRAG 优势分析 / GraphRAG Advantage Analysis:
✨ **跨品类检索能力** / Cross-Category Retrieval Capability:
- GraphRAG 通过风格节点找到了 {cross_category_count} 个跨品类参考文案
- GraphRAG found {cross_category_count} cross-category reference copywriting through style nodes
- 即使数据库中不存在"{product_name}"的"{style_name}"风格文案,也能通过风格关联找到其他产品的同风格文案
- Even if there is no "{style_name}" style copywriting for "{product_name}" in the database, it can find copywriting of the same style from other products through style associations
🎯 **风格一致性** / Style Consistency:
- 通过图结构中的风格节点,确保检索到的文案风格高度一致
- Through style nodes in the graph structure, ensuring highly consistent style of retrieved copywriting
- 可以学习跨品类产品的优秀文案模板和表达方式
- Can learn excellent copywriting templates and expression methods from cross-category products
💡 **冷启动问题解决** / Cold Start Problem Solution:
- 对于新产品或冷门产品,GraphRAG 仍能找到相关参考文案
- For new products or niche products, GraphRAG can still find relevant reference copywriting
- 传统 RAG 在这种情况下可能无法找到任何相关文案
- Traditional RAG may not be able to find any relevant copywriting in this situation"""

        return """📊 GraphRAG 优势分析 / GraphRAG Advantage Analysis:
✨ **图结构优势** / Graph Structure Advantage:
- GraphRAG 利用产品、风格、特征之间的图关系进行检索
- GraphRAG uses graph relationships between products, styles, and features for retrieval
- 即使没有跨品类结果,也能通过图结构找到更相关的文案
- Even without cross-category results, it can find more relevant copywriting through graph structure
🎯 **语义理解增强** / Enhanced Semantic Understanding:
- 通过图结构可以理解产品特征和风格之间的深层关联
- Can understand deep associations between product features and styles through graph structure
- 检索结果更加精准和符合需求
- Retrieval results are more accurate and meet requirements better"""

    def _generate_template(self, reference_texts: List[str], product_name: str, style_name: str) -> str:
        """Build fallback copywriting from fixed templates (no LLM involved).

        The first two reference texts are checked for a few marker phrases;
        the phrases found steer which wording the template uses.
        """
        key_phrases = []
        if reference_texts:
            for text in reference_texts[:2]:
                for phrase in ("避难所", "安静", "唯一", "绝绝子"):
                    if phrase in text:
                        key_phrases.append(phrase)

        if "清冷避世风" in style_name or "深夜emo" in style_name.lower():
            if "眼罩" in product_name:
                if key_phrases:
                    # References available: reuse their wording.
                    shelter = '避难所' if '避难所' in key_phrases else '避风港'
                    quiet = '安静了' if '安静' in key_phrases else '静下来了'
                    return f"戴上眼罩的这片刻漆黑,是我在繁杂城市里唯一的{shelter}。物理意义上的关灯,也是心理上的断联。世界终于{quiet},今晚只属于我自己。"
                # No usable reference: generic wording.
                return f"这个{product_name}真的很不错,遮光效果好,推荐给大家使用。"
            if "CCD" in product_name or "相机" in product_name:
                return "深夜拿起它,在颗粒感的画面里,所有的情绪都有了出口。低像素不是缺陷,是另一种真实。"
            if key_phrases:
                link = '唯一连接' if '唯一' in key_phrases else '连接'
                return f"每一个与{product_name}的瞬间,都是我与世界的{link}。"
            return f"这个{product_name}真的很不错,推荐给大家。"

        if "疯狂种草" in style_name:
            if key_phrases and "绝绝子" in key_phrases:
                # References available: reuse their tone.
                return f"家人们谁懂啊!这个{product_name}真的绝绝子,一秒沦陷!必须人手一个!"
            return f"这个{product_name}真的很不错,推荐给大家购买!"

        if key_phrases:
            recommend = '强烈推荐' if '绝绝子' in key_phrases else '推荐'
            return f"这个{product_name}真的很不错,{recommend}给大家!"
        return f"这个{product_name}真的很不错,推荐给大家!"