Spaces:
Paused
Paused
Commit
·
004731a
1
Parent(s):
ea32640
feat: 关键词助手的流程实现
Browse files
- server.py +53 -0
- 关键词助手.ipynb +238 -0
server.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
# @Time : 2024/6/5 上午7:24
|
4 |
+
# @Author : Tim-Saijun https://zair.top
|
5 |
+
# @File : server.py
|
6 |
+
# @Project : SAgent
|
7 |
+
|
8 |
+
from typing import List
|
9 |
+
|
10 |
+
from fastapi import FastAPI
|
11 |
+
from langchain_core.prompts import ChatPromptTemplate
|
12 |
+
from langchain_core.output_parsers import StrOutputParser
|
13 |
+
from langchain_openai import ChatOpenAI
|
14 |
+
from langserve import add_routes
|
15 |
+
|
16 |
+
# 1. Prompt template: a system instruction parameterised by {language},
#    followed by the user's {text}.
system_template = "Translate the following into {language}:"
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", system_template),
        ("user", "{text}"),
    ]
)

# 2. Chat model, constructed with no explicit arguments.
model = ChatOpenAI()

# 3. Output parser.
parser = StrOutputParser()

# 4. Chain: the prompt feeds the model, whose output feeds the parser.
chain = prompt_template | model | parser

# 5. FastAPI application that hosts the chain.
app = FastAPI(
    title="LangChain Server",
    version="1.0",
    description="A simple API server using LangChain's Runnable interfaces",
)

# 6. Register the chain on the app under the "/chain" path.
add_routes(app, chain, path="/chain")

if __name__ == "__main__":
    # uvicorn is imported here, so it is only needed when run as a script.
    import uvicorn

    uvicorn.run(app, host="localhost", port=8000)
|
52 |
+
|
53 |
+
|
关键词助手.ipynb
ADDED
@@ -0,0 +1,238 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"id": "initial_id",
|
6 |
+
"metadata": {
|
7 |
+
"collapsed": true,
|
8 |
+
"ExecuteTime": {
|
9 |
+
"end_time": "2024-06-05T09:41:38.526785Z",
|
10 |
+
"start_time": "2024-06-05T09:41:37.319136Z"
|
11 |
+
}
|
12 |
+
},
|
13 |
+
"source": [
|
14 |
+
"from IPython.display import display, Markdown\n",
|
15 |
+
"\n",
|
16 |
+
"from langchain_core.prompts import ChatPromptTemplate\n",
|
17 |
+
"from langchain_core.output_parsers import StrOutputParser\n",
|
18 |
+
"from langchain_openai import ChatOpenAI\n",
|
19 |
+
"\n",
|
20 |
+
"model = ChatOpenAI(model = \"gpt-4o\",\n",
|
21 |
+
" temperature = 0.1,\n",
|
22 |
+
" max_tokens = 4096,\n",
|
23 |
+
" # presence_penalty = 1,\n",
|
24 |
+
" )\n",
|
25 |
+
"parser = StrOutputParser()"
|
26 |
+
],
|
27 |
+
"outputs": [],
|
28 |
+
"execution_count": 83
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"metadata": {},
|
32 |
+
"cell_type": "markdown",
|
33 |
+
"source": "# 1. 关键词筛选",
|
34 |
+
"id": "30ff1e47cd72de9f"
|
35 |
+
},
|
36 |
+
{
|
37 |
+
"metadata": {
|
38 |
+
"ExecuteTime": {
|
39 |
+
"end_time": "2024-06-05T09:51:52.767269Z",
|
40 |
+
"start_time": "2024-06-05T09:51:52.764220Z"
|
41 |
+
}
|
42 |
+
},
|
43 |
+
"cell_type": "code",
|
44 |
+
"source": [
|
45 |
+
"# 1. Create prompt template\n",
|
46 |
+
"filter_system_template = \"\"\"\n",
|
47 |
+
"进行关键词分析,筛选出拓展词列表中与目标关键词高度相关的关键词,具体要求如下:\n",
|
48 |
+
"- 根据目标关键词,评估拓展词列表中每个关键词的相关性,删除相关性不高的关键词。\n",
|
49 |
+
"- 删除拓展词列表中的所有品牌词。\n",
|
50 |
+
"- 删除拓展词列表中的所有平台名称。\n",
|
51 |
+
"- 搜索意图匹配:确保每个关键词都与目标用户的搜索意图相匹配。不符合目标用户搜索意图的关键词应予以删除。\n",
|
52 |
+
"- 长尾关键词优先:倾向于保留具有具体意图且通常竞争较小的长尾关键词。\n",
|
53 |
+
"- 内容覆盖度:确保所选关键词能够被现有内容或计划中的内容充分覆盖,与网站内容策略相符合。\n",
|
54 |
+
"## Output:\n",
|
55 |
+
"直接输出筛选后的关键词列表,每行输出一个关键词。\n",
|
56 |
+
"\"\"\"\n",
|
57 |
+
"filter_prompt_template = ChatPromptTemplate.from_messages([\n",
|
58 |
+
" ('system', filter_system_template),\n",
|
59 |
+
" ('user', '目标关键词:{corekeywords} 拓展词列表:{relatedkeywords}')\n",
|
60 |
+
"])\n"
|
61 |
+
],
|
62 |
+
"id": "7059fa2d895f586d",
|
63 |
+
"outputs": [],
|
64 |
+
"execution_count": 141
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"metadata": {
|
68 |
+
"ExecuteTime": {
|
69 |
+
"end_time": "2024-06-05T09:50:55.619759Z",
|
70 |
+
"start_time": "2024-06-05T09:50:55.613906Z"
|
71 |
+
}
|
72 |
+
},
|
73 |
+
"cell_type": "code",
|
74 |
+
"source": [
|
75 |
+
"filter_prompt_template.invoke({'corekeywords': \"打印机\",\n",
|
76 |
+
" 'relatedkeywords': \"激光打印机 喷墨打印机\"})"
|
77 |
+
],
|
78 |
+
"id": "98fd18304ff75856",
|
79 |
+
"outputs": [
|
80 |
+
{
|
81 |
+
"data": {
|
82 |
+
"text/plain": [
|
83 |
+
"ChatPromptValue(messages=[SystemMessage(content='\\n进行关键词分析,筛选出拓展词列表中与目标关键词高度相关的关键词,具体要求如下:\\n- 根据目标关键词,评估拓展词列表中每个关键词的相关性,删除相关性不高的关键词。\\n- 删除拓展词列表中的所有品牌词。\\n- 删除拓展词列表中的所有平台名称。\\n- 搜索意图匹配:确保每个关键词都与目标用户的搜索意图相匹配。不符合目标用户搜索意图的关键词应予以删除。\\n- 长尾关键词优先:倾向于保留具有具体意图且通常竞争较小的长尾关键词。\\n- 内容覆盖度:确保所选关键词能够被现有内容或计划中的内容充分覆盖,与网站内容策略相符合。\\n## Output:\\n直接输出筛选后的关键词列表,每行输出一个关键词。\\n'), HumanMessage(content='目标关键词:打印机 拓展词列表:激光打印机 喷墨打印机')])"
|
84 |
+
]
|
85 |
+
},
|
86 |
+
"execution_count": 118,
|
87 |
+
"metadata": {},
|
88 |
+
"output_type": "execute_result"
|
89 |
+
}
|
90 |
+
],
|
91 |
+
"execution_count": 118
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"metadata": {
|
95 |
+
"ExecuteTime": {
|
96 |
+
"end_time": "2024-06-05T09:53:52.219922Z",
|
97 |
+
"start_time": "2024-06-05T09:53:51.496246Z"
|
98 |
+
}
|
99 |
+
},
|
100 |
+
"cell_type": "code",
|
101 |
+
"source": [
|
102 |
+
"chain = filter_prompt_template | model | parser\n",
|
103 |
+
"filter_res = chain.invoke({'corekeywords': \"打印机\",\n",
|
104 |
+
" 'relatedkeywords': \"激光打印机 喷墨打印机 汉堡王 京东商城 亚马逊 活字印刷术 印刷制品\"})\n",
|
105 |
+
"display(Markdown(filter_res))"
|
106 |
+
],
|
107 |
+
"id": "25cc895e34e2ac5e",
|
108 |
+
"outputs": [
|
109 |
+
{
|
110 |
+
"data": {
|
111 |
+
"text/plain": [
|
112 |
+
"<IPython.core.display.Markdown object>"
|
113 |
+
],
|
114 |
+
"text/markdown": "激光打印机 \n喷墨打印机"
|
115 |
+
},
|
116 |
+
"metadata": {},
|
117 |
+
"output_type": "display_data"
|
118 |
+
}
|
119 |
+
],
|
120 |
+
"execution_count": 149
|
121 |
+
},
|
122 |
+
{
|
123 |
+
"metadata": {},
|
124 |
+
"cell_type": "markdown",
|
125 |
+
"source": "# 2. 筛选结果确认",
|
126 |
+
"id": "2569efb1c53bffd9"
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"metadata": {},
|
130 |
+
"cell_type": "markdown",
|
131 |
+
"source": "# 3. 关键词分类",
|
132 |
+
"id": "792a4d9ab769d833"
|
133 |
+
},
|
134 |
+
{
|
135 |
+
"metadata": {
|
136 |
+
"ExecuteTime": {
|
137 |
+
"end_time": "2024-06-05T08:41:57.856976Z",
|
138 |
+
"start_time": "2024-06-05T08:41:57.851765Z"
|
139 |
+
}
|
140 |
+
},
|
141 |
+
"cell_type": "code",
|
142 |
+
"source": [
|
143 |
+
"classify_system_template = \"\"\"\n",
|
144 |
+
"## Role:\n",
|
145 |
+
"你是一名具有二十年工作经验的Google SEO专家,擅长快速制定SEO策略。\n",
|
146 |
+
"## Background:\n",
|
147 |
+
"我需要快速进行关键词分类,请参考用户提供的关键词列表,并按照指定的维度进行分类。\n",
|
148 |
+
"## Constraints:\n",
|
149 |
+
"1. 关键词列表: {key_words}\n",
|
150 |
+
"\n",
|
151 |
+
"2. 关键词分类维度: 颜色、规格、属性、材质、功能、样式、型号、品牌、商业词、地域词、文章词。\n",
|
152 |
+
"3. 以下关键词均属于商业词类别: manufacture, supplier, wholesale, OEM, ODM, company, factory, private label, Custom, bulk, Import, Export, Agent, Dealer, distributor。\n",
|
153 |
+
"## Workflows:\n",
|
154 |
+
"1. 将关键词列表中的每个词按照[Constraints]中关键词分类维度进行分类。\n",
|
155 |
+
"2. 单个关键词只能隶属于一个关键词分类维度,不能同时属于多个分类。\n",
|
156 |
+
"3. 每个关键词必须进行一次分类,不得遗漏。\n",
|
157 |
+
"4. 分类完毕后,在对应的分类字段中填写关键词中的修饰词。\n",
|
158 |
+
"5. 确保关键词列表中的每个词都被执行分类。\n",
|
159 |
+
"## Output:\n",
|
160 |
+
"1. 请用表格输出。\n",
|
161 |
+
"2. 不要任何解释,直接输出表格。\n",
|
162 |
+
"## Output Examples:\n",
|
163 |
+
"| 目标关键词 | 颜色 | 规格 | 属性 | 材质 | 功能 | 样式 | 型号 | 品牌 | 商业词 | 地域词 | 文章词 |\n",
|
164 |
+
"|------------|------|------|------|------|------|------|------|------|--------|--------|--------|\n",
|
165 |
+
"| 关键词1 | | | | | | | | | | | |\n",
|
166 |
+
"| 关键词2 | | | | | | | | | | | |\n",
|
167 |
+
"| 关键词3 | | | | | | | | | | | |\n",
|
168 |
+
"## Notice:\n",
|
169 |
+
"你必须将关键词列表中的每个关键词全部执行一次关键词分类,不得遗漏。每个关键词只能属于一个分类维度,不能同时出现在多个分类中。在分类字段中只填写关键词中的修饰词。\n",
|
170 |
+
"## Initialization:\n",
|
171 |
+
"作为[Role],在[Background]下,回顾你的技能,严格遵守[Constraints]进行关键词分类,按[Workflows]执行流程,按[Output]格式要求输出。\n",
|
172 |
+
"## Additional Constraints:\n",
|
173 |
+
"1. 确保所有关键词都被分类,不得遗漏。\n",
|
174 |
+
"2. 分类完毕后,检查表格,确保每个关键词都被准确分类。\n",
|
175 |
+
"3. 用户输入的目标关键词中的任何单词不参与关键词分类。\n",
|
176 |
+
"\"\"\"\n",
|
177 |
+
"classify_prompt_template = ChatPromptTemplate.from_messages([\n",
|
178 |
+
" ('system', classify_system_template),\n",
|
179 |
+
" ('user', '目标关键词:{target_keywords}')\n",
|
180 |
+
"])\n",
|
181 |
+
"# res = classify_prompt_template.invoke({'target_keywords': \"打印机\", 'key_words': \"激光打印机 喷墨打印机\"})\n",
|
182 |
+
"# res.to_messages()"
|
183 |
+
],
|
184 |
+
"id": "2c72b992f3a15f94",
|
185 |
+
"outputs": [],
|
186 |
+
"execution_count": 40
|
187 |
+
},
|
188 |
+
{
|
189 |
+
"metadata": {
|
190 |
+
"ExecuteTime": {
|
191 |
+
"end_time": "2024-06-05T09:54:05.635944Z",
|
192 |
+
"start_time": "2024-06-05T09:54:03.158609Z"
|
193 |
+
}
|
194 |
+
},
|
195 |
+
"cell_type": "code",
|
196 |
+
"source": [
|
197 |
+
"classify_chain = classify_prompt_template | model | parser\n",
|
198 |
+
"class_res = classify_chain.invoke({'target_keywords': \"打印机\", 'key_words': filter_res})\n",
|
199 |
+
"display(Markdown(class_res))"
|
200 |
+
],
|
201 |
+
"id": "2db5d8e8d991213e",
|
202 |
+
"outputs": [
|
203 |
+
{
|
204 |
+
"data": {
|
205 |
+
"text/plain": [
|
206 |
+
"<IPython.core.display.Markdown object>"
|
207 |
+
],
|
208 |
+
"text/markdown": "| 目标关键词 | 颜色 | 规格 | 属性 | 材质 | 功能 | 样式 | 型号 | 品牌 | 商业词 | 地域词 | 文章词 |\n|------------|------|------|------|------|------|------|------|------|--------|--------|--------|\n| 激光打印机 | | | | | 激光 | | | | | | |\n| 喷墨打印机 | | | | | 喷墨 | | | | | | |"
|
209 |
+
},
|
210 |
+
"metadata": {},
|
211 |
+
"output_type": "display_data"
|
212 |
+
}
|
213 |
+
],
|
214 |
+
"execution_count": 150
|
215 |
+
}
|
216 |
+
],
|
217 |
+
"metadata": {
|
218 |
+
"kernelspec": {
|
219 |
+
"display_name": "Python 3",
|
220 |
+
"language": "python",
|
221 |
+
"name": "python3"
|
222 |
+
},
|
223 |
+
"language_info": {
|
224 |
+
"codemirror_mode": {
|
225 |
+
"name": "ipython",
|
226 |
+
"version": 2
|
227 |
+
},
|
228 |
+
"file_extension": ".py",
|
229 |
+
"mimetype": "text/x-python",
|
230 |
+
"name": "python",
|
231 |
+
"nbconvert_exporter": "python",
|
232 |
+
"pygments_lexer": "ipython2",
|
233 |
+
"version": "2.7.6"
|
234 |
+
}
|
235 |
+
},
|
236 |
+
"nbformat": 4,
|
237 |
+
"nbformat_minor": 5
|
238 |
+
}
|