Tim-Saijun committed on
Commit
004731a
·
1 Parent(s): ea32640

feat: 关键词助手的流程实现

Browse files
Files changed (2) hide show
  1. server.py +53 -0
  2. 关键词助手.ipynb +238 -0
server.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ # @Time : 2024/6/5 上午7:24
4
+ # @Author : Tim-Saijun https://zair.top
5
+ # @File : server.py
6
+ # @Project : SAgent
7
+
8
+ from typing import List
9
+
10
+ from fastapi import FastAPI
11
+ from langchain_core.prompts import ChatPromptTemplate
12
+ from langchain_core.output_parsers import StrOutputParser
13
+ from langchain_openai import ChatOpenAI
14
+ from langserve import add_routes
15
+
16
+ # 1. Create prompt template
17
+ system_template = "Translate the following into {language}:"
18
+ prompt_template = ChatPromptTemplate.from_messages([
19
+ ('system', system_template),
20
+ ('user', '{text}')
21
+ ])
22
+
23
+ # 2. Create model
24
+ model = ChatOpenAI()
25
+
26
+ # 3. Create parser
27
+ parser = StrOutputParser()
28
+
29
+ # 4. Create chain
30
+ chain = prompt_template | model | parser
31
+
32
+
33
+ # 5. App definition
34
+ app = FastAPI(
35
+ title="LangChain Server",
36
+ version="1.0",
37
+ description="A simple API server using LangChain's Runnable interfaces",
38
+ )
39
+
40
+ # 6. Adding chain route
41
+
42
+ add_routes(
43
+ app,
44
+ chain,
45
+ path="/chain",
46
+ )
47
+
48
+ if __name__ == "__main__":
49
+ import uvicorn
50
+
51
+ uvicorn.run(app, host="localhost", port=8000)
52
+
53
+
关键词助手.ipynb ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "id": "initial_id",
6
+ "metadata": {
7
+ "collapsed": true,
8
+ "ExecuteTime": {
9
+ "end_time": "2024-06-05T09:41:38.526785Z",
10
+ "start_time": "2024-06-05T09:41:37.319136Z"
11
+ }
12
+ },
13
+ "source": [
14
+ "from IPython.display import display, Markdown\n",
15
+ "\n",
16
+ "from langchain_core.prompts import ChatPromptTemplate\n",
17
+ "from langchain_core.output_parsers import StrOutputParser\n",
18
+ "from langchain_openai import ChatOpenAI\n",
19
+ "\n",
20
+ "model = ChatOpenAI(model = \"gpt-4o\",\n",
21
+ " temperature = 0.1,\n",
22
+ " max_tokens = 4096,\n",
23
+ " # presence_penalty = 1,\n",
24
+ " )\n",
25
+ "parser = StrOutputParser()"
26
+ ],
27
+ "outputs": [],
28
+ "execution_count": 83
29
+ },
30
+ {
31
+ "metadata": {},
32
+ "cell_type": "markdown",
33
+ "source": "# 1. 关键词筛选",
34
+ "id": "30ff1e47cd72de9f"
35
+ },
36
+ {
37
+ "metadata": {
38
+ "ExecuteTime": {
39
+ "end_time": "2024-06-05T09:51:52.767269Z",
40
+ "start_time": "2024-06-05T09:51:52.764220Z"
41
+ }
42
+ },
43
+ "cell_type": "code",
44
+ "source": [
45
+ "# 1. Create prompt template\n",
46
+ "filter_system_template = \"\"\"\n",
47
+ "进行关键词分析,筛选出拓展词列表中与目标关键词高度相关的关键词,具体要求如下:\n",
48
+ "- 根据目标关键词,评估拓展词列表中每个关键词的相关性,删除相关性不高的关键词。\n",
49
+ "- 删除拓展词列表中的所有品牌词。\n",
50
+ "- 删除拓展词列表中的所有平台名称。\n",
51
+ "- 搜索意图匹配:确保每个关键词都与目标用户的搜索意图相匹配。不符合目标用户搜索意图的关键词应予以删除。\n",
52
+ "- 长尾关键词优先:倾向于保留具有具体意图且通常竞争较小的长尾关键词。\n",
53
+ "- 内容覆盖度:确保所选关键词能够被现有内容或计划中的内容充分覆盖,与网站内容策略相符合。\n",
54
+ "## Output:\n",
55
+ "直接输出筛选后的关键词列表,每行输出一个关键词。\n",
56
+ "\"\"\"\n",
57
+ "filter_prompt_template = ChatPromptTemplate.from_messages([\n",
58
+ " ('system', filter_system_template),\n",
59
+ " ('user', '目标关键词:{corekeywords} 拓展词列表:{relatedkeywords}')\n",
60
+ "])\n"
61
+ ],
62
+ "id": "7059fa2d895f586d",
63
+ "outputs": [],
64
+ "execution_count": 141
65
+ },
66
+ {
67
+ "metadata": {
68
+ "ExecuteTime": {
69
+ "end_time": "2024-06-05T09:50:55.619759Z",
70
+ "start_time": "2024-06-05T09:50:55.613906Z"
71
+ }
72
+ },
73
+ "cell_type": "code",
74
+ "source": [
75
+ "filter_prompt_template.invoke({'corekeywords': \"打印机\",\n",
76
+ " 'relatedkeywords': \"激光打印机 喷墨打印机\"})"
77
+ ],
78
+ "id": "98fd18304ff75856",
79
+ "outputs": [
80
+ {
81
+ "data": {
82
+ "text/plain": [
83
+ "ChatPromptValue(messages=[SystemMessage(content='\\n进行关键词分析,筛选出拓展词列表中与目标关键词高度相关的关键词,具体要求如下:\\n- 根据目标关键词,评估拓展词列表中每个关键词的相关性,删除相关性不高的关键词。\\n- 删除拓展词列表中的所有品牌词。\\n- 删除拓展词列表中的所有平台名称。\\n- 搜索意图匹配:确保每个关键词都与目标用户的搜索意图相匹配。不符合目标用户搜索意图的关键词应予以删除。\\n- 长尾关键词优先:倾向于保留具有具体意图且通常竞争较小的长尾关键词。\\n- 内容覆盖度:确保所选关键词能够被现有内容或计划中的内容充分覆盖,与网站内容策略相符合。\\n## Output:\\n直接输出筛选后的关键词列表,每行输出一个关键词。\\n'), HumanMessage(content='目标关键词:打印机 拓展词列表:激光打印机 喷墨打印机')])"
84
+ ]
85
+ },
86
+ "execution_count": 118,
87
+ "metadata": {},
88
+ "output_type": "execute_result"
89
+ }
90
+ ],
91
+ "execution_count": 118
92
+ },
93
+ {
94
+ "metadata": {
95
+ "ExecuteTime": {
96
+ "end_time": "2024-06-05T09:53:52.219922Z",
97
+ "start_time": "2024-06-05T09:53:51.496246Z"
98
+ }
99
+ },
100
+ "cell_type": "code",
101
+ "source": [
102
+ "chain = filter_prompt_template | model | parser\n",
103
+ "filter_res = chain.invoke({'corekeywords': \"打印机\",\n",
104
+ " 'relatedkeywords': \"激光打印机 喷墨打印机 汉堡王 京东商城 亚马逊 活字印刷术 印刷制品\"})\n",
105
+ "display(Markdown(filter_res))"
106
+ ],
107
+ "id": "25cc895e34e2ac5e",
108
+ "outputs": [
109
+ {
110
+ "data": {
111
+ "text/plain": [
112
+ "<IPython.core.display.Markdown object>"
113
+ ],
114
+ "text/markdown": "激光打印机 \n喷墨打印机"
115
+ },
116
+ "metadata": {},
117
+ "output_type": "display_data"
118
+ }
119
+ ],
120
+ "execution_count": 149
121
+ },
122
+ {
123
+ "metadata": {},
124
+ "cell_type": "markdown",
125
+ "source": "# 2. 筛选结果确认",
126
+ "id": "2569efb1c53bffd9"
127
+ },
128
+ {
129
+ "metadata": {},
130
+ "cell_type": "markdown",
131
+ "source": "# 3. 关键词分类",
132
+ "id": "792a4d9ab769d833"
133
+ },
134
+ {
135
+ "metadata": {
136
+ "ExecuteTime": {
137
+ "end_time": "2024-06-05T08:41:57.856976Z",
138
+ "start_time": "2024-06-05T08:41:57.851765Z"
139
+ }
140
+ },
141
+ "cell_type": "code",
142
+ "source": [
143
+ "classify_system_template = \"\"\"\n",
144
+ "## Role:\n",
145
+ "你是一名具有二十年工作经验的Google SEO专家,擅长快速制定SEO策略。\n",
146
+ "## Background:\n",
147
+ "我需要快速进行关键词分类,请参考用户提供的关键词列表,并按照指定的维度进行分类。\n",
148
+ "## Constraints:\n",
149
+ "1. 关键词列表: {key_words}\n",
150
+ "\n",
151
+ "2. 关键词分类维度: 颜色、规格、属性、材质、功能、样式、型号、品牌、商业词、地域词、文章词。\n",
152
+ "3. 以下关键词均属于商业词类别: manufacture, supplier, wholesale, OEM, ODM, company, factory, private label, Custom, bulk, Import, Export, Agent, Dealer, distributor。\n",
153
+ "## Workflows:\n",
154
+ "1. 将关键词列表中的每个词按照[Constraints]中关键词分类维度进行分类。\n",
155
+ "2. 单个关键词只能隶属于一个关键词分类维度,不能同时属于多个分类。\n",
156
+ "3. 每个关键词必须进行一次分类,不得遗漏。\n",
157
+ "4. 分类完毕后,在对应的分类字段中填写关键词中的修饰词。\n",
158
+ "5. 确保关键词列表中的每个词都被执行分类。\n",
159
+ "## Output:\n",
160
+ "1. 请用表格输出。\n",
161
+ "2. 不要任何解释,直接输出表格。\n",
162
+ "## Output Examples:\n",
163
+ "| 目标关键词 | 颜色 | 规格 | 属性 | 材质 | 功能 | 样式 | 型号 | 品牌 | 商业词 | 地域词 | 文章词 |\n",
164
+ "|------------|------|------|------|------|------|------|------|------|--------|--------|--------|\n",
165
+ "| 关键词1 | | | | | | | | | | | |\n",
166
+ "| 关键词2 | | | | | | | | | | | |\n",
167
+ "| 关键词3 | | | | | | | | | | | |\n",
168
+ "## Notice:\n",
169
+ "你必须将关键词列表中的每个关键词全部执行一次关键词分类,不得遗漏。每个关键词只能属于一个分类维度,不能同时出现在多个分类中。在分类字段中只填写关键词中的修饰词。\n",
170
+ "## Initialization:\n",
171
+ "作为[Role],在[Background]下,回顾你的技能,严格遵守[Constraints]进行关键词分类,按[Workflows]执行流程,按[Output]格式要求输出。\n",
172
+ "## Additional Constraints:\n",
173
+ "1. 确保所有关键词都被分类,不得遗漏。\n",
174
+ "2. 分类完毕后,检查表格,确保每个关键词都被准确分类。\n",
175
+ "3. 用户输入的目标关键词中的任何单词不参与关键词分类。\n",
176
+ "\"\"\"\n",
177
+ "classify_prompt_template = ChatPromptTemplate.from_messages([\n",
178
+ " ('system', classify_system_template),\n",
179
+ " ('user', '目标关键词:{target_keywords}')\n",
180
+ "])\n",
181
+ "# res = classify_prompt_template.invoke({'target_keywords': \"打印机\", 'key_words': \"激光打印机 喷墨打印机\"})\n",
182
+ "# res.to_messages()"
183
+ ],
184
+ "id": "2c72b992f3a15f94",
185
+ "outputs": [],
186
+ "execution_count": 40
187
+ },
188
+ {
189
+ "metadata": {
190
+ "ExecuteTime": {
191
+ "end_time": "2024-06-05T09:54:05.635944Z",
192
+ "start_time": "2024-06-05T09:54:03.158609Z"
193
+ }
194
+ },
195
+ "cell_type": "code",
196
+ "source": [
197
+ "classify_chain = classify_prompt_template | model | parser\n",
198
+ "class_res = classify_chain.invoke({'target_keywords': \"打印机\", 'key_words': filter_res})\n",
199
+ "display(Markdown(class_res))"
200
+ ],
201
+ "id": "2db5d8e8d991213e",
202
+ "outputs": [
203
+ {
204
+ "data": {
205
+ "text/plain": [
206
+ "<IPython.core.display.Markdown object>"
207
+ ],
208
+ "text/markdown": "| 目标关键词 | 颜色 | 规格 | 属性 | 材质 | 功能 | 样式 | 型号 | 品牌 | 商业词 | 地域词 | 文章词 |\n|------------|------|------|------|------|------|------|------|------|--------|--------|--------|\n| 激光打印机 | | | | | 激光 | | | | | | |\n| 喷墨打印机 | | | | | 喷墨 | | | | | | |"
209
+ },
210
+ "metadata": {},
211
+ "output_type": "display_data"
212
+ }
213
+ ],
214
+ "execution_count": 150
215
+ }
216
+ ],
217
+ "metadata": {
218
+ "kernelspec": {
219
+ "display_name": "Python 3",
220
+ "language": "python",
221
+ "name": "python3"
222
+ },
223
+ "language_info": {
224
+ "codemirror_mode": {
225
+ "name": "ipython",
226
+ "version": 2
227
+ },
228
+ "file_extension": ".py",
229
+ "mimetype": "text/x-python",
230
+ "name": "python",
231
+ "nbconvert_exporter": "python",
232
+ "pygments_lexer": "ipython2",
233
+ "version": "2.7.6"
234
+ }
235
+ },
236
+ "nbformat": 4,
237
+ "nbformat_minor": 5
238
+ }