Spaces:
Running
Running
Update bin_public/utils/Pinecone.py
Browse files- bin_public/utils/Pinecone.py +71 -1
bin_public/utils/Pinecone.py
CHANGED
@@ -104,4 +104,74 @@ def context_construction(api_key, query, model, pinecone_api_key, pinecone_api_e
|
|
104 |
# response = chain.run(input_documents=docs, question=str(query))
|
105 |
for i in docs:
|
106 |
temp.append(i.page_content)
|
107 |
-
return '用以下资料进行辅助回答\n' + ' '.join(temp), '\n' + ' '.join(temp), "Connecting to Pinecone"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
# response = chain.run(input_documents=docs, question=str(query))
|
105 |
for i in docs:
|
106 |
temp.append(i.page_content)
|
107 |
+
return '用以下资料进行辅助回答\n' + ' '.join(temp), '\n' + ' '.join(temp), "Connecting to Pinecone"
|
108 |
+
|
109 |
+
def chat_prerequisites(input, filter, embeddings, top_k=4):
    """Build the prompts used to classify a free-form job description.

    The instruction text is prepended to ``input`` to form the query. That
    query is then used for a similarity search against the first Pinecone
    index, and the retrieved documents are labelled as the four answer options.

    Args:
        input: Raw job-description text to classify.
        filter: Metadata filter (a dict) forwarded to ``similarity_search``.
        embeddings: Embedding object handed to ``Pinecone.from_existing_index``.
        top_k: Number of documents requested from the search. Only the first
            four results receive an option label; any further ones are ignored.

    Returns:
        A ``(system_prompt, query)`` tuple: the labelled options joined by a
        single space, and the instruction text followed by ``input``.
    """
    instruction = (
        '接下来我会给你一段"不规范的招聘职位描述",以及4个用(选项一,选项二,选项三,选项四)四个选项表示的规范的职业分类描述。'
        '你需要将"不规范的招聘职位描述"归类为”选项一“或“选项二”或“选项三”或“选项四”。'
        '你只需要回复”选项一“或“选项二”或“选项三”或“选项四”,不要回复任何别的东西'
    )
    query = instruction + input

    store = Pinecone.from_existing_index(index_name=pinecone.list_indexes()[0], embedding=embeddings)
    matches = store.similarity_search(query, k=top_k, filter=filter)

    # zip stops at the shorter sequence, so at most four documents are labelled.
    option_labels = ("选项一:", "选项二:", "选项三:", "选项四:")
    options = [label + doc.page_content + "##" for label, doc in zip(option_labels, matches)]

    return ' '.join(options), query
|
134 |
+
|
135 |
+
|
136 |
+
def chat(input, filter, embeddings):
    """Classify a job description with gpt-3.5-turbo.

    Args:
        input: Raw job-description text to classify.
        filter: Metadata filter forwarded to ``chat_prerequisites``.
        embeddings: Embedding object forwarded to ``chat_prerequisites``.

    Returns:
        A ``(answer, system_prompt)`` tuple: the content of the first
        completion choice and the system prompt that was sent to the model.
    """
    system_prompt, query = chat_prerequisites(input, filter, embeddings)
    logger.info('prerequisites satisfied')

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
    ]
    completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=conversation)

    answer = completion.choices[0].message['content']
    return answer, system_prompt
|
146 |
+
|
147 |
+
def chat_data_cleaning(input):
    """Ask gpt-3.5-turbo to reduce a job posting to its name and description.

    The extraction instruction is sent as the system message and is also
    prepended to ``input`` in the user message.

    Args:
        input: Raw job-posting text.

    Returns:
        The content of the first completion choice.
    """
    clean_prompt = '我要求你提取出这段文字中的岗位名称、岗位描述(用一句或者两句话概括),去除无关紧要的信息,比如工资,地点等等,并严格遵守"岗位名称: xxx # 岗位描述: xxx # "的格式进行回复'

    conversation = [
        {"role": "system", "content": clean_prompt},
        {"role": "user", "content": clean_prompt + input},
    ]
    completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=conversation)

    return completion.choices[0].message['content']
|
156 |
+
|
157 |
+
def local_emb2pinecone(PINECONE_API_KEY, PINECONE_API_ENV, level, emb_path, text_path, delete=False):
    """Upload locally stored embeddings and their texts to the first Pinecone index.

    Each embedding is stored with the metadata ``{"text": ..., "level": level}``.
    The i-th entry returned by ``load_json(emb_path)`` is paired with the i-th
    line of ``text_path``.

    Args:
        PINECONE_API_KEY: API key passed to ``pinecone.init``.
        PINECONE_API_ENV: Environment name passed to ``pinecone.init``.
        level: Value stored under the ``"level"`` metadata key of every vector.
        emb_path: Path given to ``load_json``; the result is used as a mapping
            from vector id to embedding values (assumed from how it is
            iterated here; confirm against ``load_json``).
        text_path: UTF-8 text file with one text per line.
        delete: When true, interactively ask on stdin whether to wipe all
            vectors from the index before uploading.
    """
    pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_API_ENV)
    logger.info('Pinecone initialized')
    # Resolve the target index once instead of listing the indexes twice.
    index_name = pinecone.list_indexes()[0]
    logger.info(index_name)

    embeddings_by_id = load_json(emb_path)
    print(f'level{level} loaded')
    with open(text_path, 'r', encoding='utf-8') as f:
        texts = [line.replace('\n', '') for line in f]

    index = pinecone.Index(index_name)
    # Wiping the index is destructive, so it needs an explicit "y" on stdin.
    if delete and input('press y to delete all the vectors: ') == 'y':
        index.delete(delete_all=True)
        logger.info('delete all')

    # zip() below stops at the shorter input; make a mismatch visible instead
    # of silently dropping embeddings or texts.
    if len(embeddings_by_id) != len(texts):
        logger.warning(
            'embedding count (%d) differs from text line count (%d); extra entries are skipped',
            len(embeddings_by_id), len(texts),
        )

    vectors = [
        (key, value, {"text": text, "level": level})
        for (key, value), text in zip(embeddings_by_id.items(), texts)
    ]

    # Upsert in batches rather than issuing one request per vector.
    batch_size = 100
    for start in range(0, len(vectors), batch_size):
        index.upsert(vectors[start:start + batch_size])
    logger.info('upload successfully')
|