AllenYkl committed on
Commit
15aca52
1 Parent(s): 221ae85

Update bin_public/utils/Pinecone.py

Browse files
Files changed (1) hide show
  1. bin_public/utils/Pinecone.py +71 -1
bin_public/utils/Pinecone.py CHANGED
@@ -104,4 +104,74 @@ def context_construction(api_key, query, model, pinecone_api_key, pinecone_api_e
104
  # response = chain.run(input_documents=docs, question=str(query))
105
  for i in docs:
106
  temp.append(i.page_content)
107
- return '用以下资料进行辅助回答\n' + ' '.join(temp), '\n' + ' '.join(temp), "Connecting to Pinecone"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  # response = chain.run(input_documents=docs, question=str(query))
105
  for i in docs:
106
  temp.append(i.page_content)
107
+ return '用以下资料进行辅助回答\n' + ' '.join(temp), '\n' + ' '.join(temp), "Connecting to Pinecone"
108
+
109
def chat_prerequisites(input, filter, embeddings, top_k=4):
    """Build the system prompt and user query for the job-classification chat.

    Runs a similarity search against the first index returned by
    ``pinecone.list_indexes()`` and labels the hits as the four candidate
    options the instruction prompt refers to.

    Args:
        input: Job-posting text; it is appended directly to the fixed
            instruction prompt. (The name shadows the builtin but is kept
            so existing keyword callers keep working.)
        filter: Metadata filter (a dict, per the original comment) passed
            through to ``similarity_search``. (Name also shadows a builtin;
            kept for compatibility.)
        embeddings: Embedding object handed to
            ``Pinecone.from_existing_index``.
        top_k: Number of hits requested from the vector store. Only the
            first four hits are used, because the prompt offers exactly
            four options.

    Returns:
        A ``(system_prompt, query)`` tuple: the labelled candidate options
        joined by single spaces, and the instruction prompt concatenated
        with ``input``.
    """
    option_labels = ("选项一:", "选项二:", "选项三:", "选项四:")

    input_prompt = (
        '接下来我会给你一段"不规范的招聘职位描述",以及4个用(选项一,选项二,选项三,选项四)四个选项表示的规范的职业分类描述。'
        '你需要将"不规范的招聘职位描述"归类为”选项一“或“选项二”或“选项三”或“选项四”。'
        '你只需要回复”选项一“或“选项二”或“选项三”或“选项四”,不要回复任何别的东西'
    )
    query = input_prompt + input

    docsearch = Pinecone.from_existing_index(
        index_name=pinecone.list_indexes()[0],
        embedding=embeddings,
    )
    docs = docsearch.similarity_search(query, k=top_k, filter=filter)

    # zip() stops at the shorter sequence: fewer than four hits yield fewer
    # options, and hits beyond the fourth are ignored.
    options = [
        label + doc.page_content + "##"
        for label, doc in zip(option_labels, docs)
    ]
    system_prompt = ' '.join(options)

    return system_prompt, query
134
+
135
+
136
def chat(input, filter, embeddings, model="gpt-3.5-turbo"):
    """Classify a job posting by asking a chat model to pick one option.

    Args:
        input: Job-posting text, forwarded to ``chat_prerequisites``.
        filter: Metadata filter, forwarded to ``chat_prerequisites``.
        embeddings: Embedding object, forwarded to ``chat_prerequisites``.
        model: Name of the chat model to call. Defaults to the previously
            hard-coded ``"gpt-3.5-turbo"``.

    Returns:
        A ``(answer, system_prompt)`` tuple: the content of the first
        completion choice, and the system prompt that listed the options.
    """
    system_prompt, query = chat_prerequisites(input, filter, embeddings)
    logger.info('prerequisites satisfied')

    completion = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ],
    )
    return completion.choices[0].message['content'], system_prompt
146
+
147
def chat_data_cleaning(input, model="gpt-3.5-turbo"):
    """Ask a chat model to reduce a raw job posting to name + description.

    Args:
        input: Raw job-posting text. (The name shadows the builtin but is
            kept so existing keyword callers keep working.)
        model: Name of the chat model to call. Defaults to the previously
            hard-coded ``"gpt-3.5-turbo"``.

    Returns:
        The content of the first completion choice.
    """
    clean_prompt = '我要求你提取出这段文字中的岗位名称、岗位描述(用一句或者两句话概括),去除无关紧要的信息,比如工资,地点等等,并严格遵守"岗位名称: xxx # 岗位描述: xxx # "的格式进行回复'

    # NOTE(review): the instruction is sent as the system message and again
    # as a prefix of the user message. Kept as-is; confirm whether the
    # repetition is intentional.
    completion = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": clean_prompt},
            {"role": "user", "content": clean_prompt + input},
        ],
    )
    return completion.choices[0].message['content']
156
+
157
def local_emb2pinecone(PINECONE_API_KEY, PINECONE_API_ENV, level, emb_path, text_path, delete=False):
    """Upload locally stored embeddings and their texts to Pinecone.

    The target is the first index returned by ``pinecone.list_indexes()``.

    Args:
        PINECONE_API_KEY: API key passed to ``pinecone.init``.
        PINECONE_API_ENV: Environment passed to ``pinecone.init``.
        level: Value stored under the ``"level"`` metadata key of every
            uploaded vector.
        emb_path: Path given to ``load_json``; the result is used as a
            mapping of vector id -> embedding.
        text_path: UTF-8 text file with one text per line; lines are paired
            with the embeddings in order.
        delete: When true, interactively ask (via ``input``) whether to
            delete every vector in the index before uploading.
    """
    pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_API_ENV)
    logger.info('Pinecone initialized')

    # Resolve the index name once instead of querying the service repeatedly.
    index_name = pinecone.list_indexes()[0]
    logger.info(index_name)

    embeddings_by_id = load_json(emb_path)
    logger.info(f'level{level} loaded')

    with open(text_path, 'r', encoding='utf-8') as f:
        texts = [line.replace('\n', '') for line in f]

    # zip() below stops at the shorter input, so surface a mismatch instead
    # of silently skipping entries.
    if len(texts) != len(embeddings_by_id):
        logger.warning(
            f'{len(embeddings_by_id)} embeddings but {len(texts)} texts; '
            'unmatched entries will be skipped'
        )

    index = pinecone.Index(index_name)

    if delete and input('press y to delete all the vectors: ') == 'y':
        index.delete(delete_all=True)
        logger.info('delete all')

    # NOTE(review): vectors are sent one per request, as before; batching
    # several tuples per upsert call would cut round trips.
    for (key, value), text in zip(embeddings_by_id.items(), texts):
        index.upsert([(key, value, {"text": text, "level": level})])

    logger.info('upload successfully')