Tuchuanhuhuhu committed on
Commit
18cf6f9
1 Parent(s): bb4c268

feat: 切换不同索引类型时自动重建索引

Browse files
modules/index_func.py CHANGED
@@ -89,6 +89,7 @@ def construct_index(
89
  chunk_size_limit=600,
90
  embedding_limit=None,
91
  separator=" ",
 
92
  ):
93
  from langchain.chat_models import ChatOpenAI
94
  from langchain.vectorstores import FAISS
@@ -116,7 +117,7 @@ def construct_index(
116
  else:
117
  embeddings = OpenAIEmbeddings(deployment=os.environ["AZURE_EMBEDDING_DEPLOYMENT_NAME"], openai_api_key=os.environ["AZURE_OPENAI_API_KEY"],
118
  model=os.environ["AZURE_EMBEDDING_MODEL_NAME"], openai_api_base=os.environ["AZURE_OPENAI_API_BASE_URL"], openai_api_type="azure")
119
- if os.path.exists(index_path):
120
  logging.info("找到了缓存的索引文件,加载中……")
121
  return FAISS.load_local(index_path, embeddings)
122
  else:
 
89
  chunk_size_limit=600,
90
  embedding_limit=None,
91
  separator=" ",
92
+ load_from_cache_if_possible=True,
93
  ):
94
  from langchain.chat_models import ChatOpenAI
95
  from langchain.vectorstores import FAISS
 
117
  else:
118
  embeddings = OpenAIEmbeddings(deployment=os.environ["AZURE_EMBEDDING_DEPLOYMENT_NAME"], openai_api_key=os.environ["AZURE_OPENAI_API_KEY"],
119
  model=os.environ["AZURE_EMBEDDING_MODEL_NAME"], openai_api_base=os.environ["AZURE_OPENAI_API_BASE_URL"], openai_api_type="azure")
120
+ if os.path.exists(index_path) and load_from_cache_if_possible:
121
  logging.info("找到了缓存的索引文件,加载中……")
122
  return FAISS.load_local(index_path, embeddings)
123
  else:
modules/models/base_model.py CHANGED
@@ -342,7 +342,7 @@ class BaseLLMModel:
342
  chatbot.append([i18n("上传了")+str(len(files))+"个文件", summary])
343
  return chatbot, status
344
 
345
- def prepare_inputs(self, real_inputs, use_websearch, files, reply_language, chatbot):
346
  fake_inputs = None
347
  display_append = []
348
  limited_context = False
@@ -353,15 +353,18 @@ class BaseLLMModel:
353
  limited_context = True
354
  msg = "加载索引中……"
355
  logging.info(msg)
356
- index = construct_index(self.api_key, file_src=files)
357
  assert index is not None, "获取索引失败"
358
  msg = "索引获取成功,生成回答中……"
359
  logging.info(msg)
360
  with retrieve_proxy():
361
  retriever = VectorStoreRetriever(vectorstore=index, search_type="similarity_score_threshold", search_kwargs={
362
  "k": 6, "score_threshold": 0.5})
363
- relevant_documents = retriever.get_relevant_documents(
364
- real_inputs)
 
 
 
365
  reference_results = [[d.page_content.strip("�"), os.path.basename(
366
  d.metadata["source"])] for d in relevant_documents]
367
  reference_results = add_source_numbers(reference_results)
 
342
  chatbot.append([i18n("上传了")+str(len(files))+"个文件", summary])
343
  return chatbot, status
344
 
345
+ def prepare_inputs(self, real_inputs, use_websearch, files, reply_language, chatbot, load_from_cache_if_possible=True):
346
  fake_inputs = None
347
  display_append = []
348
  limited_context = False
 
353
  limited_context = True
354
  msg = "加载索引中……"
355
  logging.info(msg)
356
+ index = construct_index(self.api_key, file_src=files, load_from_cache_if_possible=load_from_cache_if_possible)
357
  assert index is not None, "获取索引失败"
358
  msg = "索引获取成功,生成回答中……"
359
  logging.info(msg)
360
  with retrieve_proxy():
361
  retriever = VectorStoreRetriever(vectorstore=index, search_type="similarity_score_threshold", search_kwargs={
362
  "k": 6, "score_threshold": 0.5})
363
+ try:
364
+ relevant_documents = retriever.get_relevant_documents(
365
+ real_inputs)
366
+ except AssertionError:
367
+ return self.prepare_inputs(real_inputs, use_websearch, files, reply_language, chatbot, load_from_cache_if_possible=False)
368
  reference_results = [[d.page_content.strip("�"), os.path.basename(
369
  d.metadata["source"])] for d in relevant_documents]
370
  reference_results = add_source_numbers(reference_results)
modules/utils.py CHANGED
@@ -684,7 +684,7 @@ def get_history_filepath(username):
684
  def beautify_err_msg(err_msg):
685
  if "insufficient_quota" in err_msg:
686
  return i18n("剩余配额不足,[进一步了解](https://github.com/GaiZhenbiao/ChuanhuChatGPT/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98#you-exceeded-your-current-quota-please-check-your-plan-and-billing-details)")
687
- if "The model: gpt-4 does not exist" in err_msg:
688
  return i18n("你没有权限访问 GPT4,[进一步了解](https://github.com/GaiZhenbiao/ChuanhuChatGPT/issues/843)")
689
  if "Resource not found" in err_msg:
690
  return i18n("请查看 config_example.json,配置 Azure OpenAI")
 
684
  def beautify_err_msg(err_msg):
685
  if "insufficient_quota" in err_msg:
686
  return i18n("剩余配额不足,[进一步了解](https://github.com/GaiZhenbiao/ChuanhuChatGPT/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98#you-exceeded-your-current-quota-please-check-your-plan-and-billing-details)")
687
+ if "The model `gpt-4` does not exist" in err_msg:
688
  return i18n("你没有权限访问 GPT4,[进一步了解](https://github.com/GaiZhenbiao/ChuanhuChatGPT/issues/843)")
689
  if "Resource not found" in err_msg:
690
  return i18n("请查看 config_example.json,配置 Azure OpenAI")