Spaces:

markqiu
/

prinvest_mate

Sleeping

App Files Community

Tuchuanhuhuhu committed on Oct 10, 2023

Commit

18cf6f9

•

1 Parent(s): bb4c268

feat: 切换不同索引类型时自动重建索引

Browse files

Files changed (3) hide show

modules/index_func.py +2 -1
modules/models/base_model.py +7 -4
modules/utils.py +1 -1

modules/index_func.py CHANGED Viewed

@@ -89,6 +89,7 @@ def construct_index(
     chunk_size_limit=600,
     embedding_limit=None,
     separator=" ",
 ):
     from langchain.chat_models import ChatOpenAI
     from langchain.vectorstores import FAISS
@@ -116,7 +117,7 @@ def construct_index(
         else:
             embeddings = OpenAIEmbeddings(deployment=os.environ["AZURE_EMBEDDING_DEPLOYMENT_NAME"], openai_api_key=os.environ["AZURE_OPENAI_API_KEY"],
                                           model=os.environ["AZURE_EMBEDDING_MODEL_NAME"], openai_api_base=os.environ["AZURE_OPENAI_API_BASE_URL"], openai_api_type="azure")
-    if os.path.exists(index_path):
         logging.info("找到了缓存的索引文件，加载中……")
         return FAISS.load_local(index_path, embeddings)
     else:

     chunk_size_limit=600,
     embedding_limit=None,
     separator=" ",
+    load_from_cache_if_possible=True,
 ):
     from langchain.chat_models import ChatOpenAI
     from langchain.vectorstores import FAISS
         else:
             embeddings = OpenAIEmbeddings(deployment=os.environ["AZURE_EMBEDDING_DEPLOYMENT_NAME"], openai_api_key=os.environ["AZURE_OPENAI_API_KEY"],
                                           model=os.environ["AZURE_EMBEDDING_MODEL_NAME"], openai_api_base=os.environ["AZURE_OPENAI_API_BASE_URL"], openai_api_type="azure")
+    if os.path.exists(index_path) and load_from_cache_if_possible:
         logging.info("找到了缓存的索引文件，加载中……")
         return FAISS.load_local(index_path, embeddings)
     else:

modules/models/base_model.py CHANGED Viewed

@@ -342,7 +342,7 @@ class BaseLLMModel:
             chatbot.append([i18n("上传了")+str(len(files))+"个文件", summary])
         return chatbot, status
-    def prepare_inputs(self, real_inputs, use_websearch, files, reply_language, chatbot):
         fake_inputs = None
         display_append = []
         limited_context = False
@@ -353,15 +353,18 @@ class BaseLLMModel:
             limited_context = True
             msg = "加载索引中……"
             logging.info(msg)
-            index = construct_index(self.api_key, file_src=files)
             assert index is not None, "获取索引失败"
             msg = "索引获取成功，生成回答中……"
             logging.info(msg)
             with retrieve_proxy():
                 retriever = VectorStoreRetriever(vectorstore=index, search_type="similarity_score_threshold", search_kwargs={
                                                  "k": 6, "score_threshold": 0.5})
-                relevant_documents = retriever.get_relevant_documents(
-                    real_inputs)
             reference_results = [[d.page_content.strip("�"), os.path.basename(
                 d.metadata["source"])] for d in relevant_documents]
             reference_results = add_source_numbers(reference_results)

             chatbot.append([i18n("上传了")+str(len(files))+"个文件", summary])
         return chatbot, status
+    def prepare_inputs(self, real_inputs, use_websearch, files, reply_language, chatbot, load_from_cache_if_possible=True):
         fake_inputs = None
         display_append = []
         limited_context = False
             limited_context = True
             msg = "加载索引中……"
             logging.info(msg)
+            index = construct_index(self.api_key, file_src=files, load_from_cache_if_possible=load_from_cache_if_possible)
             assert index is not None, "获取索引失败"
             msg = "索引获取成功，生成回答中……"
             logging.info(msg)
             with retrieve_proxy():
                 retriever = VectorStoreRetriever(vectorstore=index, search_type="similarity_score_threshold", search_kwargs={
                                                  "k": 6, "score_threshold": 0.5})
+                try:
+                    relevant_documents = retriever.get_relevant_documents(
+                        real_inputs)
+                except AssertionError:
+                    return self.prepare_inputs(real_inputs, use_websearch, files, reply_language, chatbot, load_from_cache_if_possible=False)
             reference_results = [[d.page_content.strip("�"), os.path.basename(
                 d.metadata["source"])] for d in relevant_documents]
             reference_results = add_source_numbers(reference_results)

modules/utils.py CHANGED Viewed

@@ -684,7 +684,7 @@ def get_history_filepath(username):
 def beautify_err_msg(err_msg):
     if "insufficient_quota" in  err_msg:
         return i18n("剩余配额不足，[进一步了解](https://github.com/GaiZhenbiao/ChuanhuChatGPT/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98#you-exceeded-your-current-quota-please-check-your-plan-and-billing-details)")
-    if "The model: gpt-4 does not exist" in err_msg:
         return i18n("你没有权限访问 GPT4，[进一步了解](https://github.com/GaiZhenbiao/ChuanhuChatGPT/issues/843)")
     if "Resource not found" in err_msg:
         return i18n("请查看 config_example.json，配置 Azure OpenAI")

 def beautify_err_msg(err_msg):
     if "insufficient_quota" in  err_msg:
         return i18n("剩余配额不足，[进一步了解](https://github.com/GaiZhenbiao/ChuanhuChatGPT/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98#you-exceeded-your-current-quota-please-check-your-plan-and-billing-details)")
+    if "The model `gpt-4` does not exist" in err_msg:
         return i18n("你没有权限访问 GPT4，[进一步了解](https://github.com/GaiZhenbiao/ChuanhuChatGPT/issues/843)")
     if "Resource not found" in err_msg:
         return i18n("请查看 config_example.json，配置 Azure OpenAI")