Spaces:
Sleeping
Sleeping
Tuchuanhuhuhu
committed on
Commit
•
18cf6f9
1
Parent(s):
bb4c268
feat: 切换不同索引类型时自动重建索引
Browse files
- modules/index_func.py +2 -1
- modules/models/base_model.py +7 -4
- modules/utils.py +1 -1
modules/index_func.py
CHANGED
@@ -89,6 +89,7 @@ def construct_index(
|
|
89 |
chunk_size_limit=600,
|
90 |
embedding_limit=None,
|
91 |
separator=" ",
|
|
|
92 |
):
|
93 |
from langchain.chat_models import ChatOpenAI
|
94 |
from langchain.vectorstores import FAISS
|
@@ -116,7 +117,7 @@ def construct_index(
|
|
116 |
else:
|
117 |
embeddings = OpenAIEmbeddings(deployment=os.environ["AZURE_EMBEDDING_DEPLOYMENT_NAME"], openai_api_key=os.environ["AZURE_OPENAI_API_KEY"],
|
118 |
model=os.environ["AZURE_EMBEDDING_MODEL_NAME"], openai_api_base=os.environ["AZURE_OPENAI_API_BASE_URL"], openai_api_type="azure")
|
119 |
-
if os.path.exists(index_path):
|
120 |
logging.info("找到了缓存的索引文件,加载中……")
|
121 |
return FAISS.load_local(index_path, embeddings)
|
122 |
else:
|
|
|
89 |
chunk_size_limit=600,
|
90 |
embedding_limit=None,
|
91 |
separator=" ",
|
92 |
+
load_from_cache_if_possible=True,
|
93 |
):
|
94 |
from langchain.chat_models import ChatOpenAI
|
95 |
from langchain.vectorstores import FAISS
|
|
|
117 |
else:
|
118 |
embeddings = OpenAIEmbeddings(deployment=os.environ["AZURE_EMBEDDING_DEPLOYMENT_NAME"], openai_api_key=os.environ["AZURE_OPENAI_API_KEY"],
|
119 |
model=os.environ["AZURE_EMBEDDING_MODEL_NAME"], openai_api_base=os.environ["AZURE_OPENAI_API_BASE_URL"], openai_api_type="azure")
|
120 |
+
if os.path.exists(index_path) and load_from_cache_if_possible:
|
121 |
logging.info("找到了缓存的索引文件,加载中……")
|
122 |
return FAISS.load_local(index_path, embeddings)
|
123 |
else:
|
modules/models/base_model.py
CHANGED
@@ -342,7 +342,7 @@ class BaseLLMModel:
|
|
342 |
chatbot.append([i18n("上传了")+str(len(files))+"个文件", summary])
|
343 |
return chatbot, status
|
344 |
|
345 |
-
def prepare_inputs(self, real_inputs, use_websearch, files, reply_language, chatbot):
|
346 |
fake_inputs = None
|
347 |
display_append = []
|
348 |
limited_context = False
|
@@ -353,15 +353,18 @@ class BaseLLMModel:
|
|
353 |
limited_context = True
|
354 |
msg = "加载索引中……"
|
355 |
logging.info(msg)
|
356 |
-
index = construct_index(self.api_key, file_src=files)
|
357 |
assert index is not None, "获取索引失败"
|
358 |
msg = "索引获取成功,生成回答中……"
|
359 |
logging.info(msg)
|
360 |
with retrieve_proxy():
|
361 |
retriever = VectorStoreRetriever(vectorstore=index, search_type="similarity_score_threshold", search_kwargs={
|
362 |
"k": 6, "score_threshold": 0.5})
|
363 |
-
|
364 |
-
|
|
|
|
|
|
|
365 |
reference_results = [[d.page_content.strip("�"), os.path.basename(
|
366 |
d.metadata["source"])] for d in relevant_documents]
|
367 |
reference_results = add_source_numbers(reference_results)
|
|
|
342 |
chatbot.append([i18n("上传了")+str(len(files))+"个文件", summary])
|
343 |
return chatbot, status
|
344 |
|
345 |
+
def prepare_inputs(self, real_inputs, use_websearch, files, reply_language, chatbot, load_from_cache_if_possible=True):
|
346 |
fake_inputs = None
|
347 |
display_append = []
|
348 |
limited_context = False
|
|
|
353 |
limited_context = True
|
354 |
msg = "加载索引中……"
|
355 |
logging.info(msg)
|
356 |
+
index = construct_index(self.api_key, file_src=files, load_from_cache_if_possible=load_from_cache_if_possible)
|
357 |
assert index is not None, "获取索引失败"
|
358 |
msg = "索引获取成功,生成回答中……"
|
359 |
logging.info(msg)
|
360 |
with retrieve_proxy():
|
361 |
retriever = VectorStoreRetriever(vectorstore=index, search_type="similarity_score_threshold", search_kwargs={
|
362 |
"k": 6, "score_threshold": 0.5})
|
363 |
+
try:
|
364 |
+
relevant_documents = retriever.get_relevant_documents(
|
365 |
+
real_inputs)
|
366 |
+
except AssertionError:
|
367 |
+
return self.prepare_inputs(real_inputs, use_websearch, files, reply_language, chatbot, load_from_cache_if_possible=False)
|
368 |
reference_results = [[d.page_content.strip("�"), os.path.basename(
|
369 |
d.metadata["source"])] for d in relevant_documents]
|
370 |
reference_results = add_source_numbers(reference_results)
|
modules/utils.py
CHANGED
@@ -684,7 +684,7 @@ def get_history_filepath(username):
|
|
684 |
def beautify_err_msg(err_msg):
|
685 |
if "insufficient_quota" in err_msg:
|
686 |
return i18n("剩余配额不足,[进一步了解](https://github.com/GaiZhenbiao/ChuanhuChatGPT/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98#you-exceeded-your-current-quota-please-check-your-plan-and-billing-details)")
|
687 |
-
if "The model
|
688 |
return i18n("你没有权限访问 GPT4,[进一步了解](https://github.com/GaiZhenbiao/ChuanhuChatGPT/issues/843)")
|
689 |
if "Resource not found" in err_msg:
|
690 |
return i18n("请查看 config_example.json,配置 Azure OpenAI")
|
|
|
684 |
def beautify_err_msg(err_msg):
|
685 |
if "insufficient_quota" in err_msg:
|
686 |
return i18n("剩余配额不足,[进一步了解](https://github.com/GaiZhenbiao/ChuanhuChatGPT/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98#you-exceeded-your-current-quota-please-check-your-plan-and-billing-details)")
|
687 |
+
if "The model `gpt-4` does not exist" in err_msg:
|
688 |
return i18n("你没有权限访问 GPT4,[进一步了解](https://github.com/GaiZhenbiao/ChuanhuChatGPT/issues/843)")
|
689 |
if "Resource not found" in err_msg:
|
690 |
return i18n("请查看 config_example.json,配置 Azure OpenAI")
|