Spaces:

markqiu
/

prinvest_mate

Sleeping

Keldos committed on Jun 2, 2023

Commit

6f70034

1 Parent(s): 5cc61e4

Squash merge main into newgradio

commit 13edad804483914294548a9ffcf0fd028d39cc03
Author: Keldos <i@keldos.me>
Date: Fri Jun 2 12:05:54 2023 +0800
Revert 为适配 gradio 3.30.0 做出的更改

This reverts commit de613b48355a2594dabca66b2192723b67bf92b4.
This reverts commit 6c96d6f0ed17c74df567b4fd8ac4c1a60ea24c7a.

commit daf68a9ee8769e0373d00688139759355c874edc
Author: Tuchuanhuhuhu <gzblog@hdu.edu.cn>
Date: Fri Jun 2 10:51:39 2023 +0800

修复索引逻辑 #789

commit fe0c8bd35daacbca78e5f31f7aaadf76a69c7dfb
Author: Tuchuanhuhuhu <gzblog@hdu.edu.cn>
Date: Fri Jun 2 01:23:57 2023 +0800

改进生成 embedding 时的稳定性

commit dc61f72798839dca3ec16f20aa2810ef6df618a4
Author: Tuchuanhuhuhu <gzblog@hdu.edu.cn>
Date: Fri Jun 2 01:23:36 2023 +0800

改进 config 写入环境变量的方式

commit 4f8449f8cf0f43bde25298c7f85ee418a019e045
Author: Tuchuanhuhuhu <gzblog@hdu.edu.cn>
Date: Thu Jun 1 14:01:54 2023 +0800

更新 gradioclient 版本

commit bfac445e799c317b0f5e738ab394032a18de62eb
Author: Tuchuanhuhuhu <gzblog@hdu.edu.cn>
Date: Thu Jun 1 10:40:10 2023 +0800

增加 blocked path

Files changed (3) hide show

modules/config.py CHANGED Viewed

@@ -102,10 +102,9 @@ if api_host is not None:
     shared.state.set_api_host(api_host)
 default_chuanhu_assistant_model = config.get("default_chuanhu_assistant_model", "gpt-3.5-turbo")
-os.environ["GOOGLE_CSE_ID"] = config.get("GOOGLE_CSE_ID", "")
-os.environ["GOOGLE_API_KEY"] = config.get("GOOGLE_API_KEY", "")
-os.environ["WOLFRAM_ALPHA_APPID"] = config.get("WOLFRAM_ALPHA_APPID", "")
-os.environ["SERPAPI_API_KEY"] = config.get("SERPAPI_API_KEY", "")
 @contextmanager
 def retrieve_openai_api(api_key = None):

     shared.state.set_api_host(api_host)
 default_chuanhu_assistant_model = config.get("default_chuanhu_assistant_model", "gpt-3.5-turbo")
+for x in ["GOOGLE_CSE_ID", "GOOGLE_API_KEY", "WOLFRAM_ALPHA_APPID", "SERPAPI_API_KEY"]:
+    if config.get(x, None) is not None:
+        os.environ[x] = config[x]
 @contextmanager
 def retrieve_openai_api(api_key = None):

modules/index_func.py CHANGED Viewed

@@ -51,7 +51,7 @@ def get_documents(file_src):
                         pdfReader = PyPDF2.PdfReader(pdfFileObj)
                         for page in tqdm(pdfReader.pages):
                             pdftext += page.extract_text()
-                texts = Document(page_content=pdftext, metadata={"source": filepath})
             elif file_type == ".docx":
                 logging.debug("Loading Word...")
                 from langchain.document_loaders import UnstructuredWordDocumentLoader
@@ -71,8 +71,7 @@ def get_documents(file_src):
                 logging.debug("Loading Excel...")
                 text_list = excel_to_string(filepath)
                 for elem in text_list:
-                    documents.append(Document(page_content=elem, metadata={"source": filepath}))
-                continue
             else:
                 logging.debug("Loading text file...")
                 from langchain.document_loaders import TextLoader
@@ -83,7 +82,7 @@ def get_documents(file_src):
             logging.error(f"Error loading file: {filename}")
             traceback.print_exc()
-        texts = text_splitter.split_documents([texts])
         documents.extend(texts)
     logging.debug("Documents loaded.")
     return documents

                         pdfReader = PyPDF2.PdfReader(pdfFileObj)
                         for page in tqdm(pdfReader.pages):
                             pdftext += page.extract_text()
+                texts = [Document(page_content=pdftext, metadata={"source": filepath})]
             elif file_type == ".docx":
                 logging.debug("Loading Word...")
                 from langchain.document_loaders import UnstructuredWordDocumentLoader
                 logging.debug("Loading Excel...")
                 text_list = excel_to_string(filepath)
                 for elem in text_list:
+                    texts.append(Document(page_content=elem, metadata={"source": filepath}))
             else:
                 logging.debug("Loading text file...")
                 from langchain.document_loaders import TextLoader
             logging.error(f"Error loading file: {filename}")
             traceback.print_exc()
+        texts = text_splitter.split_documents(texts)
         documents.extend(texts)
     logging.debug("Documents loaded.")
     return documents

requirements.txt CHANGED Viewed

@@ -1,5 +1,5 @@
 gradio==3.33.0
-gradio_client==0.1.4
 mdtex2html
 pypinyin
 tiktoken

 gradio==3.33.0
+gradio_client==0.2.4
 mdtex2html
 pypinyin
 tiktoken