JohnSmith9982 committed on
Commit
8971a40
1 Parent(s): 7a87049

Upload 58 files

Browse files
ChuanhuChatbot.py CHANGED
@@ -12,6 +12,7 @@ from modules.presets import *
12
  from modules.overwrites import *
13
  from modules.models.models import get_model
14
 
 
15
 
16
  gr.Chatbot._postprocess_chat_messages = postprocess_chat_messages
17
  gr.Chatbot.postprocess = postprocess
@@ -88,7 +89,6 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
88
  with gr.Row():
89
  single_turn_checkbox = gr.Checkbox(label=i18n("单轮对话"), value=False)
90
  use_websearch_checkbox = gr.Checkbox(label=i18n("使用在线搜索"), value=False)
91
- # render_latex_checkbox = gr.Checkbox(label=i18n("渲染LaTeX公式"), value=render_latex, interactive=True, elem_id="render_latex_checkbox")
92
  language_select_dropdown = gr.Dropdown(
93
  label=i18n("选择回复语言(针对搜索&索引功能)"),
94
  choices=REPLY_LANGUAGES,
@@ -161,7 +161,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
161
 
162
  with gr.Tab(label=i18n("高级")):
163
  gr.Markdown(i18n("# ⚠️ 务必谨慎更改 ⚠️\n\n如果无法使用请恢复默认设置"))
164
- gr.HTML(APPEARANCE_SWITCHER, elem_classes="insert_block")
165
  use_streaming_checkbox = gr.Checkbox(
166
  label=i18n("实时传输回答"), value=True, visible=ENABLE_STREAMING_OPTION
167
  )
@@ -265,7 +265,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
265
  default_btn = gr.Button(i18n("🔙 恢复默认设置"))
266
 
267
  gr.Markdown(CHUANHU_DESCRIPTION, elem_id="description")
268
- gr.HTML(FOOTER.format(versions=versions_html()), elem_id="footer")
269
 
270
  # https://github.com/gradio-app/gradio/pull/3296
271
  def create_greeting(request: gr.Request):
@@ -469,10 +469,5 @@ if __name__ == "__main__":
469
  reload_javascript()
470
  demo.queue(concurrency_count=CONCURRENT_COUNT).launch(
471
  blocked_paths=["config.json"],
472
- auth=auth_list if authflag else None,
473
- favicon_path="./assets/favicon.ico",
474
- inbrowser=not dockerflag, # 禁止在docker下开启inbrowser
475
  )
476
- # demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", server_port=7860, share=False) # 可自定义端口
477
- # demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", server_port=7860,auth=("在这里填写用户名", "在这里填写密码")) # 可设置用户名与密码
478
- # demo.queue(concurrency_count=CONCURRENT_COUNT).launch(auth=("在这里填写用户名", "在这里填写密码")) # 适合Nginx反向代理
 
12
  from modules.overwrites import *
13
  from modules.models.models import get_model
14
 
15
+ logging.getLogger("httpx").setLevel(logging.WARNING)
16
 
17
  gr.Chatbot._postprocess_chat_messages = postprocess_chat_messages
18
  gr.Chatbot.postprocess = postprocess
 
89
  with gr.Row():
90
  single_turn_checkbox = gr.Checkbox(label=i18n("单轮对话"), value=False)
91
  use_websearch_checkbox = gr.Checkbox(label=i18n("使用在线搜索"), value=False)
 
92
  language_select_dropdown = gr.Dropdown(
93
  label=i18n("选择回复语言(针对搜索&索引功能)"),
94
  choices=REPLY_LANGUAGES,
 
161
 
162
  with gr.Tab(label=i18n("高级")):
163
  gr.Markdown(i18n("# ⚠️ 务必谨慎更改 ⚠️\n\n如果无法使用请恢复默认设置"))
164
+ gr.HTML(get_html("appearance_switcher.html").format(label=i18n("切换亮暗色主题")), elem_classes="insert_block")
165
  use_streaming_checkbox = gr.Checkbox(
166
  label=i18n("实时传输回答"), value=True, visible=ENABLE_STREAMING_OPTION
167
  )
 
265
  default_btn = gr.Button(i18n("🔙 恢复默认设置"))
266
 
267
  gr.Markdown(CHUANHU_DESCRIPTION, elem_id="description")
268
+ gr.HTML(get_html("footer.html").format(versions=versions_html()), elem_id="footer")
269
 
270
  # https://github.com/gradio-app/gradio/pull/3296
271
  def create_greeting(request: gr.Request):
 
469
  reload_javascript()
470
  demo.queue(concurrency_count=CONCURRENT_COUNT).launch(
471
  blocked_paths=["config.json"],
472
+ favicon_path="./assets/favicon.ico"
 
 
473
  )
 
 
 
modules/__pycache__/config.cpython-311.pyc CHANGED
Binary files a/modules/__pycache__/config.cpython-311.pyc and b/modules/__pycache__/config.cpython-311.pyc differ
 
modules/__pycache__/config.cpython-39.pyc CHANGED
Binary files a/modules/__pycache__/config.cpython-39.pyc and b/modules/__pycache__/config.cpython-39.pyc differ
 
modules/__pycache__/index_func.cpython-311.pyc CHANGED
Binary files a/modules/__pycache__/index_func.cpython-311.pyc and b/modules/__pycache__/index_func.cpython-311.pyc differ
 
modules/__pycache__/index_func.cpython-39.pyc CHANGED
Binary files a/modules/__pycache__/index_func.cpython-39.pyc and b/modules/__pycache__/index_func.cpython-39.pyc differ
 
modules/__pycache__/overwrites.cpython-311.pyc CHANGED
Binary files a/modules/__pycache__/overwrites.cpython-311.pyc and b/modules/__pycache__/overwrites.cpython-311.pyc differ
 
modules/__pycache__/overwrites.cpython-39.pyc CHANGED
Binary files a/modules/__pycache__/overwrites.cpython-39.pyc and b/modules/__pycache__/overwrites.cpython-39.pyc differ
 
modules/__pycache__/presets.cpython-311.pyc CHANGED
Binary files a/modules/__pycache__/presets.cpython-311.pyc and b/modules/__pycache__/presets.cpython-311.pyc differ
 
modules/__pycache__/presets.cpython-39.pyc CHANGED
Binary files a/modules/__pycache__/presets.cpython-39.pyc and b/modules/__pycache__/presets.cpython-39.pyc differ
 
modules/__pycache__/shared.cpython-311.pyc CHANGED
Binary files a/modules/__pycache__/shared.cpython-311.pyc and b/modules/__pycache__/shared.cpython-311.pyc differ
 
modules/__pycache__/utils.cpython-311.pyc CHANGED
Binary files a/modules/__pycache__/utils.cpython-311.pyc and b/modules/__pycache__/utils.cpython-311.pyc differ
 
modules/__pycache__/utils.cpython-39.pyc CHANGED
Binary files a/modules/__pycache__/utils.cpython-39.pyc and b/modules/__pycache__/utils.cpython-39.pyc differ
 
modules/config.py CHANGED
@@ -18,7 +18,6 @@ __all__ = [
18
  "log_level",
19
  "advance_docs",
20
  "update_doc_config",
21
- "render_latex",
22
  "usage_limit",
23
  "multi_api_key",
24
  "server_name",
@@ -43,11 +42,11 @@ hide_history_when_not_logged_in = config.get("hide_history_when_not_logged_in",
43
 
44
  if os.path.exists("api_key.txt"):
45
  logging.info("检测到api_key.txt文件,正在进行迁移...")
46
- with open("api_key.txt", "r") as f:
47
  config["openai_api_key"] = f.read().strip()
48
  os.rename("api_key.txt", "api_key(deprecated).txt")
49
  with open("config.json", "w", encoding='utf-8') as f:
50
- json.dump(config, f, indent=4)
51
 
52
  if os.path.exists("auth.json"):
53
  logging.info("检测到auth.json文件,正在进行迁移...")
@@ -63,7 +62,7 @@ if os.path.exists("auth.json"):
63
  config["users"] = auth_list
64
  os.rename("auth.json", "auth(deprecated).json")
65
  with open("config.json", "w", encoding='utf-8') as f:
66
- json.dump(config, f, indent=4)
67
 
68
  ## 处理docker if we are running in Docker
69
  dockerflag = config.get("dockerflag", False)
@@ -82,12 +81,6 @@ os.environ["MINIMAX_API_KEY"] = minimax_api_key
82
  minimax_group_id = config.get("minimax_group_id", "")
83
  os.environ["MINIMAX_GROUP_ID"] = minimax_group_id
84
 
85
- render_latex = config.get("render_latex", True)
86
-
87
- if render_latex:
88
- os.environ["RENDER_LATEX"] = "yes"
89
- else:
90
- os.environ["RENDER_LATEX"] = "no"
91
 
92
  usage_limit = os.environ.get("USAGE_LIMIT", config.get("usage_limit", 120))
93
 
@@ -109,10 +102,9 @@ if api_host is not None:
109
  shared.state.set_api_host(api_host)
110
 
111
  default_chuanhu_assistant_model = config.get("default_chuanhu_assistant_model", "gpt-3.5-turbo")
112
- os.environ["GOOGLE_CSE_ID"] = config.get("GOOGLE_CSE_ID", "")
113
- os.environ["GOOGLE_API_KEY"] = config.get("GOOGLE_API_KEY", "")
114
- os.environ["WOLFRAM_ALPHA_APPID"] = config.get("WOLFRAM_ALPHA_APPID", "")
115
- os.environ["SERPAPI_API_KEY"] = config.get("SERPAPI_API_KEY", "")
116
 
117
  @contextmanager
118
  def retrieve_openai_api(api_key = None):
 
18
  "log_level",
19
  "advance_docs",
20
  "update_doc_config",
 
21
  "usage_limit",
22
  "multi_api_key",
23
  "server_name",
 
42
 
43
  if os.path.exists("api_key.txt"):
44
  logging.info("检测到api_key.txt文件,正在进行迁移...")
45
+ with open("api_key.txt", "r", encoding="utf-8") as f:
46
  config["openai_api_key"] = f.read().strip()
47
  os.rename("api_key.txt", "api_key(deprecated).txt")
48
  with open("config.json", "w", encoding='utf-8') as f:
49
+ json.dump(config, f, indent=4, ensure_ascii=False)
50
 
51
  if os.path.exists("auth.json"):
52
  logging.info("检测到auth.json文件,正在进行迁移...")
 
62
  config["users"] = auth_list
63
  os.rename("auth.json", "auth(deprecated).json")
64
  with open("config.json", "w", encoding='utf-8') as f:
65
+ json.dump(config, f, indent=4, ensure_ascii=False)
66
 
67
  ## 处理docker if we are running in Docker
68
  dockerflag = config.get("dockerflag", False)
 
81
  minimax_group_id = config.get("minimax_group_id", "")
82
  os.environ["MINIMAX_GROUP_ID"] = minimax_group_id
83
 
 
 
 
 
 
 
84
 
85
  usage_limit = os.environ.get("USAGE_LIMIT", config.get("usage_limit", 120))
86
 
 
102
  shared.state.set_api_host(api_host)
103
 
104
  default_chuanhu_assistant_model = config.get("default_chuanhu_assistant_model", "gpt-3.5-turbo")
105
+ for x in ["GOOGLE_CSE_ID", "GOOGLE_API_KEY", "WOLFRAM_ALPHA_APPID", "SERPAPI_API_KEY"]:
106
+ if config.get(x, None) is not None:
107
+ os.environ[x] = config[x]
 
108
 
109
  @contextmanager
110
  def retrieve_openai_api(api_key = None):
modules/index_func.py CHANGED
@@ -16,7 +16,7 @@ def get_index_name(file_src):
16
 
17
  md5_hash = hashlib.md5()
18
  for file_path in file_paths:
19
- with open(file_path, "rb") as f:
20
  while chunk := f.read(8192):
21
  md5_hash.update(chunk)
22
 
@@ -47,11 +47,11 @@ def get_documents(file_src):
47
  pdftext = parse_pdf(filepath, two_column).text
48
  except:
49
  pdftext = ""
50
- with open(filepath, "rb") as pdfFileObj:
51
  pdfReader = PyPDF2.PdfReader(pdfFileObj)
52
  for page in tqdm(pdfReader.pages):
53
  pdftext += page.extract_text()
54
- texts = Document(page_content=pdftext, metadata={"source": filepath})
55
  elif file_type == ".docx":
56
  logging.debug("Loading Word...")
57
  from langchain.document_loaders import UnstructuredWordDocumentLoader
@@ -70,9 +70,9 @@ def get_documents(file_src):
70
  elif file_type == ".xlsx":
71
  logging.debug("Loading Excel...")
72
  text_list = excel_to_string(filepath)
 
73
  for elem in text_list:
74
- documents.append(Document(page_content=elem, metadata={"source": filepath}))
75
- continue
76
  else:
77
  logging.debug("Loading text file...")
78
  from langchain.document_loaders import TextLoader
@@ -83,7 +83,7 @@ def get_documents(file_src):
83
  logging.error(f"Error loading file: {filename}")
84
  traceback.print_exc()
85
 
86
- texts = text_splitter.split_documents([texts])
87
  documents.extend(texts)
88
  logging.debug("Documents loaded.")
89
  return documents
 
16
 
17
  md5_hash = hashlib.md5()
18
  for file_path in file_paths:
19
+ with open(file_path, "rb", encoding="utf-8") as f:
20
  while chunk := f.read(8192):
21
  md5_hash.update(chunk)
22
 
 
47
  pdftext = parse_pdf(filepath, two_column).text
48
  except:
49
  pdftext = ""
50
+ with open(filepath, "rb", encoding="utf-8") as pdfFileObj:
51
  pdfReader = PyPDF2.PdfReader(pdfFileObj)
52
  for page in tqdm(pdfReader.pages):
53
  pdftext += page.extract_text()
54
+ texts = [Document(page_content=pdftext, metadata={"source": filepath})]
55
  elif file_type == ".docx":
56
  logging.debug("Loading Word...")
57
  from langchain.document_loaders import UnstructuredWordDocumentLoader
 
70
  elif file_type == ".xlsx":
71
  logging.debug("Loading Excel...")
72
  text_list = excel_to_string(filepath)
73
+ texts = []
74
  for elem in text_list:
75
+ texts.append(Document(page_content=elem, metadata={"source": filepath}))
 
76
  else:
77
  logging.debug("Loading text file...")
78
  from langchain.document_loaders import TextLoader
 
83
  logging.error(f"Error loading file: {filename}")
84
  traceback.print_exc()
85
 
86
+ texts = text_splitter.split_documents(texts)
87
  documents.extend(texts)
88
  logging.debug("Documents loaded.")
89
  return documents
modules/models/ChuanhuAgent.py CHANGED
@@ -14,7 +14,8 @@ from langchain.tools import BaseTool, StructuredTool, Tool, tool
14
  from langchain.callbacks.stdout import StdOutCallbackHandler
15
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
16
  from langchain.callbacks.manager import BaseCallbackManager
17
- from googlesearch import search
 
18
 
19
  from typing import Any, Dict, List, Optional, Union
20
 
@@ -93,7 +94,15 @@ class ChuanhuAgent_Client(BaseLLMModel):
93
  )
94
 
95
  def google_search_simple(self, query):
96
- results = [{"title": i.title, "link": i.url, "snippet": i.description} for i in search(query, advanced=True)]
 
 
 
 
 
 
 
 
97
  return str(results)
98
 
99
  def handle_file_upload(self, files, chatbot, language):
 
14
  from langchain.callbacks.stdout import StdOutCallbackHandler
15
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
16
  from langchain.callbacks.manager import BaseCallbackManager
17
+ from duckduckgo_search import DDGS
18
+ from itertools import islice
19
 
20
  from typing import Any, Dict, List, Optional, Union
21
 
 
94
  )
95
 
96
  def google_search_simple(self, query):
97
+ results = []
98
+ with DDGS() as ddgs:
99
+ ddgs_gen = ddgs.text("notes from a dead house", backend="lite")
100
+ for r in islice(ddgs_gen, 10):
101
+ results.append({
102
+ "title": r["title"],
103
+ "link": r["href"],
104
+ "snippet": r["body"]
105
+ })
106
  return str(results)
107
 
108
  def handle_file_upload(self, files, chatbot, language):
modules/models/__pycache__/base_model.cpython-311.pyc CHANGED
Binary files a/modules/models/__pycache__/base_model.cpython-311.pyc and b/modules/models/__pycache__/base_model.cpython-311.pyc differ
 
modules/models/__pycache__/base_model.cpython-39.pyc CHANGED
Binary files a/modules/models/__pycache__/base_model.cpython-39.pyc and b/modules/models/__pycache__/base_model.cpython-39.pyc differ
 
modules/models/__pycache__/models.cpython-311.pyc CHANGED
Binary files a/modules/models/__pycache__/models.cpython-311.pyc and b/modules/models/__pycache__/models.cpython-311.pyc differ
 
modules/models/__pycache__/models.cpython-39.pyc CHANGED
Binary files a/modules/models/__pycache__/models.cpython-39.pyc and b/modules/models/__pycache__/models.cpython-39.pyc differ
 
modules/models/base_model.py CHANGED
@@ -13,7 +13,8 @@ import pathlib
13
 
14
  from tqdm import tqdm
15
  import colorama
16
- from googlesearch import search
 
17
  import asyncio
18
  import aiohttp
19
  from enum import Enum
@@ -335,16 +336,19 @@ class BaseLLMModel:
335
  .replace("{reply_language}", reply_language)
336
  )
337
  elif use_websearch:
338
- limited_context = True
339
- search_results = [i for i in search(real_inputs, advanced=True)]
 
 
 
340
  reference_results = []
341
  for idx, result in enumerate(search_results):
342
  logging.debug(f"搜索结果{idx + 1}:{result}")
343
- domain_name = urllib3.util.parse_url(result.url).host
344
- reference_results.append([result.description, result.url])
345
  display_append.append(
346
  # f"{idx+1}. [{domain_name}]({result['href']})\n"
347
- f"<li><a href=\"{result.url}\" target=\"_blank\">{domain_name}</a></li>\n"
348
  )
349
  reference_results = add_source_numbers(reference_results)
350
  display_append = "<ol>\n\n" + "".join(display_append) + "</ol>"
@@ -637,7 +641,7 @@ class BaseLLMModel:
637
  history_file_path = os.path.join(HISTORY_DIR, user_name, filename)
638
  else:
639
  history_file_path = filename
640
- with open(history_file_path, "r") as f:
641
  json_s = json.load(f)
642
  try:
643
  if type(json_s["history"][0]) == str:
 
13
 
14
  from tqdm import tqdm
15
  import colorama
16
+ from duckduckgo_search import DDGS
17
+ from itertools import islice
18
  import asyncio
19
  import aiohttp
20
  from enum import Enum
 
336
  .replace("{reply_language}", reply_language)
337
  )
338
  elif use_websearch:
339
+ search_results = []
340
+ with DDGS() as ddgs:
341
+ ddgs_gen = ddgs.text(real_inputs, backend="lite")
342
+ for r in islice(ddgs_gen, 10):
343
+ search_results.append(r)
344
  reference_results = []
345
  for idx, result in enumerate(search_results):
346
  logging.debug(f"搜索结果{idx + 1}:{result}")
347
+ domain_name = urllib3.util.parse_url(result['href']).host
348
+ reference_results.append([result['body'], result['href']])
349
  display_append.append(
350
  # f"{idx+1}. [{domain_name}]({result['href']})\n"
351
+ f"<li><a href=\"{result['href']}\" target=\"_blank\">{result['title']}</a></li>\n"
352
  )
353
  reference_results = add_source_numbers(reference_results)
354
  display_append = "<ol>\n\n" + "".join(display_append) + "</ol>"
 
641
  history_file_path = os.path.join(HISTORY_DIR, user_name, filename)
642
  else:
643
  history_file_path = filename
644
+ with open(history_file_path, "r", encoding="utf-8") as f:
645
  json_s = json.load(f)
646
  try:
647
  if type(json_s["history"][0]) == str:
modules/models/models.py CHANGED
@@ -338,7 +338,7 @@ class LLaMA_Client(BaseLLMModel):
338
  pipeline_args = InferencerArguments(
339
  local_rank=0, random_seed=1, deepspeed='configs/ds_config_chatbot.json', mixed_precision='bf16')
340
 
341
- with open(pipeline_args.deepspeed, "r") as f:
342
  ds_config = json.load(f)
343
  LLAMA_MODEL = AutoModel.get_model(
344
  model_args,
@@ -623,7 +623,7 @@ def get_model(
623
 
624
 
625
  if __name__ == "__main__":
626
- with open("config.json", "r") as f:
627
  openai_api_key = cjson.load(f)["openai_api_key"]
628
  # set logging level to debug
629
  logging.basicConfig(level=logging.DEBUG)
 
338
  pipeline_args = InferencerArguments(
339
  local_rank=0, random_seed=1, deepspeed='configs/ds_config_chatbot.json', mixed_precision='bf16')
340
 
341
+ with open(pipeline_args.deepspeed, "r", encoding="utf-8") as f:
342
  ds_config = json.load(f)
343
  LLAMA_MODEL = AutoModel.get_model(
344
  model_args,
 
623
 
624
 
625
  if __name__ == "__main__":
626
+ with open("config.json", "r", encoding="utf-8") as f:
627
  openai_api_key = cjson.load(f)["openai_api_key"]
628
  # set logging level to debug
629
  logging.basicConfig(level=logging.DEBUG)
modules/overwrites.py CHANGED
@@ -2,12 +2,12 @@ from __future__ import annotations
2
  import logging
3
 
4
  from typing import List, Tuple
5
- import mdtex2html
6
  from gradio_client import utils as client_utils
 
 
7
 
8
  from modules.presets import *
9
  from modules.index_func import *
10
- from modules.config import render_latex
11
 
12
 
13
  def postprocess(
@@ -40,14 +40,18 @@ def postprocess(
40
  return processed_messages
41
 
42
  def postprocess_chat_messages(
43
- self, chat_message: str | Tuple | List | None, message_type: str
44
- ) -> str | Dict | None:
45
  if chat_message is None:
46
  return None
47
  elif isinstance(chat_message, (tuple, list)):
48
- filepath = chat_message[0]
 
 
 
 
 
49
  mime_type = client_utils.get_mimetype(filepath)
50
- filepath = self.make_temp_copy_if_needed(filepath)
51
  return {
52
  "name": filepath,
53
  "mime_type": mime_type,
@@ -56,12 +60,13 @@ def postprocess_chat_messages(
56
  "is_file": True,
57
  }
58
  elif isinstance(chat_message, str):
59
- if message_type == "bot":
60
- if not detect_converted_mark(chat_message):
61
- chat_message = convert_mdtext(chat_message)
62
- elif message_type == "user":
63
- if not detect_converted_mark(chat_message):
64
- chat_message = convert_asis(chat_message)
 
65
  return chat_message
66
  else:
67
  raise ValueError(f"Invalid message for Chatbot component: {chat_message}")
@@ -75,11 +80,8 @@ with open("./assets/custom.js", "r", encoding="utf-8") as f, \
75
  def reload_javascript():
76
  print("Reloading javascript...")
77
  js = f'<script>{customJS}</script><script async>{externalScripts}</script>'
78
- if render_latex:
79
- js += """\
80
- <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-MML-AM_CHTML"></script>
81
- <script type="text/x-mathjax-config">MathJax.Hub.Config({skipStartupTypeset: false, tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']],displayMath: [['$$','$$'], ['\\[','\\]']]}});</script>
82
- """
83
  def template_response(*args, **kwargs):
84
  res = GradioTemplateResponseOriginal(*args, **kwargs)
85
  res.body = res.body.replace(b'</html>', f'{js}</html>'.encode("utf8"))
 
2
  import logging
3
 
4
  from typing import List, Tuple
 
5
  from gradio_client import utils as client_utils
6
+ from gradio import utils
7
+ import inspect
8
 
9
  from modules.presets import *
10
  from modules.index_func import *
 
11
 
12
 
13
  def postprocess(
 
40
  return processed_messages
41
 
42
  def postprocess_chat_messages(
43
+ self, chat_message: str | tuple | list | None, role: str
44
+ ) -> str | dict | None:
45
  if chat_message is None:
46
  return None
47
  elif isinstance(chat_message, (tuple, list)):
48
+ file_uri = chat_message[0]
49
+ if utils.validate_url(file_uri):
50
+ filepath = file_uri
51
+ else:
52
+ filepath = self.make_temp_copy_if_needed(file_uri)
53
+
54
  mime_type = client_utils.get_mimetype(filepath)
 
55
  return {
56
  "name": filepath,
57
  "mime_type": mime_type,
 
60
  "is_file": True,
61
  }
62
  elif isinstance(chat_message, str):
63
+ # chat_message = inspect.cleandoc(chat_message)
64
+ # escape html spaces
65
+ # chat_message = chat_message.replace(" ", "&nbsp;")
66
+ if role == "bot":
67
+ chat_message = convert_bot_before_marked(chat_message)
68
+ elif role == "user":
69
+ chat_message = convert_user_before_marked(chat_message)
70
  return chat_message
71
  else:
72
  raise ValueError(f"Invalid message for Chatbot component: {chat_message}")
 
80
  def reload_javascript():
81
  print("Reloading javascript...")
82
  js = f'<script>{customJS}</script><script async>{externalScripts}</script>'
83
+ # if render_latex:
84
+ # js += """\"""
 
 
 
85
  def template_response(*args, **kwargs):
86
  res = GradioTemplateResponseOriginal(*args, **kwargs)
87
  res.body = res.body.replace(b'</html>', f'{js}</html>'.encode("utf8"))
modules/presets.py CHANGED
@@ -46,25 +46,18 @@ CHUANHU_TITLE = i18n("川虎Chat 🚀")
46
 
47
  CHUANHU_DESCRIPTION = i18n("由Bilibili [土川虎虎虎](https://space.bilibili.com/29125536)、[明昭MZhao](https://space.bilibili.com/24807452) 和 [Keldos](https://github.com/Keldos-Li) 开发<br />访问川虎Chat的 [GitHub项目](https://github.com/GaiZhenbiao/ChuanhuChatGPT) 下载最新版脚本")
48
 
49
- FOOTER = """<div class="versions">{versions}</div>"""
50
-
51
- APPEARANCE_SWITCHER = """
52
- <div style="display: flex; justify-content: space-between;">
53
- <span style="margin-top: 4px !important;">"""+ i18n("切换亮暗色主题") + """</span>
54
- <span><label class="apSwitch" for="checkbox">
55
- <input type="checkbox" id="checkbox">
56
- <div class="apSlider"></div>
57
- </label></span>
58
- </div>
59
- """
60
 
61
  ONLINE_MODELS = [
62
  "gpt-3.5-turbo",
 
63
  "gpt-3.5-turbo-0301",
 
64
  "gpt-4",
65
  "gpt-4-0314",
 
66
  "gpt-4-32k",
67
  "gpt-4-32k-0314",
 
68
  "川虎助理",
69
  "川虎助理 Pro",
70
  "xmchat",
@@ -105,11 +98,15 @@ for dir_name in os.listdir("models"):
105
 
106
  MODEL_TOKEN_LIMIT = {
107
  "gpt-3.5-turbo": 4096,
 
108
  "gpt-3.5-turbo-0301": 4096,
 
109
  "gpt-4": 8192,
110
  "gpt-4-0314": 8192,
 
111
  "gpt-4-32k": 32768,
112
- "gpt-4-32k-0314": 32768
 
113
  }
114
 
115
  TOKEN_OFFSET = 1000 # 模型的token上限减去这个值,得到软上限。到达软上限之后,自动尝试减少token占用。
@@ -238,4 +235,6 @@ small_and_beautiful_theme = gr.themes.Soft(
238
  block_title_background_fill_dark="*primary_900",
239
  block_label_background_fill_dark="*primary_900",
240
  input_background_fill="#F6F6F6",
 
 
241
  )
 
46
 
47
  CHUANHU_DESCRIPTION = i18n("由Bilibili [土川虎虎虎](https://space.bilibili.com/29125536)、[明昭MZhao](https://space.bilibili.com/24807452) 和 [Keldos](https://github.com/Keldos-Li) 开发<br />访问川虎Chat的 [GitHub项目](https://github.com/GaiZhenbiao/ChuanhuChatGPT) 下载最新版脚本")
48
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  ONLINE_MODELS = [
51
  "gpt-3.5-turbo",
52
+ "gpt-3.5-turbo-16k",
53
  "gpt-3.5-turbo-0301",
54
+ "gpt-3.5-turbo-0613",
55
  "gpt-4",
56
  "gpt-4-0314",
57
+ "gpt-4-0613",
58
  "gpt-4-32k",
59
  "gpt-4-32k-0314",
60
+ "gpt-4-32k-0613",
61
  "川虎助理",
62
  "川虎助理 Pro",
63
  "xmchat",
 
98
 
99
  MODEL_TOKEN_LIMIT = {
100
  "gpt-3.5-turbo": 4096,
101
+ "gpt-3.5-turbo-16k": 16384,
102
  "gpt-3.5-turbo-0301": 4096,
103
+ "gpt-3.5-turbo-0613": 4096,
104
  "gpt-4": 8192,
105
  "gpt-4-0314": 8192,
106
+ "gpt-4-0613": 8192,
107
  "gpt-4-32k": 32768,
108
+ "gpt-4-32k-0314": 32768,
109
+ "gpt-4-32k-0613": 32768
110
  }
111
 
112
  TOKEN_OFFSET = 1000 # 模型的token上限减去这个值,得到软上限。到达软上限之后,自动尝试减少token占用。
 
235
  block_title_background_fill_dark="*primary_900",
236
  block_label_background_fill_dark="*primary_900",
237
  input_background_fill="#F6F6F6",
238
+ chatbot_code_background_color="*neutral_950",
239
+ chatbot_code_background_color_dark="*neutral_950",
240
  )
modules/shared.py CHANGED
@@ -59,3 +59,6 @@ class State:
59
 
60
 
61
  state = State()
 
 
 
 
59
 
60
 
61
  state = State()
62
+
63
+ modules_path = os.path.dirname(os.path.realpath(__file__))
64
+ chuanhu_path = os.path.dirname(modules_path)
modules/utils.py CHANGED
@@ -16,7 +16,6 @@ import subprocess
16
  import gradio as gr
17
  from pypinyin import lazy_pinyin
18
  import tiktoken
19
- import mdtex2html
20
  from markdown import markdown
21
  from pygments import highlight
22
  from pygments.lexers import get_lexer_by_name
@@ -133,7 +132,7 @@ def count_token(message):
133
  return length
134
 
135
 
136
- def markdown_to_html_with_syntax_highlight(md_str):
137
  def replacer(match):
138
  lang = match.group(1) or "text"
139
  code = match.group(2)
@@ -155,7 +154,7 @@ def markdown_to_html_with_syntax_highlight(md_str):
155
  return html_str
156
 
157
 
158
- def normalize_markdown(md_text: str) -> str:
159
  lines = md_text.split("\n")
160
  normalized_lines = []
161
  inside_list = False
@@ -179,7 +178,7 @@ def normalize_markdown(md_text: str) -> str:
179
  return "\n".join(normalized_lines)
180
 
181
 
182
- def convert_mdtext(md_text):
183
  code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
184
  inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL)
185
  code_blocks = code_block_pattern.findall(md_text)
@@ -203,15 +202,70 @@ def convert_mdtext(md_text):
203
  output += ALREADY_CONVERTED_MARK
204
  return output
205
 
206
-
207
- def convert_asis(userinput):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  return (
209
  f'<p style="white-space:pre-wrap;">{html.escape(userinput)}</p>'
210
  + ALREADY_CONVERTED_MARK
211
  )
212
 
213
 
214
- def detect_converted_mark(userinput):
215
  try:
216
  if userinput.endswith(ALREADY_CONVERTED_MARK):
217
  return True
@@ -221,7 +275,7 @@ def detect_converted_mark(userinput):
221
  return True
222
 
223
 
224
- def detect_language(code):
225
  if code.startswith("\n"):
226
  first_line = ""
227
  else:
@@ -256,8 +310,8 @@ def save_file(filename, system, history, chatbot, user_name):
256
  history_file_path = filename
257
  else:
258
  history_file_path = os.path.join(HISTORY_DIR, user_name, filename)
259
- with open(history_file_path, "w") as f:
260
- json.dump(json_s, f)
261
  elif filename.endswith(".md"):
262
  md_s = f"system: \n- {system} \n"
263
  for data in history:
@@ -497,6 +551,13 @@ def versions_html():
497
  <a style="text-decoration:none;color:inherit" href="https://github.com/GaiZhenbiao/ChuanhuChatGPT">ChuanhuChat</a>: {commit_info}
498
  """
499
 
 
 
 
 
 
 
 
500
  def add_source_numbers(lst, source_name = "Source", use_source = True):
501
  if use_source:
502
  return [f'[{idx+1}]\t "{item[0]}"\n{source_name}: {item[1]}' for idx, item in enumerate(lst)]
@@ -563,7 +624,7 @@ def toggle_like_btn_visibility(selected_model_name):
563
  def new_auto_history_filename(dirname):
564
  latest_file = get_latest_filepath(dirname)
565
  if latest_file:
566
- with open(os.path.join(dirname, latest_file), 'r') as f:
567
  if len(f.read()) == 0:
568
  return latest_file
569
  now = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
 
16
  import gradio as gr
17
  from pypinyin import lazy_pinyin
18
  import tiktoken
 
19
  from markdown import markdown
20
  from pygments import highlight
21
  from pygments.lexers import get_lexer_by_name
 
132
  return length
133
 
134
 
135
+ def markdown_to_html_with_syntax_highlight(md_str): # deprecated
136
  def replacer(match):
137
  lang = match.group(1) or "text"
138
  code = match.group(2)
 
154
  return html_str
155
 
156
 
157
+ def normalize_markdown(md_text: str) -> str: # deprecated
158
  lines = md_text.split("\n")
159
  normalized_lines = []
160
  inside_list = False
 
178
  return "\n".join(normalized_lines)
179
 
180
 
181
+ def convert_mdtext(md_text): # deprecated
182
  code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
183
  inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL)
184
  code_blocks = code_block_pattern.findall(md_text)
 
202
  output += ALREADY_CONVERTED_MARK
203
  return output
204
 
205
+ def convert_bot_before_marked(chat_message):
206
+ """
207
+ 注意不能给输出加缩进, 否则会被marked解析成代码块
208
+ """
209
+ if '<div class="md-message">' in chat_message:
210
+ return chat_message
211
+ else:
212
+ code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
213
+ code_blocks = code_block_pattern.findall(chat_message)
214
+ non_code_parts = code_block_pattern.split(chat_message)[::2]
215
+ result = []
216
+
217
+ raw = f'<div class="raw-message hideM">{escape_markdown(chat_message)}</div>'
218
+ for non_code, code in zip(non_code_parts, code_blocks + [""]):
219
+ if non_code.strip():
220
+ result.append(non_code)
221
+ if code.strip():
222
+ code = f"\n```{code}\n```"
223
+ result.append(code)
224
+ result = "".join(result)
225
+ md = f'<div class="md-message">{result}\n</div>'
226
+ return raw + md
227
+
228
+ def convert_user_before_marked(chat_message):
229
+ if '<div class="user-message">' in chat_message:
230
+ return chat_message
231
+ else:
232
+ return f'<div class="user-message">{escape_markdown(chat_message)}</div>'
233
+
234
+ def escape_markdown(text):
235
+ """
236
+ Escape Markdown special characters to HTML-safe equivalents.
237
+ """
238
+ escape_chars = {
239
+ ' ': '&nbsp;',
240
+ '_': '&#95;',
241
+ '*': '&#42;',
242
+ '[': '&#91;',
243
+ ']': '&#93;',
244
+ '(': '&#40;',
245
+ ')': '&#41;',
246
+ '{': '&#123;',
247
+ '}': '&#125;',
248
+ '#': '&#35;',
249
+ '+': '&#43;',
250
+ '-': '&#45;',
251
+ '.': '&#46;',
252
+ '!': '&#33;',
253
+ '`': '&#96;',
254
+ '>': '&#62;',
255
+ '<': '&#60;',
256
+ '|': '&#124;'
257
+ }
258
+ return ''.join(escape_chars.get(c, c) for c in text)
259
+
260
+
261
+ def convert_asis(userinput): # deprecated
262
  return (
263
  f'<p style="white-space:pre-wrap;">{html.escape(userinput)}</p>'
264
  + ALREADY_CONVERTED_MARK
265
  )
266
 
267
 
268
+ def detect_converted_mark(userinput): # deprecated
269
  try:
270
  if userinput.endswith(ALREADY_CONVERTED_MARK):
271
  return True
 
275
  return True
276
 
277
 
278
+ def detect_language(code): # deprecated
279
  if code.startswith("\n"):
280
  first_line = ""
281
  else:
 
310
  history_file_path = filename
311
  else:
312
  history_file_path = os.path.join(HISTORY_DIR, user_name, filename)
313
+ with open(history_file_path, "w", encoding='utf-8') as f:
314
+ json.dump(json_s, f, ensure_ascii=False)
315
  elif filename.endswith(".md"):
316
  md_s = f"system: \n- {system} \n"
317
  for data in history:
 
551
  <a style="text-decoration:none;color:inherit" href="https://github.com/GaiZhenbiao/ChuanhuChatGPT">ChuanhuChat</a>: {commit_info}
552
  """
553
 
554
+ def get_html(filename):
555
+ path = os.path.join(shared.chuanhu_path, "assets", "html", filename)
556
+ if os.path.exists(path):
557
+ with open(path, encoding="utf8") as file:
558
+ return file.read()
559
+ return ""
560
+
561
  def add_source_numbers(lst, source_name = "Source", use_source = True):
562
  if use_source:
563
  return [f'[{idx+1}]\t "{item[0]}"\n{source_name}: {item[1]}' for idx, item in enumerate(lst)]
 
624
  def new_auto_history_filename(dirname):
625
  latest_file = get_latest_filepath(dirname)
626
  if latest_file:
627
+ with open(os.path.join(dirname, latest_file), 'r', encoding="utf-8") as f:
628
  if len(f.read()) == 0:
629
  return latest_file
630
  now = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')