JohnSmith9982 committed
Commit b28a1a9
1 Parent(s): ac74dc0

Upload 85 files

Files changed (39)
  1. ChuanhuChatbot.py +8 -9
  2. README.md +1 -1
  3. assets/custom.css +80 -72
  4. assets/custom.js +4 -4
  5. history/2023-06-14_15-05-04.json +0 -0
  6. modules/__pycache__/config.cpython-311.pyc +0 -0
  7. modules/__pycache__/config.cpython-39.pyc +0 -0
  8. modules/__pycache__/index_func.cpython-311.pyc +0 -0
  9. modules/__pycache__/index_func.cpython-39.pyc +0 -0
  10. modules/__pycache__/llama_func.cpython-39.pyc +0 -0
  11. modules/__pycache__/overwrites.cpython-311.pyc +0 -0
  12. modules/__pycache__/overwrites.cpython-39.pyc +0 -0
  13. modules/__pycache__/pdf_func.cpython-311.pyc +0 -0
  14. modules/__pycache__/pdf_func.cpython-39.pyc +0 -0
  15. modules/__pycache__/presets.cpython-311.pyc +0 -0
  16. modules/__pycache__/presets.cpython-39.pyc +0 -0
  17. modules/__pycache__/shared.cpython-311.pyc +0 -0
  18. modules/__pycache__/shared.cpython-39.pyc +0 -0
  19. modules/__pycache__/utils.cpython-311.pyc +0 -0
  20. modules/__pycache__/utils.cpython-39.pyc +0 -0
  21. modules/config.py +16 -12
  22. modules/index_func.py +141 -0
  23. modules/models/ChuanhuAgent.py +216 -0
  24. modules/models/__pycache__/ChuanhuAgent.cpython-311.pyc +0 -0
  25. modules/models/__pycache__/ChuanhuAgent.cpython-39.pyc +0 -0
  26. modules/models/__pycache__/base_model.cpython-311.pyc +0 -0
  27. modules/models/__pycache__/base_model.cpython-39.pyc +0 -0
  28. modules/models/__pycache__/minimax.cpython-39.pyc +0 -0
  29. modules/models/__pycache__/models.cpython-311.pyc +0 -0
  30. modules/models/__pycache__/models.cpython-39.pyc +0 -0
  31. modules/models/base_model.py +140 -49
  32. modules/models/minimax.py +161 -0
  33. modules/models/models.py +13 -6
  34. modules/overwrites.py +20 -28
  35. modules/pdf_func.py +7 -7
  36. modules/presets.py +21 -14
  37. modules/shared.py +17 -8
  38. modules/utils.py +75 -11
  39. requirements.txt +13 -6
ChuanhuChatbot.py CHANGED
@@ -12,10 +12,10 @@ from modules.presets import *
 from modules.overwrites import *
 from modules.models.models import get_model
 
+logging.getLogger("httpx").setLevel(logging.WARNING)
 
 gr.Chatbot._postprocess_chat_messages = postprocess_chat_messages
 gr.Chatbot.postprocess = postprocess
-PromptHelper.compact_text_chunks = compact_text_chunks
 
 with open("assets/custom.css", "r", encoding="utf-8") as f:
     customCSS = f.read()
@@ -89,7 +89,6 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
 with gr.Row():
 single_turn_checkbox = gr.Checkbox(label=i18n("单轮对话"), value=False)
 use_websearch_checkbox = gr.Checkbox(label=i18n("使用在线搜索"), value=False)
-# render_latex_checkbox = gr.Checkbox(label=i18n("渲染LaTeX公式"), value=render_latex, interactive=True, elem_id="render_latex_checkbox")
 language_select_dropdown = gr.Dropdown(
 label=i18n("选择回复语言(针对搜索&索引功能)"),
 choices=REPLY_LANGUAGES,
@@ -98,6 +97,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
 )
 index_files = gr.Files(label=i18n("上传"), type="file")
 two_column = gr.Checkbox(label=i18n("双栏pdf"), value=advance_docs["pdf"].get("two_column", False))
+summarize_btn = gr.Button(i18n("总结"))
 # TODO: 公式ocr
 # formula_ocr = gr.Checkbox(label=i18n("识别公式"), value=advance_docs["pdf"].get("formula_ocr", False))
 
@@ -161,7 +161,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
 
 with gr.Tab(label=i18n("高级")):
 gr.Markdown(i18n("# ⚠️ 务必谨慎更改 ⚠️\n\n如果无法使用请恢复默认设置"))
-gr.HTML(APPEARANCE_SWITCHER, elem_classes="insert_block")
+gr.HTML(get_html("appearance_switcher.html").format(label=i18n("切换亮暗色主题")), elem_classes="insert_block")
 use_streaming_checkbox = gr.Checkbox(
 label=i18n("实时传输回答"), value=True, visible=ENABLE_STREAMING_OPTION
 )
@@ -265,7 +265,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
 default_btn = gr.Button(i18n("🔙 恢复默认设置"))
 
 gr.Markdown(CHUANHU_DESCRIPTION, elem_id="description")
-gr.HTML(FOOTER.format(versions=versions_html()), elem_id="footer")
+gr.HTML(get_html("footer.html").format(versions=versions_html()), elem_id="footer")
 
 # https://github.com/gradio-app/gradio/pull/3296
 def create_greeting(request: gr.Request):
@@ -333,7 +333,8 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
 submitBtn.click(**transfer_input_args).then(**chatgpt_predict_args, api_name="predict").then(**end_outputing_args)
 submitBtn.click(**get_usage_args)
 
-index_files.change(handle_file_upload, [current_model, index_files, chatbot], [index_files, chatbot, status_display])
+index_files.change(handle_file_upload, [current_model, index_files, chatbot, language_select_dropdown], [index_files, chatbot, status_display])
+summarize_btn.click(handle_summarize_index, [current_model, index_files, chatbot, language_select_dropdown], [chatbot, status_display])
 
 emptyBtn.click(
 reset,
@@ -467,8 +468,6 @@ demo.title = i18n("川虎Chat 🚀")
 if __name__ == "__main__":
 reload_javascript()
 demo.queue(concurrency_count=CONCURRENT_COUNT).launch(
-favicon_path="./assets/favicon.ico",
+blocked_paths=["config.json"],
+favicon_path="./assets/favicon.ico"
 )
-# demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", server_port=7860, share=False) # 可自定义端口
-# demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", server_port=7860,auth=("在这里填写用户名", "在这里填写密码")) # 可设置用户名与密码
-# demo.queue(concurrency_count=CONCURRENT_COUNT).launch(auth=("在这里填写用户名", "在这里填写密码")) # 适合Nginx反向代理
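The security-relevant part of the launch() hunk above is blocked_paths: config.json now carries API keys (see modules/config.py below), so it must not be reachable through Gradio's static file routes. A minimal sketch of the pattern, assuming a gr.Blocks app named demo and the CONCURRENT_COUNT constant from this file:

# Sketch only: keep the secrets file out of Gradio's file server.
# `demo` and CONCURRENT_COUNT are assumed from ChuanhuChatbot.py.
demo.queue(concurrency_count=CONCURRENT_COUNT).launch(
    blocked_paths=["config.json"],        # requests for this path are refused
    favicon_path="./assets/favicon.ico",  # cosmetic, unchanged behaviour
)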
 
 
 
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🐯
 colorFrom: green
 colorTo: red
 sdk: gradio
-sdk_version: 3.28.0
+sdk_version: 3.33.1
 app_file: ChuanhuChatbot.py
 pinned: false
 license: gpl-3.0
assets/custom.css CHANGED
@@ -405,7 +405,7 @@ thead th {
 padding: .5em .2em;
 }
 /* 行内代码 */
-code {
+.message :not(pre) code {
 display: inline;
 white-space: break-spaces;
 border-radius: 6px;
@@ -414,13 +414,13 @@ code {
 background-color: rgba(175,184,193,0.2);
 }
 /* 代码块 */
-pre code {
+.message pre code {
 display: block;
 overflow: auto;
 white-space: pre;
 background-color: hsla(0, 0%, 0%, 80%)!important;
 border-radius: 10px;
-padding: 1.4em 1.2em 0em 1.4em;
+padding: 1.2em 1em 0em .5em;
 margin: 0.6em 2em 1em 0.2em;
 color: #FFF;
 box-shadow: 6px 6px 16px hsla(0, 0%, 0%, 0.2);
@@ -428,73 +428,81 @@ pre code {
 .message pre {
 padding: 0 !important;
 }
+.message pre code div.highlight {
+background-color: unset !important;
+}
+
+button.copy-button {
+display: none;
+}
+
 /* 代码高亮样式 */
-.highlight .hll { background-color: #49483e }
-.highlight .c { color: #75715e } /* Comment */
-.highlight .err { color: #960050; background-color: #1e0010 } /* Error */
-.highlight .k { color: #66d9ef } /* Keyword */
-.highlight .l { color: #ae81ff } /* Literal */
-.highlight .n { color: #f8f8f2 } /* Name */
-.highlight .o { color: #f92672 } /* Operator */
-.highlight .p { color: #f8f8f2 } /* Punctuation */
-.highlight .ch { color: #75715e } /* Comment.Hashbang */
-.highlight .cm { color: #75715e } /* Comment.Multiline */
-.highlight .cp { color: #75715e } /* Comment.Preproc */
-.highlight .cpf { color: #75715e } /* Comment.PreprocFile */
-.highlight .c1 { color: #75715e } /* Comment.Single */
-.highlight .cs { color: #75715e } /* Comment.Special */
-.highlight .gd { color: #f92672 } /* Generic.Deleted */
-.highlight .ge { font-style: italic } /* Generic.Emph */
-.highlight .gi { color: #a6e22e } /* Generic.Inserted */
-.highlight .gs { font-weight: bold } /* Generic.Strong */
-.highlight .gu { color: #75715e } /* Generic.Subheading */
-.highlight .kc { color: #66d9ef } /* Keyword.Constant */
-.highlight .kd { color: #66d9ef } /* Keyword.Declaration */
-.highlight .kn { color: #f92672 } /* Keyword.Namespace */
-.highlight .kp { color: #66d9ef } /* Keyword.Pseudo */
-.highlight .kr { color: #66d9ef } /* Keyword.Reserved */
-.highlight .kt { color: #66d9ef } /* Keyword.Type */
-.highlight .ld { color: #e6db74 } /* Literal.Date */
-.highlight .m { color: #ae81ff } /* Literal.Number */
-.highlight .s { color: #e6db74 } /* Literal.String */
-.highlight .na { color: #a6e22e } /* Name.Attribute */
-.highlight .nb { color: #f8f8f2 } /* Name.Builtin */
-.highlight .nc { color: #a6e22e } /* Name.Class */
-.highlight .no { color: #66d9ef } /* Name.Constant */
-.highlight .nd { color: #a6e22e } /* Name.Decorator */
-.highlight .ni { color: #f8f8f2 } /* Name.Entity */
-.highlight .ne { color: #a6e22e } /* Name.Exception */
-.highlight .nf { color: #a6e22e } /* Name.Function */
-.highlight .nl { color: #f8f8f2 } /* Name.Label */
-.highlight .nn { color: #f8f8f2 } /* Name.Namespace */
-.highlight .nx { color: #a6e22e } /* Name.Other */
-.highlight .py { color: #f8f8f2 } /* Name.Property */
-.highlight .nt { color: #f92672 } /* Name.Tag */
-.highlight .nv { color: #f8f8f2 } /* Name.Variable */
-.highlight .ow { color: #f92672 } /* Operator.Word */
-.highlight .w { color: #f8f8f2 } /* Text.Whitespace */
-.highlight .mb { color: #ae81ff } /* Literal.Number.Bin */
-.highlight .mf { color: #ae81ff } /* Literal.Number.Float */
-.highlight .mh { color: #ae81ff } /* Literal.Number.Hex */
-.highlight .mi { color: #ae81ff } /* Literal.Number.Integer */
-.highlight .mo { color: #ae81ff } /* Literal.Number.Oct */
-.highlight .sa { color: #e6db74 } /* Literal.String.Affix */
-.highlight .sb { color: #e6db74 } /* Literal.String.Backtick */
-.highlight .sc { color: #e6db74 } /* Literal.String.Char */
-.highlight .dl { color: #e6db74 } /* Literal.String.Delimiter */
-.highlight .sd { color: #e6db74 } /* Literal.String.Doc */
-.highlight .s2 { color: #e6db74 } /* Literal.String.Double */
-.highlight .se { color: #ae81ff } /* Literal.String.Escape */
-.highlight .sh { color: #e6db74 } /* Literal.String.Heredoc */
-.highlight .si { color: #e6db74 } /* Literal.String.Interpol */
-.highlight .sx { color: #e6db74 } /* Literal.String.Other */
-.highlight .sr { color: #e6db74 } /* Literal.String.Regex */
-.highlight .s1 { color: #e6db74 } /* Literal.String.Single */
-.highlight .ss { color: #e6db74 } /* Literal.String.Symbol */
-.highlight .bp { color: #f8f8f2 } /* Name.Builtin.Pseudo */
-.highlight .fm { color: #a6e22e } /* Name.Function.Magic */
-.highlight .vc { color: #f8f8f2 } /* Name.Variable.Class */
-.highlight .vg { color: #f8f8f2 } /* Name.Variable.Global */
-.highlight .vi { color: #f8f8f2 } /* Name.Variable.Instance */
-.highlight .vm { color: #f8f8f2 } /* Name.Variable.Magic */
-.highlight .il { color: #ae81ff } /* Literal.Number.Integer.Long */
+.highlight .hll { background-color: #49483e !important }
+.highlight .c { color: #75715e !important } /* Comment */
+.highlight .err { color: #960050 !important; background-color: #1e0010 } /* Error */
+.highlight .k { color: #66d9ef !important } /* Keyword */
+.highlight .l { color: #ae81ff !important } /* Literal */
+.highlight .n { color: #f8f8f2 !important } /* Name */
+.highlight .o { color: #f92672 !important } /* Operator */
+.highlight .p { color: #f8f8f2 !important } /* Punctuation */
+.highlight .ch { color: #75715e !important } /* Comment.Hashbang */
+.highlight .cm { color: #75715e !important } /* Comment.Multiline */
+.highlight .cp { color: #75715e !important } /* Comment.Preproc */
+.highlight .cpf { color: #75715e !important } /* Comment.PreprocFile */
+.highlight .c1 { color: #75715e !important } /* Comment.Single */
+.highlight .cs { color: #75715e !important } /* Comment.Special */
+.highlight .gd { color: #f92672 !important } /* Generic.Deleted */
+.highlight .ge { font-style: italic !important } /* Generic.Emph */
+.highlight .gi { color: #a6e22e !important } /* Generic.Inserted */
+.highlight .gs { font-weight: bold !important } /* Generic.Strong */
+.highlight .gu { color: #75715e !important } /* Generic.Subheading */
+.highlight .kc { color: #66d9ef !important } /* Keyword.Constant */
+.highlight .kd { color: #66d9ef !important } /* Keyword.Declaration */
+.highlight .kn { color: #f92672 !important } /* Keyword.Namespace */
+.highlight .kp { color: #66d9ef !important } /* Keyword.Pseudo */
+.highlight .kr { color: #66d9ef !important } /* Keyword.Reserved */
+.highlight .kt { color: #66d9ef !important } /* Keyword.Type */
+.highlight .ld { color: #e6db74 !important } /* Literal.Date */
+.highlight .m { color: #ae81ff !important } /* Literal.Number */
+.highlight .s { color: #e6db74 !important } /* Literal.String */
+.highlight .na { color: #a6e22e !important } /* Name.Attribute */
+.highlight .nb { color: #f8f8f2 !important } /* Name.Builtin */
+.highlight .nc { color: #a6e22e !important } /* Name.Class */
+.highlight .no { color: #66d9ef !important } /* Name.Constant */
+.highlight .nd { color: #a6e22e !important } /* Name.Decorator */
+.highlight .ni { color: #f8f8f2 !important } /* Name.Entity */
+.highlight .ne { color: #a6e22e !important } /* Name.Exception */
+.highlight .nf { color: #a6e22e !important } /* Name.Function */
+.highlight .nl { color: #f8f8f2 !important } /* Name.Label */
+.highlight .nn { color: #f8f8f2 !important } /* Name.Namespace */
+.highlight .nx { color: #a6e22e !important } /* Name.Other */
+.highlight .py { color: #f8f8f2 !important } /* Name.Property */
+.highlight .nt { color: #f92672 !important } /* Name.Tag */
+.highlight .nv { color: #f8f8f2 !important } /* Name.Variable */
+.highlight .ow { color: #f92672 !important } /* Operator.Word */
+.highlight .w { color: #f8f8f2 !important } /* Text.Whitespace */
+.highlight .mb { color: #ae81ff !important } /* Literal.Number.Bin */
+.highlight .mf { color: #ae81ff !important } /* Literal.Number.Float */
+.highlight .mh { color: #ae81ff !important } /* Literal.Number.Hex */
+.highlight .mi { color: #ae81ff !important } /* Literal.Number.Integer */
+.highlight .mo { color: #ae81ff !important } /* Literal.Number.Oct */
+.highlight .sa { color: #e6db74 !important } /* Literal.String.Affix */
+.highlight .sb { color: #e6db74 !important } /* Literal.String.Backtick */
+.highlight .sc { color: #e6db74 !important } /* Literal.String.Char */
+.highlight .dl { color: #e6db74 !important } /* Literal.String.Delimiter */
+.highlight .sd { color: #e6db74 !important } /* Literal.String.Doc */
+.highlight .s2 { color: #e6db74 !important } /* Literal.String.Double */
+.highlight .se { color: #ae81ff !important } /* Literal.String.Escape */
+.highlight .sh { color: #e6db74 !important } /* Literal.String.Heredoc */
+.highlight .si { color: #e6db74 !important } /* Literal.String.Interpol */
+.highlight .sx { color: #e6db74 !important } /* Literal.String.Other */
+.highlight .sr { color: #e6db74 !important } /* Literal.String.Regex */
+.highlight .s1 { color: #e6db74 !important } /* Literal.String.Single */
+.highlight .ss { color: #e6db74 !important } /* Literal.String.Symbol */
+.highlight .bp { color: #f8f8f2 !important } /* Name.Builtin.Pseudo */
+.highlight .fm { color: #a6e22e !important } /* Name.Function.Magic */
+.highlight .vc { color: #f8f8f2 !important } /* Name.Variable.Class */
+.highlight .vg { color: #f8f8f2 !important } /* Name.Variable.Global */
+.highlight .vi { color: #f8f8f2 !important } /* Name.Variable.Instance */
+.highlight .vm { color: #f8f8f2 !important } /* Name.Variable.Magic */
+.highlight .il { color: #ae81ff !important } /* Literal.Number.Integer.Long */
assets/custom.js CHANGED
@@ -245,11 +245,11 @@ function showOrHideUserInfo() {
 
 function toggleDarkMode(isEnabled) {
 if (isEnabled) {
-gradioContainer.classList.add("dark");
-document.body.style.setProperty("background-color", "var(--neutral-950)", "important");
+document.body.classList.add("dark");
+// document.body.style.setProperty("background-color", "var(--neutral-950)", "important");
 } else {
-gradioContainer.classList.remove("dark");
-document.body.style.backgroundColor = "";
+document.body.classList.remove("dark");
+// document.body.style.backgroundColor = "";
 }
 }
 function adjustDarkMode() {
history/2023-06-14_15-05-04.json ADDED
File without changes
modules/__pycache__/config.cpython-311.pyc CHANGED
Binary files a/modules/__pycache__/config.cpython-311.pyc and b/modules/__pycache__/config.cpython-311.pyc differ
modules/__pycache__/config.cpython-39.pyc CHANGED
Binary files a/modules/__pycache__/config.cpython-39.pyc and b/modules/__pycache__/config.cpython-39.pyc differ
modules/__pycache__/index_func.cpython-311.pyc CHANGED
Binary files a/modules/__pycache__/index_func.cpython-311.pyc and b/modules/__pycache__/index_func.cpython-311.pyc differ
modules/__pycache__/index_func.cpython-39.pyc CHANGED
Binary files a/modules/__pycache__/index_func.cpython-39.pyc and b/modules/__pycache__/index_func.cpython-39.pyc differ
modules/__pycache__/llama_func.cpython-39.pyc CHANGED
Binary files a/modules/__pycache__/llama_func.cpython-39.pyc and b/modules/__pycache__/llama_func.cpython-39.pyc differ
modules/__pycache__/overwrites.cpython-311.pyc CHANGED
Binary files a/modules/__pycache__/overwrites.cpython-311.pyc and b/modules/__pycache__/overwrites.cpython-311.pyc differ
modules/__pycache__/overwrites.cpython-39.pyc CHANGED
Binary files a/modules/__pycache__/overwrites.cpython-39.pyc and b/modules/__pycache__/overwrites.cpython-39.pyc differ
modules/__pycache__/pdf_func.cpython-311.pyc CHANGED
Binary files a/modules/__pycache__/pdf_func.cpython-311.pyc and b/modules/__pycache__/pdf_func.cpython-311.pyc differ
modules/__pycache__/pdf_func.cpython-39.pyc CHANGED
Binary files a/modules/__pycache__/pdf_func.cpython-39.pyc and b/modules/__pycache__/pdf_func.cpython-39.pyc differ
modules/__pycache__/presets.cpython-311.pyc CHANGED
Binary files a/modules/__pycache__/presets.cpython-311.pyc and b/modules/__pycache__/presets.cpython-311.pyc differ
modules/__pycache__/presets.cpython-39.pyc CHANGED
Binary files a/modules/__pycache__/presets.cpython-39.pyc and b/modules/__pycache__/presets.cpython-39.pyc differ
modules/__pycache__/shared.cpython-311.pyc CHANGED
Binary files a/modules/__pycache__/shared.cpython-311.pyc and b/modules/__pycache__/shared.cpython-311.pyc differ
modules/__pycache__/shared.cpython-39.pyc CHANGED
Binary files a/modules/__pycache__/shared.cpython-39.pyc and b/modules/__pycache__/shared.cpython-39.pyc differ
modules/__pycache__/utils.cpython-311.pyc CHANGED
Binary files a/modules/__pycache__/utils.cpython-311.pyc and b/modules/__pycache__/utils.cpython-311.pyc differ
modules/__pycache__/utils.cpython-39.pyc CHANGED
Binary files a/modules/__pycache__/utils.cpython-39.pyc and b/modules/__pycache__/utils.cpython-39.pyc differ
modules/config.py CHANGED
@@ -18,13 +18,13 @@ __all__ = [
 "log_level",
 "advance_docs",
 "update_doc_config",
-"render_latex",
 "usage_limit",
 "multi_api_key",
 "server_name",
 "server_port",
 "share",
-"hide_history_when_not_logged_in"
+"hide_history_when_not_logged_in",
+"default_chuanhu_assistant_model"
 ]
 
 # 添加一个统一的config文件,避免文件过多造成的疑惑(优先级最低)
@@ -42,11 +42,11 @@ hide_history_when_not_logged_in = config.get("hide_history_when_not_logged_in",
 
 if os.path.exists("api_key.txt"):
 logging.info("检测到api_key.txt文件,正在进行迁移...")
-with open("api_key.txt", "r") as f:
+with open("api_key.txt", "r", encoding="utf-8") as f:
 config["openai_api_key"] = f.read().strip()
 os.rename("api_key.txt", "api_key(deprecated).txt")
 with open("config.json", "w", encoding='utf-8') as f:
-json.dump(config, f, indent=4)
+json.dump(config, f, indent=4, ensure_ascii=False)
 
 if os.path.exists("auth.json"):
 logging.info("检测到auth.json文件,正在进行迁移...")
@@ -62,7 +62,7 @@ if os.path.exists("auth.json"):
 config["users"] = auth_list
 os.rename("auth.json", "auth(deprecated).json")
 with open("config.json", "w", encoding='utf-8') as f:
-json.dump(config, f, indent=4)
+json.dump(config, f, indent=4, ensure_ascii=False)
 
 ## 处理docker if we are running in Docker
 dockerflag = config.get("dockerflag", False)
@@ -76,12 +76,11 @@ my_api_key = os.environ.get("OPENAI_API_KEY", my_api_key)
 xmchat_api_key = config.get("xmchat_api_key", "")
 os.environ["XMCHAT_API_KEY"] = xmchat_api_key
 
-render_latex = config.get("render_latex", True)
-
-if render_latex:
-os.environ["RENDER_LATEX"] = "yes"
-else:
-os.environ["RENDER_LATEX"] = "no"
+minimax_api_key = config.get("minimax_api_key", "")
+os.environ["MINIMAX_API_KEY"] = minimax_api_key
+minimax_group_id = config.get("minimax_group_id", "")
+os.environ["MINIMAX_GROUP_ID"] = minimax_group_id
 
 usage_limit = os.environ.get("USAGE_LIMIT", config.get("usage_limit", 120))
 
@@ -98,10 +97,15 @@ auth_list = config.get("users", []) # 实际上是使用者的列表
 authflag = len(auth_list) > 0 # 是否开启认证的状态值,改为判断auth_list长度
 
 # 处理自定义的api_host,优先读环境变量的配置,如果存在则自动装配
-api_host = os.environ.get("api_host", config.get("api_host", ""))
-if api_host:
+api_host = os.environ.get("OPENAI_API_BASE", config.get("openai_api_base", None))
+if api_host is not None:
 shared.state.set_api_host(api_host)
 
+default_chuanhu_assistant_model = config.get("default_chuanhu_assistant_model", "gpt-3.5-turbo")
+for x in ["GOOGLE_CSE_ID", "GOOGLE_API_KEY", "WOLFRAM_ALPHA_APPID", "SERPAPI_API_KEY"]:
+    if config.get(x, None) is not None:
+        os.environ[x] = config[x]
+
 @contextmanager
 def retrieve_openai_api(api_key = None):
 old_api_key = os.environ.get("OPENAI_API_KEY", "")
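With the hunks above, config.json becomes the single place that feeds the MiniMax credentials, the OpenAI-compatible API base, the default assistant model, and the agent tool keys into the environment. A sketch of a config.json written from Python, exercising exactly the keys this commit starts reading (all values are placeholders, not working credentials):

# Sketch: write a config.json containing the keys modules/config.py now reads.
import json

config = {
    "minimax_api_key": "YOUR_MINIMAX_KEY",           # exported as MINIMAX_API_KEY
    "minimax_group_id": "YOUR_GROUP_ID",             # exported as MINIMAX_GROUP_ID
    "openai_api_base": "https://api.openai.com/v1",  # replaces the old api_host key
    "default_chuanhu_assistant_model": "gpt-3.5-turbo",
    "GOOGLE_CSE_ID": "YOUR_CSE_ID",                  # these four are copied into os.environ verbatim
    "GOOGLE_API_KEY": "YOUR_GOOGLE_KEY",
    "WOLFRAM_ALPHA_APPID": "YOUR_WOLFRAM_APPID",
    "SERPAPI_API_KEY": "YOUR_SERPAPI_KEY",
}
with open("config.json", "w", encoding="utf-8") as f:
    json.dump(config, f, indent=4, ensure_ascii=False)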
modules/index_func.py ADDED
@@ -0,0 +1,141 @@
+import os
+import logging
+import hashlib
+
+import colorama
+import PyPDF2
+from tqdm import tqdm
+
+from modules.presets import *
+from modules.utils import *
+from modules.config import local_embedding
+
+
+def get_index_name(file_src):
+    file_paths = [x.name for x in file_src]
+    file_paths.sort(key=lambda x: os.path.basename(x))
+
+    md5_hash = hashlib.md5()
+    for file_path in file_paths:
+        with open(file_path, "rb") as f:  # binary mode takes no encoding argument
+            while chunk := f.read(8192):
+                md5_hash.update(chunk)
+
+    return md5_hash.hexdigest()
+
+
+def get_documents(file_src):
+    from langchain.schema import Document
+    from langchain.text_splitter import TokenTextSplitter
+    text_splitter = TokenTextSplitter(chunk_size=500, chunk_overlap=30)
+
+    documents = []
+    logging.debug("Loading documents...")
+    logging.debug(f"file_src: {file_src}")
+    for file in file_src:
+        filepath = file.name
+        filename = os.path.basename(filepath)
+        file_type = os.path.splitext(filename)[1]
+        logging.info(f"loading file: {filename}")
+        try:
+            if file_type == ".pdf":
+                logging.debug("Loading PDF...")
+                try:
+                    from modules.pdf_func import parse_pdf
+                    from modules.config import advance_docs
+
+                    two_column = advance_docs["pdf"].get("two_column", False)
+                    pdftext = parse_pdf(filepath, two_column).text
+                except:
+                    pdftext = ""
+                    with open(filepath, "rb") as pdfFileObj:  # binary mode takes no encoding argument
+                        pdfReader = PyPDF2.PdfReader(pdfFileObj)
+                        for page in tqdm(pdfReader.pages):
+                            pdftext += page.extract_text()
+                texts = [Document(page_content=pdftext, metadata={"source": filepath})]
+            elif file_type == ".docx":
+                logging.debug("Loading Word...")
+                from langchain.document_loaders import UnstructuredWordDocumentLoader
+                loader = UnstructuredWordDocumentLoader(filepath)
+                texts = loader.load()
+            elif file_type == ".pptx":
+                logging.debug("Loading PowerPoint...")
+                from langchain.document_loaders import UnstructuredPowerPointLoader
+                loader = UnstructuredPowerPointLoader(filepath)
+                texts = loader.load()
+            elif file_type == ".epub":
+                logging.debug("Loading EPUB...")
+                from langchain.document_loaders import UnstructuredEPubLoader
+                loader = UnstructuredEPubLoader(filepath)
+                texts = loader.load()
+            elif file_type == ".xlsx":
+                logging.debug("Loading Excel...")
+                text_list = excel_to_string(filepath)
+                texts = []
+                for elem in text_list:
+                    texts.append(Document(page_content=elem, metadata={"source": filepath}))
+            else:
+                logging.debug("Loading text file...")
+                from langchain.document_loaders import TextLoader
+                loader = TextLoader(filepath, "utf8")
+                texts = loader.load()
+        except Exception as e:
+            import traceback
+            logging.error(f"Error loading file: {filename}")
+            traceback.print_exc()
+            continue  # skip this file; `texts` would be undefined below
+
+        texts = text_splitter.split_documents(texts)
+        documents.extend(texts)
+    logging.debug("Documents loaded.")
+    return documents
+
+
+def construct_index(
+    api_key,
+    file_src,
+    max_input_size=4096,
+    num_outputs=5,
+    max_chunk_overlap=20,
+    chunk_size_limit=600,
+    embedding_limit=None,
+    separator=" ",
+):
+    from langchain.chat_models import ChatOpenAI
+    from langchain.vectorstores import FAISS
+
+    if api_key:
+        os.environ["OPENAI_API_KEY"] = api_key
+    else:
+        # 由于一个依赖的愚蠢的设计,这里必须要有一个API KEY
+        os.environ["OPENAI_API_KEY"] = "sk-xxxxxxx"
+    chunk_size_limit = None if chunk_size_limit == 0 else chunk_size_limit
+    embedding_limit = None if embedding_limit == 0 else embedding_limit
+    separator = " " if separator == "" else separator
+
+    index_name = get_index_name(file_src)
+    index_path = f"./index/{index_name}"
+    if local_embedding:
+        from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/distiluse-base-multilingual-cased-v2")
+    else:
+        from langchain.embeddings import OpenAIEmbeddings
+        embeddings = OpenAIEmbeddings(openai_api_base=os.environ.get("OPENAI_API_BASE", None), openai_api_key=os.environ.get("OPENAI_EMBEDDING_API_KEY", api_key))
+    if os.path.exists(index_path):
+        logging.info("找到了缓存的索引文件,加载中……")
+        return FAISS.load_local(index_path, embeddings)
+    else:
+        try:
+            documents = get_documents(file_src)
+            logging.info("构建索引中……")
+            with retrieve_proxy():
+                index = FAISS.from_documents(documents, embeddings)
+            logging.debug("索引构建完成!")
+            os.makedirs("./index", exist_ok=True)
+            index.save_local(index_path)
+            logging.debug("索引已保存至本地!")
+            return index
+        except Exception as e:
+            import traceback
+            logging.error("索引构建失败!%s", e)
+            traceback.print_exc()
+            return None
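construct_index caches one FAISS index per upload under ./index/, keyed by the MD5 of the uploaded files' bytes, so re-uploading the same documents loads the cache instead of re-embedding. A rough usage sketch; NamedFile stands in for the gr.Files objects the function expects (anything with a .name attribute pointing at a path), and the key and filename are placeholders:

# Sketch: build (or load the cached) index for one local text file,
# then run a FAISS similarity search over the embedded chunks.
from collections import namedtuple
from modules.index_func import construct_index

NamedFile = namedtuple("NamedFile", ["name"])
index = construct_index("sk-YOUR-KEY", file_src=[NamedFile("notes.txt")])
if index is not None:
    for doc in index.similarity_search("main conclusions?", k=3):
        print(doc.metadata["source"], doc.page_content[:80])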
modules/models/ChuanhuAgent.py ADDED
@@ -0,0 +1,216 @@
+from langchain.chains.summarize import load_summarize_chain
+from langchain import PromptTemplate, LLMChain
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import PromptTemplate
+from langchain.text_splitter import TokenTextSplitter
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.chains import RetrievalQA
+from langchain.agents import load_tools
+from langchain.agents import initialize_agent
+from langchain.agents import AgentType
+from langchain.docstore.document import Document
+from langchain.tools import BaseTool, StructuredTool, Tool, tool
+from langchain.callbacks.stdout import StdOutCallbackHandler
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+from langchain.callbacks.manager import BaseCallbackManager
+from duckduckgo_search import DDGS
+from itertools import islice
+
+from typing import Any, Dict, List, Optional, Union
+
+from langchain.callbacks.base import BaseCallbackHandler
+from langchain.input import print_text
+from langchain.schema import AgentAction, AgentFinish, LLMResult
+
+from pydantic import BaseModel, Field
+
+import requests
+from bs4 import BeautifulSoup
+from threading import Thread, Condition
+from collections import deque
+
+from .base_model import BaseLLMModel, CallbackToIterator, ChuanhuCallbackHandler
+from ..config import default_chuanhu_assistant_model
+from ..presets import SUMMARIZE_PROMPT, i18n
+from ..index_func import construct_index
+
+from langchain.callbacks import get_openai_callback
+import os
+import gradio as gr
+import logging
+
+class GoogleSearchInput(BaseModel):
+    keywords: str = Field(description="keywords to search")
+
+class WebBrowsingInput(BaseModel):
+    url: str = Field(description="URL of a webpage")
+
+class WebAskingInput(BaseModel):
+    url: str = Field(description="URL of a webpage")
+    question: str = Field(description="Question that you want to know the answer to, based on the webpage's content.")
+
+
+class ChuanhuAgent_Client(BaseLLMModel):
+    def __init__(self, model_name, openai_api_key, user_name="") -> None:
+        super().__init__(model_name=model_name, user=user_name)
+        self.text_splitter = TokenTextSplitter(chunk_size=500, chunk_overlap=30)
+        self.api_key = openai_api_key
+        self.llm = ChatOpenAI(openai_api_key=openai_api_key, temperature=0, model_name=default_chuanhu_assistant_model, openai_api_base=os.environ.get("OPENAI_API_BASE", None))
+        self.cheap_llm = ChatOpenAI(openai_api_key=openai_api_key, temperature=0, model_name="gpt-3.5-turbo", openai_api_base=os.environ.get("OPENAI_API_BASE", None))
+        PROMPT = PromptTemplate(template=SUMMARIZE_PROMPT, input_variables=["text"])
+        self.summarize_chain = load_summarize_chain(self.cheap_llm, chain_type="map_reduce", return_intermediate_steps=True, map_prompt=PROMPT, combine_prompt=PROMPT)
+        self.index_summary = None
+        self.index = None
+        if "Pro" in self.model_name:
+            self.tools = load_tools(["google-search-results-json", "llm-math", "arxiv", "wikipedia", "wolfram-alpha"], llm=self.llm)
+        else:
+            self.tools = load_tools(["ddg-search", "llm-math", "arxiv", "wikipedia"], llm=self.llm)
+        self.tools.append(
+            Tool.from_function(
+                func=self.google_search_simple,
+                name="Google Search JSON",
+                description="useful when you need to search the web.",
+                args_schema=GoogleSearchInput
+            )
+        )
+
+        self.tools.append(
+            Tool.from_function(
+                func=self.summary_url,
+                name="Summary Webpage",
+                description="useful when you need to know the overall content of a webpage.",
+                args_schema=WebBrowsingInput
+            )
+        )
+
+        self.tools.append(
+            StructuredTool.from_function(
+                func=self.ask_url,
+                name="Ask Webpage",
+                description="useful when you need to ask detailed questions about a webpage.",
+                args_schema=WebAskingInput
+            )
+        )
+
+    def google_search_simple(self, query):
+        results = []
+        with DDGS() as ddgs:
+            ddgs_gen = ddgs.text(query, backend="lite")  # was a hard-coded test query
+            for r in islice(ddgs_gen, 10):
+                results.append({
+                    "title": r["title"],
+                    "link": r["href"],
+                    "snippet": r["body"]
+                })
+        return str(results)
+
+    def handle_file_upload(self, files, chatbot, language):
+        """if the model accepts multi modal input, implement this function"""
+        status = gr.Markdown.update()
+        if files:
+            index = construct_index(self.api_key, file_src=files)
+            assert index is not None, "获取索引失败"
+            self.index = index
+            status = i18n("索引构建完成")
+            # Summarize the document
+            logging.info(i18n("生成内容总结中……"))
+            with get_openai_callback() as cb:
+                os.environ["OPENAI_API_KEY"] = self.api_key
+                from langchain.chains.summarize import load_summarize_chain
+                from langchain.prompts import PromptTemplate
+                from langchain.chat_models import ChatOpenAI
+                prompt_template = "Write a concise summary of the following:\n\n{text}\n\nCONCISE SUMMARY IN " + language + ":"
+                PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
+                llm = ChatOpenAI()
+                chain = load_summarize_chain(llm, chain_type="map_reduce", return_intermediate_steps=True, map_prompt=PROMPT, combine_prompt=PROMPT)
+                summary = chain({"input_documents": list(index.docstore.__dict__["_dict"].values())}, return_only_outputs=True)["output_text"]
+                logging.info(f"Summary: {summary}")
+                self.index_summary = summary
+                chatbot.append((f"Uploaded {len(files)} files", summary))
+            logging.info(cb)
+        return gr.Files.update(), chatbot, status
+
+    def query_index(self, query):
+        if self.index is not None:
+            retriever = self.index.as_retriever()
+            qa = RetrievalQA.from_chain_type(llm=self.llm, chain_type="stuff", retriever=retriever)
+            return qa.run(query)
+        else:
+            return "Error during query."  # was a bare string expression with no return
+
+    def summary(self, text):
+        texts = Document(page_content=text)
+        texts = self.text_splitter.split_documents([texts])
+        return self.summarize_chain({"input_documents": texts}, return_only_outputs=True)["output_text"]
+
+    def fetch_url_content(self, url):
+        response = requests.get(url)
+        soup = BeautifulSoup(response.text, 'html.parser')
+
+        # 提取所有的文本
+        text = ''.join(s.getText() for s in soup.find_all('p'))
+        logging.info(f"Extracted text from {url}")
+        return text
+
+    def summary_url(self, url):
+        text = self.fetch_url_content(url)
+        if text == "":
+            return "URL unavailable."
+        text_summary = self.summary(text)
+        url_content = "webpage content summary:\n" + text_summary
+
+        return url_content
+
+    def ask_url(self, url, question):
+        text = self.fetch_url_content(url)
+        if text == "":
+            return "URL unavailable."
+        texts = Document(page_content=text)
+        texts = self.text_splitter.split_documents([texts])
+        # use embedding
+        embeddings = OpenAIEmbeddings(openai_api_key=self.api_key, openai_api_base=os.environ.get("OPENAI_API_BASE", None))
+
+        # create vectorstore
+        db = FAISS.from_documents(texts, embeddings)
+        retriever = db.as_retriever()
+        qa = RetrievalQA.from_chain_type(llm=self.cheap_llm, chain_type="stuff", retriever=retriever)
+        return qa.run(f"{question} Reply in 中文")
+
+    def get_answer_at_once(self):
+        question = self.history[-1]["content"]
+        # llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")
+        agent = initialize_agent(self.tools, self.llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
+        reply = agent.run(input=f"{question} Reply in 简体中文")
+        return reply, -1
+
+    def get_answer_stream_iter(self):
+        question = self.history[-1]["content"]
+        it = CallbackToIterator()
+        manager = BaseCallbackManager(handlers=[ChuanhuCallbackHandler(it.callback)])
+        def thread_func():
+            tools = self.tools
+            if self.index is not None:
+                tools.append(
+                    Tool.from_function(
+                        func=self.query_index,
+                        name="Query Knowledge Base",
+                        description=f"useful when you need to know about: {self.index_summary}",
+                        args_schema=WebBrowsingInput
+                    )
+                )
+            agent = initialize_agent(self.tools, self.llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True, callback_manager=manager)
+            try:
+                reply = agent.run(input=f"{question} Reply in 简体中文")
+            except Exception as e:
+                import traceback
+                traceback.print_exc()
+                reply = str(e)
+            it.callback(reply)
+            it.finish()
+        t = Thread(target=thread_func)
+        t.start()
+        partial_text = ""
+        for value in it:
+            partial_text += value
+            yield partial_text
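The client wires those tools into a LangChain structured-chat agent; get_answer_stream_iter runs the agent on a worker thread and streams tokens (and tool-call notices) back through CallbackToIterator, defined in base_model.py below. A rough driving sketch outside the Gradio UI, assuming the {"role": ..., "content": ...} history format BaseLLMModel uses and a placeholder key:

# Sketch: drive the agent client directly and watch the streamed reply grow.
from modules.models.ChuanhuAgent import ChuanhuAgent_Client

client = ChuanhuAgent_Client("川虎助理", "sk-YOUR-KEY", user_name="demo")
client.history = [{"role": "user", "content": "总结一下 https://example.com 的内容"}]
for partial in client.get_answer_stream_iter():
    print(partial)  # cumulative text, one step per streamed token or tool notice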
modules/models/__pycache__/ChuanhuAgent.cpython-311.pyc CHANGED
Binary files a/modules/models/__pycache__/ChuanhuAgent.cpython-311.pyc and b/modules/models/__pycache__/ChuanhuAgent.cpython-311.pyc differ
modules/models/__pycache__/ChuanhuAgent.cpython-39.pyc CHANGED
Binary files a/modules/models/__pycache__/ChuanhuAgent.cpython-39.pyc and b/modules/models/__pycache__/ChuanhuAgent.cpython-39.pyc differ
modules/models/__pycache__/base_model.cpython-311.pyc CHANGED
Binary files a/modules/models/__pycache__/base_model.cpython-311.pyc and b/modules/models/__pycache__/base_model.cpython-311.pyc differ
modules/models/__pycache__/base_model.cpython-39.pyc CHANGED
Binary files a/modules/models/__pycache__/base_model.cpython-39.pyc and b/modules/models/__pycache__/base_model.cpython-39.pyc differ
modules/models/__pycache__/minimax.cpython-39.pyc ADDED
Binary file (4.35 kB).
modules/models/__pycache__/models.cpython-311.pyc CHANGED
Binary files a/modules/models/__pycache__/models.cpython-311.pyc and b/modules/models/__pycache__/models.cpython-311.pyc differ
modules/models/__pycache__/models.cpython-39.pyc CHANGED
Binary files a/modules/models/__pycache__/models.cpython-39.pyc and b/modules/models/__pycache__/models.cpython-39.pyc differ
modules/models/base_model.py CHANGED
@@ -13,17 +13,110 @@ import pathlib
 from tqdm import tqdm
 import colorama
-from duckduckgo_search import ddg
+from duckduckgo_search import DDGS
+from itertools import islice
 import asyncio
 import aiohttp
 from enum import Enum
 
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+from langchain.callbacks.manager import BaseCallbackManager
+
+from typing import Any, Dict, List, Optional, Union
+
+from langchain.callbacks.base import BaseCallbackHandler
+from langchain.input import print_text
+from langchain.schema import AgentAction, AgentFinish, LLMResult
+from threading import Thread, Condition
+from collections import deque
+
 from ..presets import *
-from ..llama_func import *
+from ..index_func import *
 from ..utils import *
 from .. import shared
 from ..config import retrieve_proxy
 
+class CallbackToIterator:
+    def __init__(self):
+        self.queue = deque()
+        self.cond = Condition()
+        self.finished = False
+
+    def callback(self, result):
+        with self.cond:
+            self.queue.append(result)
+            self.cond.notify()  # Wake up the generator.
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        with self.cond:
+            while not self.queue and not self.finished:  # Wait for a value to be added to the queue.
+                self.cond.wait()
+            if not self.queue:
+                raise StopIteration()
+            return self.queue.popleft()
+
+    def finish(self):
+        with self.cond:
+            self.finished = True
+            self.cond.notify()  # Wake up the generator if it's waiting.
+
+def get_action_description(text):
+    match = re.search('```(.*?)```', text, re.S)
+    json_text = match.group(1)
+    # 把json转化为python字典
+    json_dict = json.loads(json_text)
+    # 提取'action'和'action_input'的值
+    action_name = json_dict['action']
+    action_input = json_dict['action_input']
+    if action_name != "Final Answer":
+        return f'<p style="font-size: smaller; color: gray;">{action_name}: {action_input}</p>'
+    else:
+        return ""
+
+class ChuanhuCallbackHandler(BaseCallbackHandler):
+
+    def __init__(self, callback) -> None:
+        """Initialize callback handler."""
+        self.callback = callback
+
+    def on_agent_action(
+        self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
+    ) -> Any:
+        self.callback(get_action_description(action.log))
+
+    def on_tool_end(
+        self,
+        output: str,
+        color: Optional[str] = None,
+        observation_prefix: Optional[str] = None,
+        llm_prefix: Optional[str] = None,
+        **kwargs: Any,
+    ) -> None:
+        """If not the final action, print out observation."""
+        # if observation_prefix is not None:
+        #     self.callback(f"\n\n{observation_prefix}")
+        # self.callback(output)
+        # if llm_prefix is not None:
+        #     self.callback(f"\n\n{llm_prefix}")
+        if observation_prefix is not None:
+            logging.info(observation_prefix)
+        self.callback(output)
+        if llm_prefix is not None:
+            logging.info(llm_prefix)
+
+    def on_agent_finish(
+        self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any
+    ) -> None:
+        # self.callback(f"{finish.log}\n\n")
+        logging.info(finish.log)
+
+    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
+        """Run on new LLM token. Only available when streaming is enabled."""
+        self.callback(token)
+
 
 class ModelType(Enum):
 Unknown = -1
@@ -34,6 +127,8 @@ class ModelType(Enum):
 StableLM = 4
 MOSS = 5
 YuanAI = 6
+Minimax = 7
+ChuanhuAgent = 8
 
 @classmethod
 def get_type(cls, model_name: str):
@@ -53,6 +148,10 @@ class ModelType(Enum):
 model_type = ModelType.MOSS
 elif "yuanai" in model_name_lower:
 model_type = ModelType.YuanAI
+elif "minimax" in model_name_lower:
+model_type = ModelType.Minimax
+elif "川虎助理" in model_name_lower:
+model_type = ModelType.ChuanhuAgent
 else:
 model_type = ModelType.Unknown
 return model_type
@@ -146,6 +245,8 @@ class BaseLLMModel:
 
 stream_iter = self.get_answer_stream_iter()
 
+if display_append:
+display_append = "<hr>" + display_append
 for partial_text in stream_iter:
 chatbot[-1] = (chatbot[-1][0], partial_text + display_append)
 self.all_token_counts[-1] += 1
@@ -178,67 +279,54 @@ class BaseLLMModel:
 status_text = self.token_message()
 return chatbot, status_text
 
-def handle_file_upload(self, files, chatbot):
+def handle_file_upload(self, files, chatbot, language):
 """if the model accepts multi modal input, implement this function"""
 status = gr.Markdown.update()
 if files:
-construct_index(self.api_key, file_src=files)
-status = "索引构建完成"
+index = construct_index(self.api_key, file_src=files)
+status = i18n("索引构建完成")
 return gr.Files.update(), chatbot, status
 
+def summarize_index(self, files, chatbot, language):
+status = gr.Markdown.update()
+if files:
+index = construct_index(self.api_key, file_src=files)
+status = i18n("总结完成")
+logging.info(i18n("生成内容总结中……"))
+os.environ["OPENAI_API_KEY"] = self.api_key
+from langchain.chains.summarize import load_summarize_chain
+from langchain.prompts import PromptTemplate
+from langchain.chat_models import ChatOpenAI
+from langchain.callbacks import StdOutCallbackHandler
+prompt_template = "Write a concise summary of the following:\n\n{text}\n\nCONCISE SUMMARY IN " + language + ":"
+PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
+llm = ChatOpenAI()
+chain = load_summarize_chain(llm, chain_type="map_reduce", return_intermediate_steps=True, map_prompt=PROMPT, combine_prompt=PROMPT)
+summary = chain({"input_documents": list(index.docstore.__dict__["_dict"].values())}, return_only_outputs=True)["output_text"]
+print(i18n("总结") + f": {summary}")
+chatbot.append([i18n("上传了")+str(len(files))+"个文件", summary])
+return chatbot, status
+
 def prepare_inputs(self, real_inputs, use_websearch, files, reply_language, chatbot):
 fake_inputs = None
 display_append = []
 limited_context = False
 fake_inputs = real_inputs
 if files:
-from llama_index.indices.vector_store.base_query import GPTVectorStoreIndexQuery
-from llama_index.indices.query.schema import QueryBundle
 from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-from langchain.chat_models import ChatOpenAI
-from llama_index import (
-GPTSimpleVectorIndex,
-ServiceContext,
-LangchainEmbedding,
-OpenAIEmbedding,
-)
+from langchain.vectorstores.base import VectorStoreRetriever
 limited_context = True
 msg = "加载索引中……"
 logging.info(msg)
-# yield chatbot + [(inputs, "")], msg
 index = construct_index(self.api_key, file_src=files)
 assert index is not None, "获取索引失败"
 msg = "索引获取成功,生成回答中……"
 logging.info(msg)
-if local_embedding or self.model_type != ModelType.OpenAI:
-embed_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name = "sentence-transformers/distiluse-base-multilingual-cased-v2"))
-else:
-embed_model = OpenAIEmbedding()
-# yield chatbot + [(inputs, "")], msg
 with retrieve_proxy():
-prompt_helper = PromptHelper(
-max_input_size=4096,
-num_output=5,
-max_chunk_overlap=20,
-chunk_size_limit=600,
-)
-from llama_index import ServiceContext
-
-service_context = ServiceContext.from_defaults(
-prompt_helper=prompt_helper, embed_model=embed_model
-)
-query_object = GPTVectorStoreIndexQuery(
-index.index_struct,
-service_context=service_context,
-similarity_top_k=5,
-vector_store=index._vector_store,
-docstore=index._docstore,
-response_synthesizer=None
-)
-query_bundle = QueryBundle(real_inputs)
-nodes = query_object.retrieve(query_bundle)
-reference_results = [n.node.text for n in nodes]
-reference_results = add_source_numbers(reference_results, use_source=False)
+retriever = VectorStoreRetriever(vectorstore=index, search_type="similarity_score_threshold", search_kwargs={"k": 6, "score_threshold": 0.5})
+relevant_documents = retriever.get_relevant_documents(real_inputs)
+reference_results = [[d.page_content.strip("�"), os.path.basename(d.metadata["source"])] for d in relevant_documents]
+reference_results = add_source_numbers(reference_results)
 display_append = add_details(reference_results)
 display_append = "\n\n" + "".join(display_append)
 real_inputs = (
@@ -248,16 +336,19 @@ class BaseLLMModel:
 .replace("{reply_language}", reply_language)
 )
 elif use_websearch:
-limited_context = True
-search_results = ddg(real_inputs, max_results=5)
+search_results = []
+with DDGS() as ddgs:
+ddgs_gen = ddgs.text(real_inputs, backend="lite")
+for r in islice(ddgs_gen, 10):
+search_results.append(r)
 reference_results = []
 for idx, result in enumerate(search_results):
 logging.debug(f"搜索结果{idx + 1}:{result}")
-domain_name = urllib3.util.parse_url(result["href"]).host
-reference_results.append([result["body"], result["href"]])
+domain_name = urllib3.util.parse_url(result['href']).host
+reference_results.append([result['body'], result['href']])
 display_append.append(
 # f"{idx+1}. [{domain_name}]({result['href']})\n"
-f"<li><a href=\"{result['href']}\" target=\"_blank\">{domain_name}</a></li>\n"
+f"<li><a href=\"{result['href']}\" target=\"_blank\">{result['title']}</a></li>\n"
 )
 reference_results = add_source_numbers(reference_results)
 display_append = "<ol>\n\n" + "".join(display_append) + "</ol>"
@@ -550,7 +641,7 @@ class BaseLLMModel:
 history_file_path = os.path.join(HISTORY_DIR, user_name, filename)
 else:
 history_file_path = filename
-with open(history_file_path, "r") as f:
+with open(history_file_path, "r", encoding="utf-8") as f:
 json_s = json.load(f)
 try:
 if type(json_s["history"][0]) == str:
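CallbackToIterator is the piece that converts LangChain's push-style callbacks into the pull-style generator the streaming UI needs: a producer thread calls callback() once per token while the consumer iterates, blocking on the Condition until data arrives or finish() is called. A self-contained sketch of that handshake (no LangChain required):

# Sketch: producer thread pushes tokens, consumer pulls them by iterating.
from threading import Thread
from modules.models.base_model import CallbackToIterator

it = CallbackToIterator()

def produce():
    for token in ["Hel", "lo", ", ", "world"]:
        it.callback(token)  # what ChuanhuCallbackHandler.on_llm_new_token does
    it.finish()             # wakes the consumer and ends iteration

Thread(target=produce).start()
print("".join(it))          # -> "Hello, world"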
modules/models/minimax.py ADDED
@@ -0,0 +1,161 @@
+ import json
+ import os
+
+ import colorama
+ import requests
+ import logging
+
+ from modules.models.base_model import BaseLLMModel
+ from modules.presets import STANDARD_ERROR_MSG, GENERAL_ERROR_MSG, TIMEOUT_STREAMING, TIMEOUT_ALL, i18n
+
+ group_id = os.environ.get("MINIMAX_GROUP_ID", "")
+
+
+ class MiniMax_Client(BaseLLMModel):
+     """
+     MiniMax Client
+     接口文档见 https://api.minimax.chat/document/guides/chat
+     """
+
+     def __init__(self, model_name, api_key, user_name="", system_prompt=None):
+         super().__init__(model_name=model_name, user=user_name)
+         self.url = f'https://api.minimax.chat/v1/text/chatcompletion?GroupId={group_id}'
+         self.history = []
+         self.api_key = api_key
+         self.system_prompt = system_prompt
+         self.headers = {
+             "Authorization": f"Bearer {api_key}",
+             "Content-Type": "application/json"
+         }
+
+     def get_answer_at_once(self):
+         # minimax temperature is (0,1] and base model temperature is [0,2], and yuan 0.9 == base 1 so need to convert
+         temperature = self.temperature * 0.9 if self.temperature <= 1 else 0.9 + (self.temperature - 1) / 10
+
+         request_body = {
+             "model": self.model_name.replace('minimax-', ''),
+             "temperature": temperature,
+             "skip_info_mask": True,
+             'messages': [{"sender_type": "USER", "text": self.history[-1]['content']}]
+         }
+         if self.n_choices:
+             request_body['beam_width'] = self.n_choices
+         if self.system_prompt:
+             request_body['prompt'] = self.system_prompt
+         if self.max_generation_token:
+             request_body['tokens_to_generate'] = self.max_generation_token
+         if self.top_p:
+             request_body['top_p'] = self.top_p
+
+         response = requests.post(self.url, headers=self.headers, json=request_body)
+
+         res = response.json()
+         answer = res['reply']
+         total_token_count = res["usage"]["total_tokens"]
+         return answer, total_token_count
+
+     def get_answer_stream_iter(self):
+         response = self._get_response(stream=True)
+         if response is not None:
+             iter = self._decode_chat_response(response)
+             partial_text = ""
+             for i in iter:
+                 partial_text += i
+                 yield partial_text
+         else:
+             yield STANDARD_ERROR_MSG + GENERAL_ERROR_MSG
+
+     def _get_response(self, stream=False):
+         minimax_api_key = self.api_key
+         history = self.history
+         logging.debug(colorama.Fore.YELLOW +
+                       f"{history}" + colorama.Fore.RESET)
+         headers = {
+             "Content-Type": "application/json",
+             "Authorization": f"Bearer {minimax_api_key}",
+         }
+
+         temperature = self.temperature * 0.9 if self.temperature <= 1 else 0.9 + (self.temperature - 1) / 10
+
+         messages = []
+         for msg in self.history:
+             if msg['role'] == 'user':
+                 messages.append({"sender_type": "USER", "text": msg['content']})
+             else:
+                 messages.append({"sender_type": "BOT", "text": msg['content']})
+
+         request_body = {
+             "model": self.model_name.replace('minimax-', ''),
+             "temperature": temperature,
+             "skip_info_mask": True,
+             'messages': messages
+         }
+         if self.n_choices:
+             request_body['beam_width'] = self.n_choices
+         if self.system_prompt:
+             lines = self.system_prompt.splitlines()
+             if lines[0].find(":") != -1 and len(lines[0]) < 20:
+                 request_body["role_meta"] = {
+                     "user_name": lines[0].split(":")[0],
+                     "bot_name": lines[0].split(":")[1]
+                 }
+                 lines.pop()
+             request_body["prompt"] = "\n".join(lines)
+         if self.max_generation_token:
+             request_body['tokens_to_generate'] = self.max_generation_token
+         else:
+             request_body['tokens_to_generate'] = 512
+         if self.top_p:
+             request_body['top_p'] = self.top_p
+
+         if stream:
+             timeout = TIMEOUT_STREAMING
+             request_body['stream'] = True
+             request_body['use_standard_sse'] = True
+         else:
+             timeout = TIMEOUT_ALL
+         try:
+             response = requests.post(
+                 self.url,
+                 headers=headers,
+                 json=request_body,
+                 stream=stream,
+                 timeout=timeout,
+             )
+         except:
+             return None
+
+         return response
+
+     def _decode_chat_response(self, response):
+         error_msg = ""
+         for chunk in response.iter_lines():
+             if chunk:
+                 chunk = chunk.decode()
+                 chunk_length = len(chunk)
+                 print(chunk)
+                 try:
+                     chunk = json.loads(chunk[6:])
+                 except json.JSONDecodeError:
+                     print(i18n("JSON解析错误,收到的内容: ") + f"{chunk}")
+                     error_msg += chunk
+                     continue
+                 if chunk_length > 6 and "delta" in chunk["choices"][0]:
+                     if "finish_reason" in chunk["choices"][0] and chunk["choices"][0]["finish_reason"] == "stop":
+                         self.all_token_counts.append(chunk["usage"]["total_tokens"] - sum(self.all_token_counts))
+                         break
+                     try:
+                         yield chunk["choices"][0]["delta"]
+                     except Exception as e:
+                         logging.error(f"Error: {e}")
+                         continue
+         if error_msg:
+             try:
+                 error_msg = json.loads(error_msg)
+                 if 'base_resp' in error_msg:
+                     status_code = error_msg['base_resp']['status_code']
+                     status_msg = error_msg['base_resp']['status_msg']
+                     raise Exception(f"{status_code} - {status_msg}")
+             except json.JSONDecodeError:
+                 pass
+             raise Exception(error_msg)
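
Note: the temperature conversion that appears twice in this file maps the UI range [0, 2] onto MiniMax's (0, 1], with UI 1.0 landing on 0.9. Pulled out as a helper for clarity (a sketch, not part of the commit):

    def to_minimax_temperature(t: float) -> float:
        # UI [0, 2] -> MiniMax (0, 1]; piecewise linear with a knee at t == 1
        return t * 0.9 if t <= 1 else 0.9 + (t - 1) / 10

    assert to_minimax_temperature(1.0) == 0.9
    assert to_minimax_temperature(2.0) == 1.0
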
modules/models/models.py CHANGED
@@ -15,14 +15,13 @@ from PIL import Image

  from tqdm import tqdm
  import colorama
- from duckduckgo_search import ddg
  import asyncio
  import aiohttp
  from enum import Enum
  import uuid

  from ..presets import *
- from ..llama_func import *
+ from ..index_func import *
  from ..utils import *
  from .. import shared
  from ..config import retrieve_proxy, usage_limit
@@ -339,7 +338,7 @@ class LLaMA_Client(BaseLLMModel):
  pipeline_args = InferencerArguments(
      local_rank=0, random_seed=1, deepspeed='configs/ds_config_chatbot.json', mixed_precision='bf16')

- with open(pipeline_args.deepspeed, "r") as f:
+ with open(pipeline_args.deepspeed, "r", encoding="utf-8") as f:
      ds_config = json.load(f)
  LLAMA_MODEL = AutoModel.get_model(
      model_args,
@@ -494,7 +493,7 @@ class XMChat(BaseLLMModel):
          limited_context = False
          return limited_context, fake_inputs, display_append, real_inputs, chatbot

-     def handle_file_upload(self, files, chatbot):
+     def handle_file_upload(self, files, chatbot, language):
          """if the model accepts multi modal input, implement this function"""
          if files:
              for file in files:
@@ -557,6 +556,7 @@ def get_model(
          config.local_embedding = True
      # del current_model.model
      model = None
+     chatbot = gr.Chatbot.update(label=model_name)
      try:
          if model_type == ModelType.OpenAI:
              logging.info(f"正在加载OpenAI模型: {model_name}")
@@ -602,10 +602,17 @@
          elif model_type == ModelType.YuanAI:
              from .inspurai import Yuan_Client
              model = Yuan_Client(model_name, api_key=access_key, user_name=user_name, system_prompt=system_prompt)
+         elif model_type == ModelType.Minimax:
+             from .minimax import MiniMax_Client
+             if os.environ.get("MINIMAX_API_KEY") != "":
+                 access_key = os.environ.get("MINIMAX_API_KEY")
+             model = MiniMax_Client(model_name, api_key=access_key, user_name=user_name, system_prompt=system_prompt)
+         elif model_type == ModelType.ChuanhuAgent:
+             from .ChuanhuAgent import ChuanhuAgent_Client
+             model = ChuanhuAgent_Client(model_name, access_key, user_name=user_name)
          elif model_type == ModelType.Unknown:
              raise ValueError(f"未知模型: {model_name}")
          logging.info(msg)
-         chatbot = gr.Chatbot.update(label=model_name)
      except Exception as e:
          logging.error(e)
          msg = f"{STANDARD_ERROR_MSG}: {e}"
@@ -616,7 +623,7 @@


  if __name__ == "__main__":
-     with open("config.json", "r") as f:
+     with open("config.json", "r", encoding="utf-8") as f:
          openai_api_key = cjson.load(f)["openai_api_key"]
      # set logging level to debug
      logging.basicConfig(level=logging.DEBUG)
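
Note: one subtlety in the new Minimax branch: `os.environ.get("MINIMAX_API_KEY")` returns None when the variable is unset, and `None != ""` is true, so an unset variable silently replaces `access_key` with None. A defensive variant (illustrative, not what the commit does):

    # Fall back to the UI-provided key when the env var is unset or empty.
    access_key = os.environ.get("MINIMAX_API_KEY") or access_key
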
modules/overwrites.py CHANGED
@@ -1,23 +1,13 @@
  from __future__ import annotations
  import logging

- from llama_index import Prompt
  from typing import List, Tuple
- import mdtex2html
  from gradio_client import utils as client_utils
+ from gradio import utils
+ import inspect

  from modules.presets import *
- from modules.llama_func import *
- from modules.config import render_latex
-
- def compact_text_chunks(self, prompt: Prompt, text_chunks: List[str]) -> List[str]:
-     logging.debug("Compacting text chunks...🚀🚀🚀")
-     combined_str = [c.strip() for c in text_chunks if c.strip()]
-     combined_str = [f"[{index+1}] {c}" for index, c in enumerate(combined_str)]
-     combined_str = "\n\n".join(combined_str)
-     # resplit based on self.max_chunk_overlap
-     text_splitter = self.get_text_splitter_given_prompt(prompt, 1, padding=1)
-     return text_splitter.split_text(combined_str)
+ from modules.index_func import *


  def postprocess(
@@ -50,14 +40,18 @@ def postprocess(
      return processed_messages

  def postprocess_chat_messages(
-     self, chat_message: str | Tuple | List | None, message_type: str
- ) -> str | Dict | None:
+     self, chat_message: str | tuple | list | None, role: str
+ ) -> str | dict | None:
      if chat_message is None:
          return None
      elif isinstance(chat_message, (tuple, list)):
-         filepath = chat_message[0]
+         file_uri = chat_message[0]
+         if utils.validate_url(file_uri):
+             filepath = file_uri
+         else:
+             filepath = self.make_temp_copy_if_needed(file_uri)
+
          mime_type = client_utils.get_mimetype(filepath)
-         filepath = self.make_temp_copy_if_needed(filepath)
          return {
              "name": filepath,
              "mime_type": mime_type,
@@ -66,12 +60,13 @@
              "is_file": True,
          }
      elif isinstance(chat_message, str):
-         if message_type == "bot":
-             if not detect_converted_mark(chat_message):
-                 chat_message = convert_mdtext(chat_message)
-         elif message_type == "user":
-             if not detect_converted_mark(chat_message):
-                 chat_message = convert_asis(chat_message)
+         # chat_message = inspect.cleandoc(chat_message)
+         # escape html spaces
+         # chat_message = chat_message.replace(" ", "&nbsp;")
+         if role == "bot":
+             chat_message = convert_bot_before_marked(chat_message)
+         elif role == "user":
+             chat_message = convert_user_before_marked(chat_message)
          return chat_message
      else:
          raise ValueError(f"Invalid message for Chatbot component: {chat_message}")
@@ -85,11 +80,8 @@ with open("./assets/custom.js", "r", encoding="utf-8") as f, \
  def reload_javascript():
      print("Reloading javascript...")
      js = f'<script>{customJS}</script><script async>{externalScripts}</script>'
-     if render_latex:
-         js += """\
- <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-MML-AM_CHTML"></script>
- <script type="text/x-mathjax-config">MathJax.Hub.Config({skipStartupTypeset: false, tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']],displayMath: [['$$','$$'], ['\\[','\\]']]}});</script>
- """
+     # if render_latex:
+     #     js += """\"""
      def template_response(*args, **kwargs):
          res = GradioTemplateResponseOriginal(*args, **kwargs)
          res.body = res.body.replace(b'</html>', f'{js}</html>'.encode("utf8"))
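
Note: `postprocess_chat_messages` now leaves remote URLs alone and only copies local files into gradio's temp dir. The new branch in isolation (URIs illustrative; the copy step is stubbed out):

    from gradio import utils

    for file_uri in ["https://example.com/cat.png", "/tmp/local.png"]:
        if utils.validate_url(file_uri):
            filepath = file_uri  # serve the URL as-is
        else:
            filepath = f"tmp copy of {file_uri}"  # stands in for self.make_temp_copy_if_needed(file_uri)
        print(filepath)
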
modules/pdf_func.py CHANGED
@@ -1,11 +1,11 @@
  from types import SimpleNamespace
  import pdfplumber
  import logging
- from llama_index import Document
+ from langchain.docstore.document import Document

  def prepare_table_config(crop_page):
      """Prepare table查找边界, 要求page为原始page
-
+
      From https://github.com/jsvine/pdfplumber/issues/242
      """
      page = crop_page.root_page # root/parent
@@ -60,7 +60,7 @@ def get_title_with_cropped_page(first_page):
          title_bottom = word.bottom
      elif word.text == "Abstract": # 获取页面abstract
          top = word.top
-
+
      user_info = [i["text"] for i in extract_words(first_page.within_bbox((x0,title_bottom,x1,top)))]
      # 裁剪掉上半部分, within_bbox: full_included; crop: partial_included
      return title, user_info, first_page.within_bbox((x0,top,x1,bottom))
@@ -75,7 +75,7 @@ def get_column_cropped_pages(pages, two_column=True):
          new_pages.append(right)
      else:
          new_pages.append(page)
-
+
      return new_pages

  def parse_pdf(filename, two_column = True):
@@ -94,7 +94,7 @@ def parse_pdf(filename, two_column = True):
      name_top=name_top,
      name_bottom=name_bottom,
      record_chapter_name = True,
-
+
      page_start=page_start,
      page_stop=None,

@@ -114,7 +114,7 @@ def parse_pdf(filename, two_column = True):
      if word.size >= 11: # 出现chapter name
          if cur_chapter is None:
              cur_chapter = create_chapter(page.page_number, word.top, word.bottom)
-         elif not cur_chapter.record_chapter_name or (cur_chapter.name_bottom != cur_chapter.name_bottom and cur_chapter.name_top != cur_chapter.name_top):
+         elif not cur_chapter.record_chapter_name or (cur_chapter.name_bottom != cur_chapter.name_bottom and cur_chapter.name_top != cur_chapter.name_top):
      # 不再继续写chapter name
      cur_chapter.page_stop = page.page_number # stop id
      chapters.append(cur_chapter)
@@ -143,7 +143,7 @@ def parse_pdf(filename, two_column = True):
      text += f"The {idx}th Chapter {chapter.name}: " + " ".join(chapter.text) + "\n"

      logging.getLogger().setLevel(level)
-     return Document(text=text, extra_info={"title": title})
+     return Document(page_content=text, metadata={"title": title})

  BASE_POINTS = """
  1. Who are the authors?
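
Note: `parse_pdf` now returns a langchain `Document` instead of a llama_index one, so the fields change from `text`/`extra_info` to `page_content`/`metadata`. In one line (values illustrative):

    from langchain.docstore.document import Document

    doc = Document(page_content="The 1th Chapter Introduction: ...", metadata={"title": "Some Paper"})
    print(doc.metadata["title"], doc.page_content[:30])
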
modules/presets.py CHANGED
@@ -46,32 +46,27 @@ CHUANHU_TITLE = i18n("川虎Chat 🚀")

  CHUANHU_DESCRIPTION = i18n("由Bilibili [土川虎虎虎](https://space.bilibili.com/29125536)、[明昭MZhao](https://space.bilibili.com/24807452) 和 [Keldos](https://github.com/Keldos-Li) 开发<br />访问川虎Chat的 [GitHub项目](https://github.com/GaiZhenbiao/ChuanhuChatGPT) 下载最新版脚本")

- FOOTER = """<div class="versions">{versions}</div>"""
-
- APPEARANCE_SWITCHER = """
- <div style="display: flex; justify-content: space-between;">
-     <span style="margin-top: 4px !important;">"""+ i18n("切换亮暗色主题") + """</span>
-     <span><label class="apSwitch" for="checkbox">
-         <input type="checkbox" id="checkbox">
-         <div class="apSlider"></div>
-     </label></span>
- </div>
- """
-
- SUMMARIZE_PROMPT = "你是谁?我们刚才聊了什么?" # 总结对话时的 prompt

  ONLINE_MODELS = [
      "gpt-3.5-turbo",
+     "gpt-3.5-turbo-16k",
      "gpt-3.5-turbo-0301",
+     "gpt-3.5-turbo-0613",
      "gpt-4",
      "gpt-4-0314",
+     "gpt-4-0613",
      "gpt-4-32k",
      "gpt-4-32k-0314",
+     "gpt-4-32k-0613",
+     "川虎助理",
+     "川虎助理 Pro",
      "xmchat",
      "yuanai-1.0-base_10B",
      "yuanai-1.0-translate",
      "yuanai-1.0-dialog",
      "yuanai-1.0-rhythm_poems",
+     "minimax-abab4-chat",
+     "minimax-abab5-chat",
  ]

  LOCAL_MODELS = [
@@ -103,11 +98,15 @@ for dir_name in os.listdir("models"):

  MODEL_TOKEN_LIMIT = {
      "gpt-3.5-turbo": 4096,
+     "gpt-3.5-turbo-16k": 16384,
      "gpt-3.5-turbo-0301": 4096,
+     "gpt-3.5-turbo-0613": 4096,
      "gpt-4": 8192,
      "gpt-4-0314": 8192,
+     "gpt-4-0613": 8192,
      "gpt-4-32k": 32768,
-     "gpt-4-32k-0314": 32768
+     "gpt-4-32k-0314": 32768,
+     "gpt-4-32k-0613": 32768
  }

  TOKEN_OFFSET = 1000 # 模型的token上限减去这个值,得到软上限。到达软上限之后,自动尝试减少token占用。
@@ -164,6 +163,12 @@ Reply in {reply_language}
  If the context isn't useful, return the original answer.
  """

+ SUMMARIZE_PROMPT = """Write a concise summary of the following:
+
+ {text}
+
+ CONCISE SUMMARY IN 中文:"""
+
  ALREADY_CONVERTED_MARK = "<!-- ALREADY CONVERTED BY PARSER. -->"

  small_and_beautiful_theme = gr.themes.Soft(
@@ -230,4 +235,6 @@ small_and_beautiful_theme = gr.themes.Soft(
      block_title_background_fill_dark="*primary_900",
      block_label_background_fill_dark="*primary_900",
      input_background_fill="#F6F6F6",
+     chatbot_code_background_color="*neutral_950",
+     chatbot_code_background_color_dark="*neutral_950",
  )
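
Note: the new SUMMARIZE_PROMPT is a plain template with a `{text}` slot rather than a chat message, which suggests it is meant to be fed into langchain-style summarization. One way to wire it up (an assumption; the chain itself is not part of this diff):

    from langchain.prompts import PromptTemplate

    PROMPT = PromptTemplate(template=SUMMARIZE_PROMPT, input_variables=["text"])
    print(PROMPT.format(text="one two three"))
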
modules/shared.py CHANGED
@@ -1,6 +1,7 @@
  from modules.presets import COMPLETION_URL, BALANCE_API_URL, USAGE_API_URL, API_HOST
  import os
  import queue
+ import openai

  class State:
      interrupted = False
@@ -15,23 +16,28 @@ class State:
      def recover(self):
          self.interrupted = False

-     def set_api_host(self, api_host):
-         self.completion_url = f"https://{api_host}/v1/chat/completions"
-         self.balance_api_url = f"https://{api_host}/dashboard/billing/credit_grants"
-         self.usage_api_url = f"https://{api_host}/dashboard/billing/usage"
-         os.environ["OPENAI_API_BASE"] = f"https://{api_host}/v1"
+     def set_api_host(self, api_host: str):
+         api_host = api_host.rstrip("/")
+         if not api_host.startswith("http"):
+             api_host = f"https://{api_host}"
+         if api_host.endswith("/v1"):
+             api_host = api_host[:-3]
+         self.completion_url = f"{api_host}/v1/chat/completions"
+         self.balance_api_url = f"{api_host}/dashboard/billing/credit_grants"
+         self.usage_api_url = f"{api_host}/dashboard/billing/usage"
+         os.environ["OPENAI_API_BASE"] = api_host

      def reset_api_host(self):
          self.completion_url = COMPLETION_URL
          self.balance_api_url = BALANCE_API_URL
          self.usage_api_url = USAGE_API_URL
-         os.environ["OPENAI_API_BASE"] = f"https://{API_HOST}/v1"
+         os.environ["OPENAI_API_BASE"] = f"https://{API_HOST}"
          return API_HOST

      def reset_all(self):
          self.interrupted = False
          self.completion_url = COMPLETION_URL
-
+
      def set_api_key_queue(self, api_key_list):
          self.multi_api_key = True
          self.api_key_queue = queue.Queue()
@@ -50,6 +56,9 @@ class State:
          return ret

      return wrapped
-
+

  state = State()
+
+ modules_path = os.path.dirname(os.path.realpath(__file__))
+ chuanhu_path = os.path.dirname(modules_path)
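
Note: the hardened `set_api_host` accepts hosts with or without a scheme, a trailing slash, or a trailing `/v1`, and normalizes them before building endpoint URLs. Expected behavior of the new code (hosts illustrative):

    state.set_api_host("api.example.com")
    # completion_url -> https://api.example.com/v1/chat/completions

    state.set_api_host("https://api.example.com/v1/")
    # the trailing "/" and "/v1" are stripped first, so the result is identical
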
modules/utils.py CHANGED
@@ -16,7 +16,6 @@ import subprocess
  import gradio as gr
  from pypinyin import lazy_pinyin
  import tiktoken
- import mdtex2html
  from markdown import markdown
  from pygments import highlight
  from pygments.lexers import get_lexer_by_name
@@ -116,6 +115,9 @@ def set_single_turn(current_model, *args):
  def handle_file_upload(current_model, *args):
      return current_model.handle_file_upload(*args)

+ def handle_summarize_index(current_model, *args):
+     return current_model.summarize_index(*args)
+
  def like(current_model, *args):
      return current_model.like(*args)

@@ -130,7 +132,7 @@ def count_token(message):
      return length


- def markdown_to_html_with_syntax_highlight(md_str):
+ def markdown_to_html_with_syntax_highlight(md_str): # deprecated
      def replacer(match):
          lang = match.group(1) or "text"
          code = match.group(2)
@@ -152,7 +154,7 @@ def markdown_to_html_with_syntax_highlight(md_str):
      return html_str


- def normalize_markdown(md_text: str) -> str:
+ def normalize_markdown(md_text: str) -> str: # deprecated
      lines = md_text.split("\n")
      normalized_lines = []
      inside_list = False
@@ -176,7 +178,7 @@ def normalize_markdown(md_text: str) -> str:
      return "\n".join(normalized_lines)


- def convert_mdtext(md_text):
+ def convert_mdtext(md_text): # deprecated
      code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
      inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL)
      code_blocks = code_block_pattern.findall(md_text)
@@ -200,15 +202,70 @@ def convert_mdtext(md_text):
      output += ALREADY_CONVERTED_MARK
      return output

-
- def convert_asis(userinput):
+ def convert_bot_before_marked(chat_message):
+     """
+     注意不能给输出加缩进, 否则会被marked解析成代码块
+     """
+     if '<div class="md-message">' in chat_message:
+         return chat_message
+     else:
+         code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
+         code_blocks = code_block_pattern.findall(chat_message)
+         non_code_parts = code_block_pattern.split(chat_message)[::2]
+         result = []
+
+         raw = f'<div class="raw-message hideM">{escape_markdown(chat_message)}</div>'
+         for non_code, code in zip(non_code_parts, code_blocks + [""]):
+             if non_code.strip():
+                 result.append(non_code)
+             if code.strip():
+                 code = f"\n```{code}\n```"
+                 result.append(code)
+         result = "".join(result)
+         md = f'<div class="md-message">{result}\n</div>'
+         return raw + md
+
+ def convert_user_before_marked(chat_message):
+     if '<div class="user-message">' in chat_message:
+         return chat_message
+     else:
+         return f'<div class="user-message">{escape_markdown(chat_message)}</div>'
+
+ def escape_markdown(text):
+     """
+     Escape Markdown special characters to HTML-safe equivalents.
+     """
+     escape_chars = {
+         ' ': '&nbsp;',
+         '_': '&#95;',
+         '*': '&#42;',
+         '[': '&#91;',
+         ']': '&#93;',
+         '(': '&#40;',
+         ')': '&#41;',
+         '{': '&#123;',
+         '}': '&#125;',
+         '#': '&#35;',
+         '+': '&#43;',
+         '-': '&#45;',
+         '.': '&#46;',
+         '!': '&#33;',
+         '`': '&#96;',
+         '>': '&#62;',
+         '<': '&#60;',
+         '|': '&#124;'
+     }
+     return ''.join(escape_chars.get(c, c) for c in text)
+
+
+ def convert_asis(userinput): # deprecated
      return (
          f'<p style="white-space:pre-wrap;">{html.escape(userinput)}</p>'
          + ALREADY_CONVERTED_MARK
      )


- def detect_converted_mark(userinput):
+ def detect_converted_mark(userinput): # deprecated
      try:
          if userinput.endswith(ALREADY_CONVERTED_MARK):
              return True
@@ -218,7 +275,7 @@ def detect_converted_mark(userinput):
          return True


- def detect_language(code):
+ def detect_language(code): # deprecated
      if code.startswith("\n"):
          first_line = ""
      else:
@@ -253,8 +310,8 @@ def save_file(filename, system, history, chatbot, user_name):
          history_file_path = filename
      else:
          history_file_path = os.path.join(HISTORY_DIR, user_name, filename)
-     with open(history_file_path, "w") as f:
-         json.dump(json_s, f)
+     with open(history_file_path, "w", encoding='utf-8') as f:
+         json.dump(json_s, f, ensure_ascii=False)
  elif filename.endswith(".md"):
      md_s = f"system: \n- {system} \n"
      for data in history:
@@ -494,6 +551,13 @@ def versions_html():
      <a style="text-decoration:none;color:inherit" href="https://github.com/GaiZhenbiao/ChuanhuChatGPT">ChuanhuChat</a>: {commit_info}
      """

+ def get_html(filename):
+     path = os.path.join(shared.chuanhu_path, "assets", "html", filename)
+     if os.path.exists(path):
+         with open(path, encoding="utf8") as file:
+             return file.read()
+     return ""
+
  def add_source_numbers(lst, source_name = "Source", use_source = True):
      if use_source:
          return [f'[{idx+1}]\t "{item[0]}"\n{source_name}: {item[1]}' for idx, item in enumerate(lst)]
@@ -560,7 +624,7 @@ def toggle_like_btn_visibility(selected_model_name):
  def new_auto_history_filename(dirname):
      latest_file = get_latest_filepath(dirname)
      if latest_file:
-         with open(os.path.join(dirname, latest_file), 'r') as f:
+         with open(os.path.join(dirname, latest_file), 'r', encoding="utf-8") as f:
              if len(f.read()) == 0:
                  return latest_file
      now = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
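
Note: `escape_markdown` converts Markdown metacharacters (and plain spaces) to HTML entities so user text renders literally inside the new message wrappers. A quick check of its output:

    print(escape_markdown("1 + 1 = *2*"))
    # -> 1&nbsp;&#43;&nbsp;1&nbsp;=&nbsp;&#42;2&#42;
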
requirements.txt CHANGED
@@ -1,18 +1,25 @@
- gradio==3.28.0
- gradio_client==0.1.4
- mdtex2html
+ gradio==3.33.1
+ gradio_client==0.2.5
  pypinyin
  tiktoken
  socksio
  tqdm
  colorama
- duckduckgo_search==2.9.5
+ googlesearch-python
  Pygments
- llama_index==0.5.25
- langchain<0.0.150
+ langchain==0.0.173
  markdown
  PyPDF2
  pdfplumber
  pandas
  commentjson
  openpyxl
+ pandoc
+ wolframalpha
+ faiss-cpu
+ duckduckgo-search
+ arxiv
+ wikipedia
+ google.generativeai
+ openai
+ unstructured