JunchuanYu committed on
Commit
6ffa208
1 Parent(s): a1fbe28

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +40 -23
utils.py CHANGED
@@ -1,5 +1,6 @@
1
  # -*- coding:utf-8 -*-
2
  from __future__ import annotations
 
3
  import logging
4
  import json
5
  import os
@@ -10,12 +11,10 @@ import gradio as gr
10
  from pypinyin import lazy_pinyin
11
  import tiktoken
12
  import mdtex2html
13
- from typing import TYPE_CHECKING, Any, Callable, Dict, List, Tuple, Type
14
  from markdown import markdown
15
  from pygments import highlight
16
  from pygments.lexers import get_lexer_by_name
17
  from pygments.formatters import HtmlFormatter
18
-
19
 
20
  use_websearch_checkbox=False
21
  use_streaming_checkbox=True
@@ -60,25 +59,6 @@ The default model role of the app is the original assistant of ChatGPT, but you
60
 
61
  MODELS = ["gpt-3.5-turbo", "gpt-3.5-turbo-0301",]
62
 
63
- def postprocess(
64
- self, y: List[Tuple[str | None, str | None]]
65
- ) -> List[Tuple[str | None, str | None]]:
66
- """
67
- Parameters:
68
- y: List of tuples representing the message and response pairs. Each message and response should be a string, which may be in Markdown format.
69
- Returns:
70
- List of tuples representing the message and response. Each message and response will be a string of HTML.
71
- """
72
- if y is None:
73
- return []
74
- for i, (message, response) in enumerate(y):
75
- y[i] = (
76
- # None if message is None else markdown.markdown(message),
77
- # None if response is None else markdown.markdown(response),
78
- None if message is None else message,
79
- None if response is None else mdtex2html.convert(response),
80
- )
81
- return y
82
 
83
  if TYPE_CHECKING:
84
  from typing import TypedDict
@@ -94,6 +74,7 @@ def count_token(message):
94
  length = len(encoding.encode(input_str))
95
  return length
96
 
 
97
  def markdown_to_html_with_syntax_highlight(md_str):
98
  def replacer(match):
99
  lang = match.group(1) or "text"
@@ -115,6 +96,7 @@ def markdown_to_html_with_syntax_highlight(md_str):
115
  html_str = markdown(md_str)
116
  return html_str
117
 
 
118
  def normalize_markdown(md_text: str) -> str:
119
  lines = md_text.split("\n")
120
  normalized_lines = []
@@ -138,6 +120,24 @@ def normalize_markdown(md_text: str) -> str:
138
 
139
  return "\n".join(normalized_lines)
140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  def convert_mdtext(md_text):
142
  code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
143
  inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL)
@@ -153,8 +153,6 @@ def convert_mdtext(md_text):
153
  else:
154
  result.append(mdtex2html.convert(non_code, extensions=["tables"]))
155
  if code.strip():
156
- # _, code = detect_language(code) # 暂时去除代码高亮功能,因为在大段代码的情况下会出现问题
157
- # code = code.replace("\n\n", "\n") # 暂时去除代码中的空行,因为在大段代码的情况下会出现问题
158
  code = f"```{code}\n\n```"
159
  code = markdown_to_html_with_syntax_highlight(code)
160
  result.append(code)
@@ -184,6 +182,7 @@ def construct_assistant(text):
184
  def construct_token_message(token, stream=False):
185
  return f"Token count: {token}"
186
 
 
187
  def save_file(filename, system, history, chatbot):
188
  logging.info("saving......")
189
  os.makedirs(HISTORY_DIR, exist_ok=True)
@@ -198,8 +197,10 @@ def save_file(filename, system, history, chatbot):
198
  md_s += f"\n{data['role']}: \n- {data['content']} \n"
199
  with open(os.path.join(HISTORY_DIR, filename), "w", encoding="utf8") as f:
200
  f.write(md_s)
 
201
  return os.path.join(HISTORY_DIR, filename)
202
 
 
203
  def save_chat_history(filename, system, history, chatbot):
204
  if filename == "":
205
  return
@@ -207,6 +208,7 @@ def save_chat_history(filename, system, history, chatbot):
207
  filename += ".json"
208
  return save_file(filename, system, history, chatbot)
209
 
 
210
  def export_markdown(filename, system, history, chatbot):
211
  if filename == "":
212
  return
@@ -214,7 +216,9 @@ def export_markdown(filename, system, history, chatbot):
214
  filename += ".md"
215
  return save_file(filename, system, history, chatbot)
216
 
 
217
  def load_chat_history(filename, system, history, chatbot):
 
218
  if type(filename) != str:
219
  filename = filename.name
220
  try:
@@ -222,6 +226,7 @@ def load_chat_history(filename, system, history, chatbot):
222
  json_s = json.load(f)
223
  try:
224
  if type(json_s["history"][0]) == str:
 
225
  new_history = []
226
  for index, item in enumerate(json_s["history"]):
227
  if index % 2 == 0:
@@ -231,14 +236,20 @@ def load_chat_history(filename, system, history, chatbot):
231
  json_s["history"] = new_history
232
  logging.info(new_history)
233
  except:
 
234
  pass
 
235
  return filename, json_s["system"], json_s["history"], json_s["chatbot"]
236
  except FileNotFoundError:
 
237
  return filename, system, history, chatbot
238
 
 
239
  def load_template(filename, mode=0):
 
240
  lines = []
241
  logging.info("Loading template...")
 
242
  if filename.endswith(".json"):
243
  with open(os.path.join(TEMPLATES_DIR, filename), "r", encoding="utf8") as f:
244
  lines = json.load(f)
@@ -261,6 +272,7 @@ def load_template(filename, mode=0):
261
  def sorted_by_pinyin(list):
262
  return sorted(list, key=lambda char: lazy_pinyin(char)[0][0])
263
 
 
264
  def get_template_content(templates, selection, original_system_prompt):
265
  logging.info(f"Prompt: {selection}")
266
  try:
@@ -268,13 +280,16 @@ def get_template_content(templates, selection, original_system_prompt):
268
  except:
269
  return original_system_prompt
270
 
 
271
  def reset_state():
272
  logging.info("Reset")
273
  return [], [], [], construct_token_message(0)
274
 
 
275
  def reset_textbox():
276
  return gr.update(value="")
277
 
 
278
  def hide_middle_chars(s):
279
  if len(s) <= 8:
280
  return s
@@ -289,3 +304,5 @@ def submit_key(key):
289
  msg = f"API-Key: {hide_middle_chars(key)}"
290
  logging.info(msg)
291
  return key, msg
 
 
 
1
  # -*- coding:utf-8 -*-
2
  from __future__ import annotations
3
+ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Tuple, Type
4
  import logging
5
  import json
6
  import os
 
11
  from pypinyin import lazy_pinyin
12
  import tiktoken
13
  import mdtex2html
 
14
  from markdown import markdown
15
  from pygments import highlight
16
  from pygments.lexers import get_lexer_by_name
17
  from pygments.formatters import HtmlFormatter
 
18
 
19
  use_websearch_checkbox=False
20
  use_streaming_checkbox=True
 
59
 
60
  MODELS = ["gpt-3.5-turbo", "gpt-3.5-turbo-0301",]
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  if TYPE_CHECKING:
64
  from typing import TypedDict
 
74
  length = len(encoding.encode(input_str))
75
  return length
76
 
77
+
78
  def markdown_to_html_with_syntax_highlight(md_str):
79
  def replacer(match):
80
  lang = match.group(1) or "text"
 
96
  html_str = markdown(md_str)
97
  return html_str
98
 
99
+
100
  def normalize_markdown(md_text: str) -> str:
101
  lines = md_text.split("\n")
102
  normalized_lines = []
 
120
 
121
  return "\n".join(normalized_lines)
122
 
123
+ def postprocess(
124
+ self, y: List[Tuple[str | None, str | None]]
125
+ ) -> List[Tuple[str | None, str | None]]:
126
+ """
127
+ Parameters:
128
+ y: List of tuples representing the message and response pairs. Each message and response should be a string, which may be in Markdown format.
129
+ Returns:
130
+ List of tuples representing the message and response. Each message and response will be a string of HTML.
131
+ """
132
+ if y is None or y == []:
133
+ return []
134
+ tag_regex = re.compile(r"^<\w+>[^<]+</\w+>")
135
+ if tag_regex.search(y[-1][1]):
136
+ y[-1] = (convert_user(y[-1][0]), y[-1][1])
137
+ else:
138
+ y[-1] = (convert_user(y[-1][0]), convert_mdtext(y[-1][1]))
139
+ return y
140
+
141
  def convert_mdtext(md_text):
142
  code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
143
  inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL)
 
153
  else:
154
  result.append(mdtex2html.convert(non_code, extensions=["tables"]))
155
  if code.strip():
 
 
156
  code = f"```{code}\n\n```"
157
  code = markdown_to_html_with_syntax_highlight(code)
158
  result.append(code)
 
182
  def construct_token_message(token, stream=False):
183
  return f"Token count: {token}"
184
 
185
+
186
  def save_file(filename, system, history, chatbot):
187
  logging.info("saving......")
188
  os.makedirs(HISTORY_DIR, exist_ok=True)
 
197
  md_s += f"\n{data['role']}: \n- {data['content']} \n"
198
  with open(os.path.join(HISTORY_DIR, filename), "w", encoding="utf8") as f:
199
  f.write(md_s)
200
+ # logging.info("保存对话历史完毕")
201
  return os.path.join(HISTORY_DIR, filename)
202
 
203
+
204
  def save_chat_history(filename, system, history, chatbot):
205
  if filename == "":
206
  return
 
208
  filename += ".json"
209
  return save_file(filename, system, history, chatbot)
210
 
211
+
212
  def export_markdown(filename, system, history, chatbot):
213
  if filename == "":
214
  return
 
216
  filename += ".md"
217
  return save_file(filename, system, history, chatbot)
218
 
219
+
220
  def load_chat_history(filename, system, history, chatbot):
221
+ # logging.info("加载对话历史中……")
222
  if type(filename) != str:
223
  filename = filename.name
224
  try:
 
226
  json_s = json.load(f)
227
  try:
228
  if type(json_s["history"][0]) == str:
229
+ # logging.info("历史记录格式为旧版,正在转换……")
230
  new_history = []
231
  for index, item in enumerate(json_s["history"]):
232
  if index % 2 == 0:
 
236
  json_s["history"] = new_history
237
  logging.info(new_history)
238
  except:
239
+ # 没有对话历史
240
  pass
241
+ # logging.info("加载对话历史完毕")
242
  return filename, json_s["system"], json_s["history"], json_s["chatbot"]
243
  except FileNotFoundError:
244
+ # logging.info("没有找到对话历史文件,不执行任何操作")
245
  return filename, system, history, chatbot
246
 
247
+
248
  def load_template(filename, mode=0):
249
+ # logging.info(f"加载模板文件{filename},模式为{mode}(0为返回字典和下拉菜单,1为返回下拉菜单,2为返回字典)")
250
  lines = []
251
  logging.info("Loading template...")
252
+ # filename='中文Prompts.json'
253
  if filename.endswith(".json"):
254
  with open(os.path.join(TEMPLATES_DIR, filename), "r", encoding="utf8") as f:
255
  lines = json.load(f)
 
272
  def sorted_by_pinyin(list):
273
  return sorted(list, key=lambda char: lazy_pinyin(char)[0][0])
274
 
275
+
276
  def get_template_content(templates, selection, original_system_prompt):
277
  logging.info(f"Prompt: {selection}")
278
  try:
 
280
  except:
281
  return original_system_prompt
282
 
283
+
284
  def reset_state():
285
  logging.info("Reset")
286
  return [], [], [], construct_token_message(0)
287
 
288
+
289
  def reset_textbox():
290
  return gr.update(value="")
291
 
292
+
293
  def hide_middle_chars(s):
294
  if len(s) <= 8:
295
  return s
 
304
  msg = f"API-Key: {hide_middle_chars(key)}"
305
  logging.info(msg)
306
  return key, msg
307
+
308
+