AllenYkl committed on
Commit
3ce70bc
1 Parent(s): 76c8222

Delete utils.py

Files changed (1)
  1. utils.py +0 -435
utils.py DELETED
@@ -1,435 +0,0 @@
- # -*- coding:utf-8 -*-
- from __future__ import annotations
- from typing import TYPE_CHECKING, Any, Callable, Dict, List, Tuple, Type
- import logging
- import json
- import gradio as gr
- # import openai
- import os
- import traceback
- import requests
- # import markdown
- import csv
- import mdtex2html
- from pypinyin import lazy_pinyin
- from presets import *
- import tiktoken
- from tqdm import tqdm
- import colorama
- from duckduckgo_search import ddg
- import datetime
- # from config import *
-
- # logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s")
-
- if TYPE_CHECKING:
-     from typing import TypedDict
-
-     class DataframeData(TypedDict):
-         headers: List[str]
-         data: List[List[str | int | bool]]
-
- initial_prompt = "You are a helpful assistant."
- API_URL = "https://api.openai.com/v1/chat/completions"
- HISTORY_DIR = "history"
- TEMPLATES_DIR = "templates"
-
- def postprocess(
-     self, y: List[Tuple[str | None, str | None]]
- ) -> List[Tuple[str | None, str | None]]:
-     """
-     Convert message/response pairs to HTML. Takes a `self` parameter,
-     apparently so it can replace gr.Chatbot.postprocess.
-
-     Parameters:
-         y: List of tuples representing the message and response pairs. Each message and response should be a string, which may be in Markdown format.
-     Returns:
-         List of tuples representing the message and response. Each message and response will be a string of HTML.
-     """
-     if y is None:
-         return []
-     for i, (message, response) in enumerate(y):
-         y[i] = (
-             # None if message is None else markdown.markdown(message),
-             # None if response is None else markdown.markdown(response),
-             None if message is None else mdtex2html.convert(message),
-             None if response is None else mdtex2html.convert(response),
-         )
-     return y
-
- def count_token(input_str):
-     encoding = tiktoken.get_encoding("cl100k_base")
-     length = len(encoding.encode(input_str))
-     return length
-
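- # Renders raw model output as HTML: fenced code blocks become <pre><code>
- # sections, and characters inside code blocks are escaped so later Markdown
- # processing does not mangle them.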
- def parse_text(text):
-     lines = text.split("\n")
-     lines = [line for line in lines if line != ""]
-     count = 0
-     for i, line in enumerate(lines):
-         if "```" in line:
-             count += 1
-             items = line.split('`')
-             if count % 2 == 1:
-                 # Opening fence: start a highlighted code block.
-                 lines[i] = f'<pre><code class="language-{items[-1]}">'
-             else:
-                 # Closing fence.
-                 lines[i] = '<br></code></pre>'
-         else:
-             if i > 0:
-                 if count % 2 == 1:
-                     # Inside a code block: escape characters with special meaning.
-                     line = line.replace("`", "\`")
-                     line = line.replace("<", "&lt;")
-                     line = line.replace(">", "&gt;")
-                     line = line.replace(" ", "&nbsp;")
-                     line = line.replace("*", "&ast;")
-                     line = line.replace("_", "&lowbar;")
-                     line = line.replace("-", "&#45;")
-                     line = line.replace(".", "&#46;")
-                     line = line.replace("!", "&#33;")
-                     line = line.replace("(", "&#40;")
-                     line = line.replace(")", "&#41;")
-                     line = line.replace("$", "&#36;")
-                 lines[i] = "<br>" + line
-     text = "".join(lines)
-     return text
-
- def construct_text(role, text):
-     return {"role": role, "content": text}
-
- def construct_user(text):
-     return construct_text("user", text)
-
- def construct_system(text):
-     return construct_text("system", text)
-
- def construct_assistant(text):
-     return construct_text("assistant", text)
-
- def construct_token_message(token, stream=False):
-     return f"Token count: {token}"
-
- def get_response(openai_api_key, system_prompt, history, temperature, top_p, stream, selected_model):
-     headers = {
-         "Content-Type": "application/json",
-         "Authorization": f"Bearer {openai_api_key}"
-     }
-
-     history = [construct_system(system_prompt), *history]
-
-     payload = {
-         "model": selected_model,
-         "messages": history,  # [{"role": "user", "content": f"{inputs}"}],
-         "temperature": temperature,  # 1.0,
-         "top_p": top_p,  # 1.0,
-         "n": 1,
-         "stream": stream,
-         "presence_penalty": 0,
-         "frequency_penalty": 0,
-     }
-     if stream:
-         timeout = timeout_streaming
-     else:
-         timeout = timeout_all
-     response = requests.post(API_URL, headers=headers, json=payload, stream=True, timeout=timeout)
-     return response
-
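- # Streaming path: reads the server-sent-event response line by line,
- # accumulates the partial answer, and yields updated (chatbot, history,
- # status_text, all_token_counts) after every delta so the UI refreshes live.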
- def stream_predict(openai_api_key, system_prompt, history, inputs, chatbot, all_token_counts, top_p, temperature, selected_model):
-     def get_return_value():
-         return chatbot, history, status_text, all_token_counts
-
-     logging.info("Streaming answer mode")
-     partial_words = ""
-     counter = 0
-     status_text = "Streaming the answer..."
-     history.append(construct_user(inputs))
-     history.append(construct_assistant(""))
-     chatbot.append((parse_text(inputs), ""))
-     user_token_count = 0
-     if len(all_token_counts) == 0:
-         system_prompt_token_count = count_token(system_prompt)
-         user_token_count = count_token(inputs) + system_prompt_token_count
-     else:
-         user_token_count = count_token(inputs)
-     all_token_counts.append(user_token_count)
-     logging.info(f"Input token count: {user_token_count}")
-     yield get_return_value()
-     try:
-         response = get_response(openai_api_key, system_prompt, history, temperature, top_p, True, selected_model)
-     except requests.exceptions.ConnectTimeout:
-         status_text = standard_error_msg + connection_timeout_prompt + error_retrieve_prompt
-         yield get_return_value()
-         return
-     except requests.exceptions.ReadTimeout:
-         status_text = standard_error_msg + read_timeout_prompt + error_retrieve_prompt
-         yield get_return_value()
-         return
-
-     yield get_return_value()
-     error_json_str = ""
-
-     for chunk in tqdm(response.iter_lines()):
-         if counter == 0:
-             counter += 1
-             continue
-         counter += 1
-         # check whether each line is non-empty
-         if chunk:
-             # decode each line as response data is in bytes
-             chunk = chunk.decode()
-             chunklength = len(chunk)
-             try:
-                 # strip the leading "data: " before parsing the SSE payload
-                 chunk = json.loads(chunk[6:])
-             except json.JSONDecodeError:
-                 logging.info(chunk)
-                 error_json_str += chunk
-                 status_text = f"JSON decode error. Please reset the conversation. Received: {error_json_str}"
-                 yield get_return_value()
-                 continue
-             if chunklength > 6 and "delta" in chunk['choices'][0]:
-                 finish_reason = chunk['choices'][0]['finish_reason']
-                 status_text = construct_token_message(sum(all_token_counts), stream=True)
-                 if finish_reason == "stop":
-                     yield get_return_value()
-                     break
-                 try:
-                     partial_words = partial_words + chunk['choices'][0]["delta"]["content"]
-                 except KeyError:
-                     status_text = standard_error_msg + "No content found in the API response. The token limit was most likely reached. Please reset the conversation. Current token count: " + str(sum(all_token_counts))
-                     yield get_return_value()
-                     break
-                 history[-1] = construct_assistant(partial_words)
-                 chatbot[-1] = (parse_text(inputs), parse_text(partial_words))
-                 all_token_counts[-1] += 1
-                 yield get_return_value()
-
-
- def predict_all(openai_api_key, system_prompt, history, inputs, chatbot, all_token_counts, top_p, temperature, selected_model):
-     logging.info("Single-response mode")
-     history.append(construct_user(inputs))
-     history.append(construct_assistant(""))
-     chatbot.append((parse_text(inputs), ""))
-     all_token_counts.append(count_token(inputs))
-     try:
-         response = get_response(openai_api_key, system_prompt, history, temperature, top_p, False, selected_model)
-     except requests.exceptions.ConnectTimeout:
-         status_text = standard_error_msg + connection_timeout_prompt + error_retrieve_prompt
-         return chatbot, history, status_text, all_token_counts
-     except requests.exceptions.ProxyError:
-         status_text = standard_error_msg + proxy_error_prompt + error_retrieve_prompt
-         return chatbot, history, status_text, all_token_counts
-     except requests.exceptions.SSLError:
-         status_text = standard_error_msg + ssl_error_prompt + error_retrieve_prompt
-         return chatbot, history, status_text, all_token_counts
-     response = json.loads(response.text)
-     content = response["choices"][0]["message"]["content"]
-     history[-1] = construct_assistant(content)
-     chatbot[-1] = (parse_text(inputs), parse_text(content))
-     total_token_count = response["usage"]["total_tokens"]
-     all_token_counts[-1] = total_token_count - sum(all_token_counts)
-     status_text = construct_token_message(total_token_count)
-     return chatbot, history, status_text, all_token_counts
-
-
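- # Top-level entry point: optionally augments the prompt with DuckDuckGo
- # results, validates the API key, dispatches to the streaming or
- # single-response path, and triggers token reduction once the running
- # count exceeds the limit.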
- def predict(openai_api_key, system_prompt, history, inputs, chatbot, all_token_counts, top_p, temperature, stream=False, selected_model=MODELS[0], use_websearch_checkbox=False, should_check_token_count=True):  # repetition_penalty, top_k
-     logging.info("Input: " + colorama.Fore.BLUE + f"{inputs}" + colorama.Style.RESET_ALL)
-     if use_websearch_checkbox:
-         results = ddg(inputs, max_results=3)
-         web_results = []
-         for idx, result in enumerate(results):
-             logging.info(f"Search result {idx + 1}: {result}")
-             web_results.append(f'[{idx+1}]"{result["body"]}"\nURL: {result["href"]}')
-         web_results = "\n\n".join(web_results)
-         today = datetime.datetime.today().strftime("%Y-%m-%d")
-         inputs = websearch_prompt.replace("{current_date}", today).replace("{query}", inputs).replace("{web_results}", web_results)
-     if len(openai_api_key) != 51:
-         status_text = standard_error_msg + no_apikey_msg
-         logging.info(status_text)
-         chatbot.append((parse_text(inputs), ""))
-         if len(history) == 0:
-             history.append(construct_user(inputs))
-             history.append("")
-             all_token_counts.append(0)
-         else:
-             history[-2] = construct_user(inputs)
-         yield chatbot, history, status_text, all_token_counts
-         return
-     if stream:
-         yield chatbot, history, "Generating the answer...", all_token_counts
-     if stream:
-         logging.info("Using streaming")
-         iterator = stream_predict(openai_api_key, system_prompt, history, inputs, chatbot, all_token_counts, top_p, temperature, selected_model)
-         for chatbot, history, status_text, all_token_counts in iterator:
-             yield chatbot, history, status_text, all_token_counts
-     else:
-         logging.info("Not using streaming")
-         chatbot, history, status_text, all_token_counts = predict_all(openai_api_key, system_prompt, history, inputs, chatbot, all_token_counts, top_p, temperature, selected_model)
-         yield chatbot, history, status_text, all_token_counts
-     logging.info(f"Transfer complete. Current token counts: {all_token_counts}")
-     if len(history) > 1 and history[-1]['content'] != inputs:
-         logging.info("Answer: " + colorama.Fore.BLUE + f"{history[-1]['content']}" + colorama.Style.RESET_ALL)
-     if stream:
-         max_token = max_token_streaming
-     else:
-         max_token = max_token_all
-     if sum(all_token_counts) > max_token and should_check_token_count:
-         status_text = f"Reducing tokens: {all_token_counts}/{max_token}"
-         logging.info(status_text)
-         yield chatbot, history, status_text, all_token_counts
-         iterator = reduce_token_size(openai_api_key, system_prompt, history, chatbot, all_token_counts, top_p, temperature, stream=False, selected_model=selected_model, hidden=True)
-         for chatbot, history, status_text, all_token_counts in iterator:
-             status_text = f"Token limit reached; token count automatically reduced to {status_text}"
-             yield chatbot, history, status_text, all_token_counts
-
-
- def retry(openai_api_key, system_prompt, history, chatbot, token_count, top_p, temperature, stream=False, selected_model=MODELS[0]):
-     logging.info("Retrying...")
-     if len(history) == 0:
-         yield chatbot, history, f"{standard_error_msg}The context is empty", token_count
-         return
-     # Drop the last assistant/user pair and resubmit the last user input.
-     history.pop()
-     inputs = history.pop()["content"]
-     token_count.pop()
-     iterator = predict(openai_api_key, system_prompt, history, inputs, chatbot, token_count, top_p, temperature, stream=stream, selected_model=selected_model)
-     logging.info("Retry finished")
-     for x in iterator:
-         yield x
-
-
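- # Shrinks the context by asking the model to summarize the conversation
- # (summarize_prompt), then keeps only the summary turn and its token count.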
- def reduce_token_size(openai_api_key, system_prompt, history, chatbot, token_count, top_p, temperature, stream=False, selected_model=MODELS[0], hidden=False):
-     logging.info("Reducing token count...")
-     iterator = predict(openai_api_key, system_prompt, history, summarize_prompt, chatbot, token_count, top_p, temperature, stream=stream, selected_model=selected_model, should_check_token_count=False)
-     logging.info(f"chatbot: {chatbot}")
-     for chatbot, history, status_text, previous_token_count in iterator:
-         history = history[-2:]
-         token_count = previous_token_count[-1:]
-         if hidden:
-             chatbot.pop()
-         yield chatbot, history, construct_token_message(sum(token_count), stream=stream), token_count
-     logging.info("Finished reducing token count")
-
-
- def delete_last_conversation(chatbot, history, previous_token_count):
-     if len(chatbot) > 0 and standard_error_msg in chatbot[-1][1]:
-         logging.info("Error message found; deleting only the chatbot record")
-         chatbot.pop()
-         return chatbot, history
-     if len(history) > 0:
-         logging.info("Deleted one round of conversation history")
-         history.pop()
-         history.pop()
-     if len(chatbot) > 0:
-         logging.info("Deleted one round of chatbot conversation")
-         chatbot.pop()
-     if len(previous_token_count) > 0:
-         logging.info("Deleted the token count record of one conversation round")
-         previous_token_count.pop()
-     return chatbot, history, previous_token_count, construct_token_message(sum(previous_token_count))
-
-
- def save_chat_history(filename, system, history, chatbot):
-     logging.info("Saving chat history...")
-     if filename == "":
-         return
-     if not filename.endswith(".json"):
-         filename += ".json"
-     os.makedirs(HISTORY_DIR, exist_ok=True)
-     json_s = {"system": system, "history": history, "chatbot": chatbot}
-     logging.info(json_s)
-     with open(os.path.join(HISTORY_DIR, filename), "w") as f:
-         json.dump(json_s, f, ensure_ascii=False, indent=4)
-     logging.info("Chat history saved")
-
-
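- # Loads a saved conversation, converting the legacy plain-string history
- # format into role/content message dicts when necessary.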
- def load_chat_history(filename, system, history, chatbot):
-     logging.info("Loading chat history...")
-     try:
-         with open(os.path.join(HISTORY_DIR, filename), "r") as f:
-             json_s = json.load(f)
-         try:
-             if isinstance(json_s["history"][0], str):
-                 logging.info("History is in the legacy format; converting...")
-                 # The legacy format stored plain strings, alternating user/assistant.
-                 new_history = []
-                 for index, item in enumerate(json_s["history"]):
-                     if index % 2 == 0:
-                         new_history.append(construct_user(item))
-                     else:
-                         new_history.append(construct_assistant(item))
-                 json_s["history"] = new_history
-                 logging.info(new_history)
-         except Exception:
-             # No chat history
-             pass
-         logging.info("Chat history loaded")
-         return filename, json_s["system"], json_s["history"], json_s["chatbot"]
-     except FileNotFoundError:
-         logging.info("Chat history file not found; doing nothing")
-         return filename, system, history, chatbot
-
- def sorted_by_pinyin(items):
-     return sorted(items, key=lambda char: lazy_pinyin(char)[0][0])
-
- def get_file_names(directory, plain=False, filetypes=[".json"]):
-     logging.info(f"Getting file name list; directory: {directory}, file types: {filetypes}, plain list: {plain}")
-     files = []
-     try:
-         for filetype in filetypes:
-             files += [f for f in os.listdir(directory) if f.endswith(filetype)]
-     except FileNotFoundError:
-         files = []
-     files = sorted_by_pinyin(files)
-     if files == []:
-         files = [""]
-     if plain:
-         return files
-     else:
-         return gr.Dropdown.update(choices=files)
-
- def get_history_names(plain=False):
-     logging.info("Getting list of history file names")
-     return get_file_names(HISTORY_DIR, plain)
-
- def load_template(filename, mode=0):
-     logging.info(f"Loading template file {filename}; mode {mode} (0: return dict and dropdown, 1: return dropdown, 2: return dict)")
-     lines = []
-     logging.info("Loading template...")
-     if filename.endswith(".json"):
-         with open(os.path.join(TEMPLATES_DIR, filename), "r", encoding="utf8") as f:
-             lines = json.load(f)
-             lines = [[i["act"], i["prompt"]] for i in lines]
-     else:
-         with open(os.path.join(TEMPLATES_DIR, filename), "r", encoding="utf8") as csvfile:
-             reader = csv.reader(csvfile)
-             lines = list(reader)
-         lines = lines[1:]
-     if mode == 1:
-         return sorted_by_pinyin([row[0] for row in lines])
-     elif mode == 2:
-         return {row[0]: row[1] for row in lines}
-     else:
-         choices = sorted_by_pinyin([row[0] for row in lines])
-         return {row[0]: row[1] for row in lines}, gr.Dropdown.update(choices=choices, value=choices[0])
-
- def get_template_names(plain=False):
-     logging.info("Getting list of template file names")
-     return get_file_names(TEMPLATES_DIR, plain, filetypes=[".csv", ".json"])
-
- def get_template_content(templates, selection, original_system_prompt):
-     logging.info(f"Applying template; selection: {selection}, original system prompt: {original_system_prompt}")
-     try:
-         return templates[selection]
-     except Exception:
-         return original_system_prompt
-
- def reset_state():
-     logging.info("Resetting state")
-     return [], [], [], construct_token_message(0)
-
- def reset_textbox():
-     return gr.update(value='')
-
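- # If the user enters the invite code instead of an API key, substitute the
- # server-side key from the OPENAI_API environment variable.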
- def key_preprocessing(user_input):
-     if user_input.startswith(os.environ["INVITE_CODE"]):
-         api_key = os.environ["OPENAI_API"]
-     else:
-         api_key = user_input
-
-     return api_key