JohnSmith9982 committed on
Commit
890e483
1 Parent(s): c5e5944

Create utils.py

Files changed (1)
  1. utils.py +290 -0
utils.py ADDED
@@ -0,0 +1,290 @@
+ """Utility functions for the Gradio-based ChatGPT web UI: Markdown/LaTeX
+ post-processing for the Chatbot component, streaming requests to the OpenAI
+ chat completions API, and helpers for saving/loading chat history and
+ prompt templates."""
+
+ from __future__ import annotations
+ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Tuple, Type
+ import json
+ import gradio as gr
+ # import openai
+ import os
+ import traceback
+ import requests
+ # import markdown
+ import csv
+ import mdtex2html
+
+ if TYPE_CHECKING:
+     from typing import TypedDict
+
+     class DataframeData(TypedDict):
+         headers: List[str]
+         data: List[List[str | int | bool]]
+
+ initial_prompt = "You are a helpful assistant."
+ API_URL = "https://api.openai.com/v1/chat/completions"
+ HISTORY_DIR = "history"
+ TEMPLATES_DIR = "templates"
+
+ def postprocess(
+     self, y: List[Tuple[str | None, str | None]]
+ ) -> List[Tuple[str | None, str | None]]:
+     """
+     Parameters:
+         y: List of tuples representing the message and response pairs. Each message and response should be a string, which may be in Markdown format.
+     Returns:
+         List of tuples representing the message and response. Each message and response will be a string of HTML.
+     """
+     if y is None:
+         return []
+     for i, (message, response) in enumerate(y):
+         y[i] = (
+             # None if message is None else markdown.markdown(message),
+             # None if response is None else markdown.markdown(response),
+             None if message is None else mdtex2html.convert(message),
+             None if response is None else mdtex2html.convert(response),
+         )
+     return y
+
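+ # Note: `postprocess` takes a `self` parameter, so it is presumably meant to be
+ # assigned onto the Chatbot component by the app. A sketch, not necessarily how
+ # the author wires it up:
+ #
+ #     gr.Chatbot.postprocess = postprocess
+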
+ def parse_text(text):
+     """Convert model output into HTML for the Chatbot: fenced code blocks become
+     <pre><code> sections, and special characters inside them are escaped."""
+     lines = text.split("\n")
+     lines = [line for line in lines if line != ""]
+     count = 0
+     firstline = False
+     for i, line in enumerate(lines):
+         if "```" in line:
+             count += 1
+             items = line.split('`')
+             if count % 2 == 1:
+                 lines[i] = f'<pre><code class="language-{items[-1]}">'
+             else:
+                 lines[i] = '<br></code></pre>'
+         else:
+             if i > 0:
+                 if count % 2 == 1:
+                     # line = line.replace("‘", "'")
+                     # line = line.replace("“", '"')
+                     line = line.replace("`", "\\`")
+                     # line = line.replace("\"", "`\"`")
+                     # line = line.replace("\'", "`\'`")
+                     # line = line.replace("'``'", "''")
+                     # line = line.replace("&", "&amp;")
+                     line = line.replace("<", "&lt;")
+                     line = line.replace(">", "&gt;")
+                     line = line.replace(" ", "&nbsp;")
+                     line = line.replace("*", "&ast;")
+                     line = line.replace("_", "&lowbar;")
+                     line = line.replace("-", "&#45;")
+                     line = line.replace(".", "&#46;")
+                     line = line.replace("!", "&#33;")
+                     line = line.replace("(", "&#40;")
+                     line = line.replace(")", "&#41;")
+                     line = line.replace("$", "&#36;")
+                 lines[i] = "<br>" + line
+     text = "".join(lines)
+     return text
+
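+ # For illustration, a rough before/after (hypothetical input, traced from the logic above):
+ #
+ #     parse_text("```python\nprint(1)\n```\nhi")
+ #     # -> '<pre><code class="language-python"><br>print&#40;1&#41;<br></code></pre><br>hi'
+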
+ def predict(inputs, top_p, temperature, openai_api_key, chatbot=[], history=[], system_prompt=initial_prompt, retry=False, summary=False, retry_on_crash=False, stream=True):  # repetition_penalty, top_k
+     """Generator that sends the conversation to the chat completions API and
+     yields (chatbot, history, status_text) tuples as the reply arrives."""
+     if retry_on_crash:
+         retry = True
+
+     headers = {
+         "Content-Type": "application/json",
+         "Authorization": f"Bearer {openai_api_key}"
+     }
+
+     chat_counter = len(history) // 2
+
+     print(f"chat_counter - {chat_counter}")
+
+     messages = []
+     if chat_counter:
+         for index in range(0, 2 * chat_counter, 2):
+             temp1 = {}
+             temp1["role"] = "user"
+             temp1["content"] = history[index]
+             temp2 = {}
+             temp2["role"] = "assistant"
+             temp2["content"] = history[index + 1]
+             if temp1["content"] != "":
+                 if temp2["content"] != "" or retry:
+                     messages.append(temp1)
+                     messages.append(temp2)
+             else:
+                 messages[-1]['content'] = temp2['content']
+     if retry and chat_counter:
+         if retry_on_crash:
+             messages = messages[-6:]
+         messages.pop()
+     elif summary:
+         history = [*[i["content"] for i in messages[-2:]], "我们刚刚聊了什么?"]  # "What did we just talk about?"
+         messages.append(compose_user(
+             "请帮我总结一下上述对话的内容,实现减少字数的同时,保证对话的质量。在总结中不要加入这一句话。"))  # ask the model to condense the conversation without mentioning this instruction
+     else:
+         temp3 = {}
+         temp3["role"] = "user"
+         temp3["content"] = inputs
+         messages.append(temp3)
+         chat_counter += 1
+     messages = [compose_system(system_prompt), *messages]
+     # messages
+     payload = {
+         "model": "gpt-3.5-turbo",
+         "messages": messages,  # [{"role": "user", "content": f"{inputs}"}],
+         "temperature": temperature,  # 1.0,
+         "top_p": top_p,  # 1.0,
+         "n": 1,
+         "stream": stream,
+         "presence_penalty": 0,
+         "frequency_penalty": 0,
+     }
+
+     if not summary:
+         history.append(inputs)
+     else:
+         print("精简中...")  # "Condensing..."
+
+     print(f"payload: {payload}")
+     # make a POST request to the API endpoint using the requests.post method, passing in stream=True
+     try:
+         response = requests.post(API_URL, headers=headers, json=payload, stream=True)
+     except Exception:
+         history.append("")
+         chatbot.append((inputs, ""))
+         yield chatbot, history, "出现了网络错误"  # "A network error occurred"
+         return
+
+     token_counter = 0
+     partial_words = ""
+
+     counter = 0
+     if stream:
+         chatbot.append((parse_text(history[-1]), ""))
+         for chunk in response.iter_lines():
+             if counter == 0:
+                 counter += 1
+                 continue
+             counter += 1
+             # check whether each line is non-empty
+             if chunk:
+                 # decode each line as response data is in bytes; [6:] strips the leading "data: " prefix of each SSE line
+                 try:
+                     if len(json.loads(chunk.decode()[6:])['choices'][0]["delta"]) == 0:
+                         chunkjson = json.loads(chunk.decode()[6:])
+                         status_text = f"id: {chunkjson['id']}, finish_reason: {chunkjson['choices'][0]['finish_reason']}"
+                         yield chatbot, history, status_text
+                         break
+                 except Exception as e:
+                     traceback.print_exc()
+                     if not retry_on_crash:
+                         print("正在尝试使用缩短的context重新生成……")  # "Retrying with a shortened context..."
+                         chatbot.pop()
+                         history.append("")
+                         yield next(predict(inputs, top_p, temperature, openai_api_key, chatbot, history, system_prompt, retry, summary=False, retry_on_crash=True, stream=False))
+                     else:
+                         msg = "☹️发生了错误:生成失败,请检查网络"  # "An error occurred: generation failed, please check the network"
+                         print(msg)
+                         history.append(inputs)
+                         chatbot.append((inputs, msg))
+                         yield chatbot, history, "status: ERROR"
+                     break
+                 chunkjson = json.loads(chunk.decode()[6:])
+                 status_text = f"id: {chunkjson['id']}, finish_reason: {chunkjson['choices'][0]['finish_reason']}"
+                 partial_words = partial_words + \
+                     json.loads(chunk.decode()[6:])['choices'][0]["delta"]["content"]
+                 if token_counter == 0:
+                     history.append(" " + partial_words)
+                 else:
+                     history[-1] = partial_words
+                 chatbot[-1] = (parse_text(history[-2]), parse_text(history[-1]))
+                 token_counter += 1
+                 yield chatbot, history, status_text
+     else:
+         try:
+             responsejson = json.loads(response.text)
+             content = responsejson["choices"][0]["message"]["content"]
+             history.append(content)
+             chatbot.append((parse_text(history[-2]), parse_text(content)))
+             status_text = "精简完成"  # "Condensing complete"
+         except Exception:
+             chatbot.append((parse_text(history[-1]), "☹️发生了错误,请检查网络连接或者稍后再试。"))  # "An error occurred; check your network connection or try again later."
+             status_text = "status: ERROR"
+         yield chatbot, history, status_text
+
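+ # A minimal consumption sketch (hypothetical; in the app, `predict` is normally
+ # wired to Gradio event handlers, which iterate the generator for streaming updates):
+ #
+ #     for chatbot, history, status in predict("Hello", top_p=1.0, temperature=1.0,
+ #                                             openai_api_key="sk-...", chatbot=[], history=[]):
+ #         print(status)
+
+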
+ def delete_last_conversation(chatbot, history):
+     if "☹️发生了错误" in chatbot[-1][1]:  # last reply is an error message ("an error occurred")
+         chatbot.pop()
+         print(history)
+         return chatbot, history
+     history.pop()
+     history.pop()
+     print(history)
+     return chatbot, history
+
+ def save_chat_history(filename, system, history, chatbot):
+     if filename == "":
+         return
+     if not filename.endswith(".json"):
+         filename += ".json"
+     os.makedirs(HISTORY_DIR, exist_ok=True)
+     json_s = {"system": system, "history": history, "chatbot": chatbot}
+     print(json_s)
+     with open(os.path.join(HISTORY_DIR, filename), "w") as f:
+         json.dump(json_s, f)
+
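+ # The saved file is plain JSON with the three fields built above, roughly
+ # (illustrative values only):
+ #
+ #     {"system": "You are a helpful assistant.",
+ #      "history": ["Hi", "Hello! How can I help?"],
+ #      "chatbot": [["Hi", "Hello! How can I help?"]]}
+
+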
+ def load_chat_history(filename):
+     with open(os.path.join(HISTORY_DIR, filename), "r") as f:
+         json_s = json.load(f)
+     print(json_s)
+     return filename, json_s["system"], json_s["history"], json_s["chatbot"]
+
+
+ def get_file_names(dir, plain=False, filetype=".json"):
+     # find all files with the given extension in `dir` and return their names
+     try:
+         files = sorted([f for f in os.listdir(dir) if f.endswith(filetype)])
+     except FileNotFoundError:
+         files = []
+     if plain:
+         return files
+     else:
+         return gr.Dropdown.update(choices=files)
+
+ def get_history_names(plain=False):
+     return get_file_names(HISTORY_DIR, plain)
+
+ def load_template(filename, mode=0):
+     lines = []
+     with open(os.path.join(TEMPLATES_DIR, filename), "r", encoding="utf8") as csvfile:
+         reader = csv.reader(csvfile)
+         lines = list(reader)
+     lines = lines[1:]  # skip the header row
+     if mode == 1:
+         return sorted([row[0] for row in lines])
+     elif mode == 2:
+         return {row[0]: row[1] for row in lines}
+     else:
+         return {row[0]: row[1] for row in lines}, gr.Dropdown.update(choices=sorted([row[0] for row in lines]))
+
+ def get_template_names(plain=False):
+     return get_file_names(TEMPLATES_DIR, plain, filetype=".csv")
+
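+ # Judging from the parsing above, a template CSV is expected to have a header row
+ # followed by (name, prompt) rows, e.g. (hypothetical column names and content):
+ #
+ #     act,prompt
+ #     English Translator,"Please translate everything I say into English."
+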
+ def reset_state():
+     return [], []
+
+
+ def compose_system(system_prompt):
+     return {"role": "system", "content": system_prompt}
+
+
+ def compose_user(user_input):
+     return {"role": "user", "content": user_input}
+
+
+ def reset_textbox():
+     return gr.update(value='')
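+
+ # For reference, the compose_* helpers return OpenAI-style message dicts, e.g.:
+ #
+ #     compose_system("You are a helpful assistant.")
+ #     # -> {"role": "system", "content": "You are a helpful assistant."}
+ #     compose_user("Hi")
+ #     # -> {"role": "user", "content": "Hi"}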