zhangjf commited on
Commit
04bdc20
1 Parent(s): 9f1e78d

write the app.py and upload data

Browse files
.gitattributes CHANGED
@@ -32,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ *.json filter=lfs diff=lfs merge=lfs -text
36
+ dialogues_set/* filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ import tiktoken
3
+
4
+ openai.api_key = os.getenv('API_KEY')
5
+
6
+ import collections
7
+ import datetime
8
+ import time
9
+ import json
10
+ import os
11
+
12
+ timezone = datetime.tz.gettz('Asia/Shanghai')
13
+ timestamp2string = lambda timestamp: datetime.datetime.fromtimestamp(timestamp).replace(tzinfo=timezone).strftime('%Y-%m-%d %H:%M:%S')
14
+
15
def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
    """Returns the number of tokens used by a list of messages.

    Follows the OpenAI cookbook accounting for gpt-3.5-turbo: each
    message carries a 4-token wrapper, a "name" key replaces the role
    (saving one token), and the reply is primed with 2 extra tokens.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # Unknown model name: fall back to the base chat encoding.
        encoding = tiktoken.get_encoding("cl100k_base")
    if model != "gpt-3.5-turbo":  # note: future models may deviate from this
        raise NotImplementedError(f"""num_tokens_from_messages() is not presently implemented for model {model}.
See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")
    total = 0
    for message in messages:
        total += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
        for key, value in message.items():
            total += len(encoding.encode(value))
            if key == "name":  # if there's a name, the role is omitted
                total -= 1  # role is always required and always 1 token
    total += 2  # every reply is primed with <im_start>assistant
    return total
34
+
35
# Answered records, each {"q": query, "a": response or None}; appended by the
# worker thread and read by the Gradio callbacks.
qas = []
# Queries still waiting to be sent to the API (filled by read_qs).
qs = []
# Wall-clock time when the worker loop began; None until it starts.
start_time = None
38
+
39
def read_qs():
    """Load all queries from ./dialogues_set and drop already-answered ones.

    Rebinds the module-level `qs` (and `qas`, when qas.json exists) so the
    UI callbacks observe the same lists. Returns the remaining queries.
    """
    # BUG FIX: without this declaration, `qs = remained_qs` below makes `qs`
    # local for the WHOLE function, so `qs.append` raised UnboundLocalError,
    # and the loaded `qas` never reached the module level (breaking resume).
    global qs, qas

    directory = "./dialogues_set"
    filenames = [
        'dialogues_film.json',
        'dialogues_jindong.json',
        'dialogues_music.json',
        'dialogues_natural.json',
        'dialogues_taobao.json',
        'dialogues_travel_kd.json'
    ]
    for filename in filenames:
        # Each line of each file is a JSON object mapping the (stringified)
        # line index to one query string.
        with open(f"{directory}/{filename}", "r", encoding="utf-8") as f:
            for idx, line in enumerate(f):
                idx2query = json.loads(line)
                query = idx2query[str(idx)]
                qs.append(query)
    print(f"read {len(qs)} queries from files")

    # Resume support: qas.json holds query-responses answered in a prior run.
    if os.path.exists("qas.json"):
        with open("qas.json", "r", encoding="utf-8") as f:
            qas = json.loads(f.read())
        print(f"read {len(qas)} query-responses from qas.json")

    # Remove one pending copy of each query per answered occurrence,
    # preserving duplicates beyond the answered count.
    existed_qs = collections.Counter([qa["q"] for qa in qas])
    remained_qs = []
    for q in qs:
        if existed_qs[q] > 0:
            existed_qs[q] -= 1
        else:
            remained_qs.append(q)
    print(f"filter out {len(qs)-len(remained_qs)} with reference to qas.json")
    qs = remained_qs

    return qs
73
+
74
+
75
def ask(query, max_attempt_times=3):
    """Send one query to gpt-3.5-turbo and return the reply text.

    Retries up to `max_attempt_times` times with a growing backoff
    (10s, 20s, ...). Returns None if every attempt fails.
    """
    answer = None
    attempt_times = 0
    while answer is None and attempt_times < max_attempt_times:
        attempt_times += 1
        try:
            # BUG FIX: the original computed the response but never assigned
            # it to `answer`, so every call burned API quota, looped through
            # all attempts, and returned None.
            answer = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "user", "content": query}
                ]
            )["choices"][0]["message"]["content"]
        except Exception as e:
            print(e)
            print(f"retry in {attempt_times*10} seconds...")
            time.sleep(attempt_times*10)
    return answer
92
+
93
+
94
def askingChatGPT(min_interval_seconds=10):
    """Worker loop: answer every pending query, rate-limited.

    Appends results to the module-level `qas` and publishes the loop start
    through the module-level `start_time`.
    """
    # BUG FIX: the original bound `start_time` locally, leaving the global
    # as None — display() then crashed computing `time.time() - start_time`.
    global start_time

    qs = read_qs()

    start_time = time.time()
    for i, q in enumerate(qs):
        ask_start_time = time.time()

        a = ask(q)
        qas.append({"q": q, "a": a})

        # Keep at least `min_interval_seconds` between consecutive API calls.
        ask_end_time = time.time()
        elapsed_time = ask_end_time - ask_start_time
        delayTime = min_interval_seconds - elapsed_time
        if delayTime > 0:
            time.sleep(delayTime)

        print(f"{timestamp2string(time.time())}: iterations: {i} / {len(qs)} | elapsed time of this query (s): {elapsed_time:.2f}")

    return
113
+
114
+
115
import threading

# Run the worker in the background so the Gradio UI below stays responsive.
# BUG FIX: the original referenced an undefined `my_function` and never
# imported `threading`; the intended target is askingChatGPT.
thread = threading.Thread(target=askingChatGPT)
thread.daemon = True  # don't block interpreter shutdown on the worker
thread.start()
118
+
119
+
120
+ import gradio as gr
121
+
122
+
123
def showcase(api_key):
    """Gradio callback: show the most recently answered queries.

    The caller must echo the space owner's API key as a password.
    Returns a list of (user, bot) message pairs for a gr.Chatbot.
    """
    if not api_key == openai.api_key:
        chatbot_ret = [(f"Your entered api_key:<br>{api_key}<br>is incorrect.", f"So i cannot provide you any information in this private space.")]
    else:
        # BUG FIX: qas[:10] returned the OLDEST ten entries although the
        # message below promises the latest ones; take the tail instead.
        recent_qas = qas[-10:]
        chatbot_ret = [(f"Your entered api_key is correct.", f"The latest {len(recent_qas)} query-responses are displayed below.")]
        for qa in recent_qas:
            chatbot_ret += [(qa["q"], qa["a"])]
    return chatbot_ret
132
+
133
+
134
def download(api_key):
    """Gradio callback: dump all answered query-responses to a JSON file.

    On a correct key, writes qas<count>.json and reveals it in the file
    widget; on a wrong key the widget stays hidden.
    """
    if not api_key == openai.api_key:
        chatbot_ret = [(f"Your entered api_key:<br>{api_key}<br>is incorrect.", f"So i cannot provide you any information in this private space.")]
        file_ret = gr.File.update(value=None, visible=False)
        return chatbot_ret, file_ret
    chatbot_ret = [(f"Your entered api_key is correct.", f"The file containing all processed query-responses ({len(qas)} in total) can be downloaded below.")]
    dump_path = f"qas{len(qas)}.json"
    with open(dump_path, "w", encoding="utf-8") as out:
        out.write(json.dumps(qas, ensure_ascii=False, indent=2))
    file_ret = gr.File.update(value=dump_path, visible=True)
    return chatbot_ret, file_ret
145
+
146
+
147
def display(api_key):
    """Gradio callback: report progress, time, token and cost estimates."""
    if not api_key == openai.api_key:
        chatbot_ret = [(f"Your entered api_key:<br>{api_key}<br>is incorrect.", f"So i cannot provide you any information in this private space.")]
    elif len(qas) < 1:
        chatbot_ret = [(f"Your entered api_key is correct.", f"But the progress has just started for a while and has no useful progress information to provide.")]
    else:
        # Linear extrapolation from the answered fraction of the workload.
        time_takes = time.time() - start_time
        time_remains = time_takes * (len(qs)-len(qas)) / len(qas)
        # BUG FIX: est. end is now + remaining; the original used
        # start_time + time_remains, omitting the time already elapsed.
        end_time = time.time() + time_remains

        messages = []
        for qa in qas:
            # BUG FIX: the original indexed the LIST (`qas["q"]`) instead of
            # the loop item `qa`, raising TypeError on first use. Failed
            # queries store a=None; substitute "" so token counting works.
            messages.append({"role": "user", "content": qa["q"]})
            messages.append({"role": "assistant", "content": qa["a"] or ""})
        num_tokens_processed = num_tokens_from_messages(messages)
        num_tokens_total = num_tokens_processed * len(qs) / len(qas)
        # gpt-3.5-turbo pricing: $0.002 per 1K tokens.
        dollars_tokens_processed = 0.002 * int(num_tokens_processed/1000)
        dollars_tokens_total = 0.002 * int(num_tokens_total/1000)

        chatbot_ret = [(f"Your entered api_key is correct.", f"The information of progress is displayed below.")]
        chatbot_ret += [(f"The number of processed / total queries:", f"{len(qas)} / {len(qs)}")]
        chatbot_ret += [(f"The hours already takes / est. remains:", f"{time_takes/3600:.2f} / {time_remains/3600:.2f}")]
        chatbot_ret += [(f"The time starts / est. ends:", f"{timestamp2string(start_time)} / {timestamp2string(end_time)}")]
        chatbot_ret += [(f"The number of processed / est. total tokens:", f"{num_tokens_processed} / {num_tokens_total}")]
        chatbot_ret += [(f"The dollars of processed / est. total tokens:", f"{dollars_tokens_processed} / {dollars_tokens_total}")]

    return chatbot_ret
174
+
175
+
176
# Private monitoring UI: every button requires re-entering the space owner's
# API key (compared against openai.api_key inside each callback).
with gr.Blocks() as demo:

    with gr.Column(variant="panel"):
        chatbot = gr.Chatbot()
        # The textbox doubles as a password field for the API key; its value
        # is passed as the sole input to every callback below.
        txt = gr.Textbox(show_label=False, placeholder="Enter my API_KEY to access this private space").style(container=False)
        with gr.Row():
            button_showcase = gr.Button("Show Recent Query-Responses")
            button_download = gr.Button("Download All Query-Responses")
            button_display = gr.Button("Display Progress Infomation")

    # Hidden until download() writes the dump file and flips visible=True.
    downloadfile = gr.File(None, interactive=False, show_label=False, visible=False)

    button_showcase.click(fn=showcase, inputs=[txt], outputs=[chatbot])
    button_download.click(fn=download, inputs=[txt], outputs=[chatbot, downloadfile])
    button_display.click(fn=display, inputs=[txt], outputs=[chatbot])

# Blocks the main thread serving the UI; the worker runs in the daemon thread.
demo.launch()
dialogues_set/dialogues_film.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1065ddc9706d8644b5cb686e43c197e4ae1a6273a2ee85892ab014a3d0589048
3
+ size 1990042
dialogues_set/dialogues_jindong.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c3cb0b5351f451d7079ba1f647c61d50c65955af1a231b4ebba5ad4336dfcec
3
+ size 875228
dialogues_set/dialogues_music.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5edd415cdb29221cd727ea3536770440a56e0732f23e9dadc2037bc9c3242f2
3
+ size 1316759
dialogues_set/dialogues_natural.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d94a0738b9d7778afa4294392a60bd4b4fd1da395c70bf7943c774da1cc9126a
3
+ size 27800692
dialogues_set/dialogues_taobao.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4ce15361975a74e91acdd032862c7e3ac421d347057b1fb375647d5ac6bb826
3
+ size 2472553
dialogues_set/dialogues_travel_kd.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c779ff0148f28403e3856b374fb7b4fac2125c749da067a55a7b499b7be2e87f
3
+ size 1397624
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ openai==0.27.0
2
+ tiktoken==0.3.0